75 files changed, 739 insertions, 601 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1068742a6df..e464cf0b5025 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -599,6 +599,17 @@ altogether. For more information, see include/linux/dma-contiguous.h + cma_pernuma=nn[MG] + [ARM64,KNL] + Sets the size of kernel per-numa memory area for + contiguous memory allocations. A value of 0 disables + per-numa CMA altogether. And If this option is not + specificed, the default value is 0. + With per-numa CMA enabled, DMA users on node nid will + first try to allocate buffer from the pernuma area + which is located in node nid, if the allocation fails, + they will fallback to the global default memory area. + cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive when they are freed. This is used in CMO environments diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 3b3abbbb4b9a..90239348b30f 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -586,30 +586,6 @@ the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and continuing on for size. Again, you *must* observe the cache line boundaries when doing this. -:: - - int - dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); - -Declare region of memory to be handed out by dma_alloc_coherent() when -it's asked for coherent memory for this device. - -phys_addr is the CPU physical address to which the memory is currently -assigned (this will be ioremapped so the CPU can access the region). - -device_addr is the DMA address the device needs to be programmed -with to actually address this memory (this will be handed out as the -dma_addr_t in dma_alloc_coherent()). - -size is the size of the area (must be multiples of PAGE_SIZE). - -As a simplification for the platforms, only *one* such region of -memory may be declared per device. - -For reasons of efficiency, most platforms choose to track the declared -region only at the granularity of a page. For smaller allocations, -you should use the dma_pool() API. 
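The cma_pernuma= text added above describes a node-local-first policy: allocate from the CMA area reserved on the device's NUMA node, and fall back to the global default area on failure. Below is a minimal C sketch of that ordering; the array name dma_contiguous_pernuma_area[] is an assumption for illustration and may not match the series' actual kernel/dma/contiguous.c code, while cma_alloc() and dma_contiguous_default_area are existing symbols.

	#include <linux/cma.h>
	#include <linux/device.h>
	#include <linux/dma-contiguous.h>
	#include <linux/gfp.h>
	#include <linux/numa.h>

	/*
	 * Sketch of the documented per-NUMA CMA fallback order.
	 * dma_contiguous_pernuma_area[] is an assumed name for the
	 * per-node areas reserved by cma_pernuma=.
	 */
	static struct page *pernuma_cma_alloc(struct device *dev, size_t size,
					      gfp_t gfp)
	{
		size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
		unsigned int align = get_order(size);
		int nid = dev_to_node(dev);
		struct page *page = NULL;

		/* First try the CMA area that lives on the device's node. */
		if (nid != NUMA_NO_NODE && dma_contiguous_pernuma_area[nid])
			page = cma_alloc(dma_contiguous_pernuma_area[nid],
					 count, align, gfp & __GFP_NOWARN);

		/* On failure, fall back to the global default CMA area. */
		if (!page && dma_contiguous_default_area)
			page = cma_alloc(dma_contiguous_default_area,
					 count, align, gfp & __GFP_NOWARN);

		return page;
	}
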
Part III - Debug drivers use of the DMA-API ------------------------------------------- diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 81037907268d..6f7de4f4e191 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -141,12 +141,7 @@ iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, unsigned long boundary_size; base = arena->dma_base >> PAGE_SHIFT; - if (dev) { - boundary_size = dma_get_seg_boundary(dev) + 1; - boundary_size >>= PAGE_SHIFT; - } else { - boundary_size = 1UL << (32 - PAGE_SHIFT); - } + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); /* Search forward for the first mask-aligned sequence of N free ptes */ ptes = arena->ptes; diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index f4b719bde763..d3e00ea92088 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -24,7 +24,7 @@ #include <linux/slab.h> #include <linux/page-flags.h> #include <linux/device.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/dmapool.h> #include <linux/list.h> #include <linux/scatterlist.h> diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 7c3001a6a775..77fcb7ee5ec9 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,13 +2,44 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +#include <asm/memory.h> + +/* + * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private + * functions used internally by the DMA-mapping API to provide DMA + * addresses. They must not be used by drivers. + */ +static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) +{ + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_phys_to_dma(dev, PFN_PHYS(pfn))); + return (dma_addr_t)__pfn_to_bus(pfn); +} + +static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) +{ + unsigned long pfn = __bus_to_pfn(addr); + + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_dma_to_phys(dev, PFN_PHYS(pfn))); + return pfn; +} + +static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) +{ + if (dev) + return pfn_to_dma(dev, virt_to_pfn(addr)); + + return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { unsigned int offset = dev_addr & ~PAGE_MASK; return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bdd80ddbca34..0a1a536368c3 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -8,8 +8,6 @@ #include <linux/scatterlist.h> #include <linux/dma-debug.h> -#include <asm/memory.h> - #include <xen/xen.h> #include <asm/xen/hypervisor.h> @@ -23,74 +21,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -#ifdef __arch_page_to_dma -#error Please update to __arch_pfn_to_dma -#endif - -/* - * dma_to_pfn/pfn_to_dma/dma_to_virt/virt_to_dma are architecture private - * functions used internally by the DMA-mapping API to provide DMA - * 
addresses. They must not be used by drivers. - */ -#ifndef __arch_pfn_to_dma -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - if (dev) - pfn -= dev->dma_pfn_offset; - return (dma_addr_t)__pfn_to_bus(pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - unsigned long pfn = __bus_to_pfn(addr); - - if (dev) - pfn += dev->dma_pfn_offset; - - return pfn; -} - -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - if (dev) { - unsigned long pfn = dma_to_pfn(dev, addr); - - return phys_to_virt(__pfn_to_phys(pfn)); - } - - return (void *)__bus_to_virt((unsigned long)addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - if (dev) - return pfn_to_dma(dev, virt_to_pfn(addr)); - - return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); -} - -#else -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - return __arch_pfn_to_dma(dev, pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_pfn(dev, addr); -} - -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_virt(dev, addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - return __arch_virt_to_dma(dev, addr); -} -#endif - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 638808c4e122..09a65c2dfd73 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -8,6 +8,7 @@ */ #include <linux/io.h> #include <linux/of.h> +#include <linux/dma-mapping.h> #include <linux/init.h> #include <linux/of_platform.h> #include <linux/of_address.h> @@ -24,8 +25,7 @@ #include "keystone.h" -static unsigned long keystone_dma_pfn_offset __read_mostly; - +#ifdef CONFIG_ARM_LPAE static int keystone_platform_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -38,9 +38,12 @@ static int keystone_platform_notifier(struct notifier_block *nb, return NOTIFY_BAD; if (!dev->of_node) { - dev->dma_pfn_offset = keystone_dma_pfn_offset; - dev_err(dev, "set dma_pfn_offset%08lx\n", - dev->dma_pfn_offset); + int ret = dma_direct_set_offset(dev, KEYSTONE_HIGH_PHYS_START, + KEYSTONE_LOW_PHYS_START, + KEYSTONE_HIGH_PHYS_SIZE); + dev_err(dev, "set dma_offset%08llx%s\n", + KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START, + ret ? " failed" : ""); } return NOTIFY_OK; } @@ -48,14 +51,14 @@ static int keystone_platform_notifier(struct notifier_block *nb, static struct notifier_block platform_nb = { .notifier_call = keystone_platform_notifier, }; +#endif /* CONFIG_ARM_LPAE */ static void __init keystone_init(void) { - if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { - keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - - KEYSTONE_LOW_PHYS_START); +#ifdef CONFIG_ARM_LPAE + if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) bus_register_notifier(&platform_bus_type, &platform_nb); - } +#endif keystone_pm_runtime_init(); } diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 1142560e0078..36bc0000cb6a 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -14,42 +14,11 @@ * OMAP-1510 bus address is translated into a Local Bus address if the * OMAP bus type is lbus. 
We do the address translation based on the * device overriding the defaults used in the dma-mapping API. - * Note that the is_lbus_device() test is not very efficient on 1510 - * because of the strncmp(). */ -#if defined(CONFIG_ARCH_OMAP15XX) && !defined(__ASSEMBLER__) /* * OMAP-1510 Local Bus address offset */ #define OMAP1510_LB_OFFSET UL(0x30000000) -#define virt_to_lbus(x) ((x) - PAGE_OFFSET + OMAP1510_LB_OFFSET) -#define lbus_to_virt(x) ((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET) -#define is_lbus_device(dev) (cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0)) - -#define __arch_pfn_to_dma(dev, pfn) \ - ({ dma_addr_t __dma = __pfn_to_phys(pfn); \ - if (is_lbus_device(dev)) \ - __dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET; \ - __dma; }) - -#define __arch_dma_to_pfn(dev, addr) \ - ({ dma_addr_t __dma = addr; \ - if (is_lbus_device(dev)) \ - __dma += PHYS_OFFSET - OMAP1510_LB_OFFSET; \ - __phys_to_pfn(__dma); \ - }) - -#define __arch_dma_to_virt(dev, addr) ({ (void *) (is_lbus_device(dev) ? \ - lbus_to_virt(addr) : \ - __phys_to_virt(addr)); }) - -#define __arch_virt_to_dma(dev, addr) ({ unsigned long __addr = (unsigned long)(addr); \ - (dma_addr_t) (is_lbus_device(dev) ? \ - virt_to_lbus(__addr) : \ - __virt_to_phys(__addr)); }) - -#endif /* CONFIG_ARCH_OMAP15XX */ - #endif diff --git a/arch/arm/mach-omap1/usb.c b/arch/arm/mach-omap1/usb.c index d8e9bbda8f7b..ba8566204ea9 100644 --- a/arch/arm/mach-omap1/usb.c +++ b/arch/arm/mach-omap1/usb.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/platform_device.h> +#include <linux/dma-mapping.h> #include <linux/io.h> #include <asm/irq.h> @@ -542,6 +543,25 @@ bad: /* ULPD_APLL_CTRL */ #define APLL_NDPLL_SWITCH (1 << 0) +static int omap_1510_usb_ohci_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct device *dev = data; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (strncmp(dev_name(dev), "ohci", 4) == 0 && + dma_direct_set_offset(dev, PHYS_OFFSET, OMAP1510_LB_OFFSET, + (u64)-1)) + WARN_ONCE(1, "failed to set DMA offset\n"); + return NOTIFY_OK; +} + +static struct notifier_block omap_1510_usb_ohci_nb = { + .notifier_call = omap_1510_usb_ohci_notifier, +}; + static void __init omap_1510_usb_init(struct omap_usb_config *config) { unsigned int val; @@ -600,6 +620,8 @@ static void __init omap_1510_usb_init(struct omap_usb_config *config) if (config->register_host) { int status; + bus_register_notifier(&platform_bus_type, + &omap_1510_usb_ohci_nb); ohci_device.dev.platform_data = config; status = platform_device_register(&ohci_device); if (status) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 481d22c32a2e..f1c75957ff3c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -429,6 +429,8 @@ void __init bootmem_init(void) arm64_hugetlb_cma_reserve(); #endif + dma_pernuma_cma_reserve(); + /* * sparse_init() tries to allocate memory from memblock, so must be * done after the fixed reservations diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 5b4ec80bf586..513ba0c5d336 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_HAS_DMA_MARK_CLEAN select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ACPI @@ -32,8 +33,6 @@ config IA64 select TTY select HAVE_ARCH_TRACEHOOK select HAVE_VIRT_CPU_ACCOUNTING - select DMA_NONCOHERENT_MMAP - select ARCH_HAS_SYNC_DMA_FOR_CPU select VIRT_TO_BUS select GENERIC_IRQ_PROBE select 
GENERIC_PENDING_IRQ if SMP diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 656a4888c300..b49b73a95067 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -485,8 +485,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); - boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1; - boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift; + boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift); BUG_ON(ioc->ibase & ~iovp_mask); shift = ioc->ibase >> iovp_shift; diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 09ef9ce9988d..f640ed6fe1d5 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/dma-direct.h> +#include <linux/dma-mapping.h> #include <linux/export.h> /* Set this to 1 if there is a HW IOMMU in the system */ @@ -7,15 +7,3 @@ int iommu_detected __read_mostly; const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); - -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} - -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -} diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0b3fb4c7af29..02e5aa08294e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -73,8 +73,7 @@ __ia64_sync_icache_dcache (pte_t pte) * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to * flush them when they get mapped into an executable vm-area. 
*/ -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_dma_mark_clean(phys_addr_t paddr, size_t size) { unsigned long pfn = PHYS_PFN(paddr); diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index df56bf4179e3..49061b870680 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -40,7 +40,7 @@ static struct bmips_dma_range *bmips_dma_ranges; #define FLUSH_RAC 0x100 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t pa) { struct bmips_dma_range *r; @@ -52,7 +52,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) return pa; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { struct bmips_dma_range *r; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 14ea680d180e..232fa1017b1e 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -168,7 +168,7 @@ void __init octeon_pci_dma_init(void) } #endif /* CONFIG_PCI */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) @@ -177,7 +177,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h index 14e352651ce9..9a640118316c 100644 --- a/arch/mips/include/asm/dma-direct.h +++ b/arch/mips/include/asm/dma-direct.h @@ -2,7 +2,7 @@ #ifndef _MIPS_DMA_DIRECT_H #define _MIPS_DMA_DIRECT_H 1 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); #endif /* _MIPS_DMA_DIRECT_H */ diff --git a/arch/mips/include/asm/jazzdma.h b/arch/mips/include/asm/jazzdma.h index d13f940022d5..c831da7fa898 100644 --- a/arch/mips/include/asm/jazzdma.h +++ b/arch/mips/include/asm/jazzdma.h @@ -10,8 +10,6 @@ */ extern unsigned long vdma_alloc(unsigned long paddr, unsigned long size); extern int vdma_free(unsigned long laddr); -extern int vdma_remap(unsigned long laddr, unsigned long paddr, - unsigned long size); extern unsigned long vdma_phys2log(unsigned long paddr); extern unsigned long vdma_log2phys(unsigned long laddr); extern void vdma_stats(void); /* for debugging only */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 014773f0bfcd..dab4d058cea9 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -16,7 +16,6 @@ #include <linux/memblock.h> #include <linux/spinlock.h> #include <linux/gfp.h> -#include <linux/dma-direct.h> #include <linux/dma-noncoherent.h> #include <asm/mipsregs.h> #include <asm/jazz.h> @@ -210,76 +209,6 @@ int vdma_free(unsigned long laddr) EXPORT_SYMBOL(vdma_free); /* - * Map certain page(s) to another physical address. - * Caller must have allocated the page(s) before. 
- */ -int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) -{ - int first, pages; - - if (laddr > 0xffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid logical address: %08lx\n", - laddr); - return -EINVAL; /* invalid logical address */ - } - if (paddr > 0x1fffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid physical address: %08lx\n", - paddr); - return -EINVAL; /* invalid physical address */ - } - - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - if (vdma_debug) - printk("vdma_remap: first=%x, pages=%x\n", first, pages); - if (first + pages > VDMA_PGTBL_ENTRIES) { - if (vdma_debug) - printk("vdma_alloc: Invalid size: %08lx\n", size); - return -EINVAL; - } - - paddr &= ~(VDMA_PAGESIZE - 1); - while (pages > 0 && first < VDMA_PGTBL_ENTRIES) { - if (pgtbl[first].owner != laddr) { - if (vdma_debug) - printk("Trying to remap other's pages.\n"); - return -EPERM; /* not owner */ - } - pgtbl[first].frame = paddr; - paddr += VDMA_PAGESIZE; - first++; - pages--; - } - - /* - * Update translation table - */ - r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); - - if (vdma_debug > 2) { - int i; - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - printk("LADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", i << 12); - printk("\nPADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].frame); - printk("\nOWNER: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].owner); - printk("\n"); - } - - return 0; -} - -/* * Translate a physical address to a logical address. * This will return the logical address of the first * match. @@ -562,26 +491,39 @@ int vdma_get_enable(int channel) static void *jazz_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + struct page *page; void *ret; - ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; - *dma_handle = vdma_alloc(virt_to_phys(ret), size); - if (*dma_handle == DMA_MAPPING_ERROR) { - dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); + size = PAGE_ALIGN(size); + page = alloc_pages(gfp, get_order(size)); + if (!page) return NULL; - } + ret = page_address(page); + memset(ret, 0, size); + *dma_handle = vdma_alloc(virt_to_phys(ret), size); + if (*dma_handle == DMA_MAPPING_ERROR) + goto out_free_pages; + + if (attrs & DMA_ATTR_NON_CONSISTENT) + return ret; + arch_dma_prep_coherent(page, size); + return (void *)(UNCAC_BASE + __pa(ret)); - return ret; +out_free_pages: + __free_pages(page, get_order(size)); + return NULL; } static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) + vaddr = __va(vaddr - UNCAC_BASE); + __free_pages(virt_to_page(vaddr), get_order(size)); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -678,7 +620,6 @@ const struct dma_map_ops jazz_dma_ops = { .sync_single_for_device = jazz_dma_sync_single_for_device, .sync_sg_for_cpu = jazz_dma_sync_sg_for_cpu, .sync_sg_for_device = jazz_dma_sync_sg_for_device, - .dma_supported = dma_direct_supported, .cache_sync = arch_dma_cache_sync, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c 
b/arch/mips/loongson2ef/fuloong-2e/dma.c index e122292bf666..cea167d8aba8 100644 --- a/arch/mips/loongson2ef/fuloong-2e/dma.c +++ b/arch/mips/loongson2ef/fuloong-2e/dma.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/dma-direct.h> -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & 0x7fffffff; } diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c index abf0e39d7e46..3c9e99456357 100644 --- a/arch/mips/loongson2ef/lemote-2f/dma.c +++ b/arch/mips/loongson2ef/lemote-2f/dma.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/dma-direct.h> -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { if (dma_addr > 0x8fffffff) return dma_addr; diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index dbfe6e82fddd..364f2f27c872 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -4,7 +4,7 @@ #include <linux/swiotlb.h> #include <boot_param.h> -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ @@ -13,7 +13,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return ((nid << 44) ^ paddr) | (nid << node_id_offset); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 563c2c0d0c81..97a14adbafc9 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -55,22 +55,34 @@ void *arch_dma_set_uncached(void *addr, size_t size) return (void *)(__pa(addr) + UNCAC_BASE); } -static inline void dma_sync_virt(void *addr, size_t size, +static inline void dma_sync_virt_for_device(void *addr, size_t size, enum dma_data_direction dir) { switch (dir) { case DMA_TO_DEVICE: dma_cache_wback((unsigned long)addr, size); break; - case DMA_FROM_DEVICE: dma_cache_inv((unsigned long)addr, size); break; - case DMA_BIDIRECTIONAL: dma_cache_wback_inv((unsigned long)addr, size); break; + default: + BUG(); + } +} +static inline void dma_sync_virt_for_cpu(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv((unsigned long)addr, size); + break; default: BUG(); } @@ -82,7 +94,7 @@ static inline void dma_sync_virt(void *addr, size_t size, * configured then the bulk of this loop gets optimized out. 
*/ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, bool for_device) { struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); unsigned long offset = paddr & ~PAGE_MASK; @@ -90,18 +102,20 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, do { size_t len = left; + void *addr; if (PageHighMem(page)) { - void *addr; - if (offset + len > PAGE_SIZE) len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + if (for_device) + dma_sync_virt_for_device(addr + offset, len, dir); + else + dma_sync_virt_for_cpu(addr + offset, len, dir); + kunmap_atomic(addr); - addr = kmap_atomic(page); - dma_sync_virt(addr + offset, len, dir); - kunmap_atomic(addr); - } else - dma_sync_virt(page_address(page) + offset, size, dir); offset = 0; page++; left -= len; @@ -111,7 +125,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, true); } #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU @@ -119,16 +133,14 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { if (cpu_needs_post_dma_flush()) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, false); } #endif void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { - BUG_ON(direction == DMA_NONE); - - dma_sync_virt(vaddr, size, direction); + dma_sync_virt_for_device(vaddr, size, direction); } #ifdef CONFIG_DMA_PERDEV_COHERENT diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 490953f51528..cef4a47ab063 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -170,12 +170,12 @@ static inline dma_addr_t ar2315_dev_offset(struct device *dev) return 0; } -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + ar2315_dev_offset(dev); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr - ar2315_dev_offset(dev); } diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 9b3cc775c55e..50f7d42cca5a 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -25,7 +25,7 @@ /* * Common phys<->dma mapping for platforms using pci xtalk bridge */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { struct pci_dev *pdev = to_pci_dev(dev); struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); @@ -33,7 +33,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return bc->baddr + paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & ~(0xffUL << 56); } diff --git a/arch/mips/sgi-ip32/ip32-dma.c b/arch/mips/sgi-ip32/ip32-dma.c index fa7b17cb5385..20c6da9d76bc 100644 --- a/arch/mips/sgi-ip32/ip32-dma.c +++ b/arch/mips/sgi-ip32/ip32-dma.c @@ -18,7 +18,7 @@ #define RAM_OFFSET_MASK 0x3fffffffUL -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dma_addr = paddr & RAM_OFFSET_MASK; @@ -27,7 +27,7 @@ dma_addr_t __phys_to_dma(struct device *dev, 
phys_addr_t paddr) return dma_addr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr = dma_addr & RAM_OFFSET_MASK; diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index abc154d784b0..128304cbee1d 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -2,12 +2,12 @@ #ifndef ASM_POWERPC_DMA_DIRECT_H #define ASM_POWERPC_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + dev->archdata.dma_offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { return daddr - dev->archdata.dma_offset; } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 9704f3f76e63..5b69a6a72a0e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -172,7 +172,6 @@ static unsigned long iommu_range_alloc(struct device *dev, int largealloc = npages > 15; int pass = 0; unsigned long align_mask; - unsigned long boundary_size; unsigned long flags; unsigned int pool_nr; struct iommu_pool *pool; @@ -236,15 +235,9 @@ again: } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << tbl->it_page_shift); - else - boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); - /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, - boundary_size >> tbl->it_page_shift, align_mask); + dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift), + align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 64b1399a73f0..4a37d8f4de9d 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -261,13 +261,11 @@ static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, zdev->start_dma >> PAGE_SHIFT, - boundary_size, 0); + dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), + 0); } static dma_addr_t dma_alloc_address(struct device *dev, int size) diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index e0b568aaa701..4468289ab2ca 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -12,6 +12,7 @@ #include <linux/io.h> #include <linux/async.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/clk.h> #include <linux/sh_clk.h> @@ -31,6 +32,8 @@ struct sh7786_pcie_port { static struct sh7786_pcie_port *sh7786_pcie_ports; static unsigned int nr_ports; static unsigned long dma_pfn_offset; +size_t memsize; +u64 memstart; static struct sh7786_pcie_hwops { int (*core_init)(void); @@ -301,7 +304,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) struct pci_channel *chan = port->hose; unsigned int data; phys_addr_t memstart, memend; - size_t memsize; int ret, i, win; /* Begin initialization */ @@ -368,8 +370,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) memstart = 
ALIGN_DOWN(memstart, memsize); memsize = roundup_pow_of_two(memend - memstart); - dma_pfn_offset = memstart >> PAGE_SHIFT; - /* * If there's more than 512MB of memory, we need to roll over to * LAR1/LAMR1. @@ -487,7 +487,8 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) void pcibios_bus_add_device(struct pci_dev *pdev) { - pdev->dev.dma_pfn_offset = dma_pfn_offset; + dma_direct_set_offset(&pdev->dev, __pa(memory_start), + __pa(memory_start) - memstart, memsize); } static int __init sh7786_pcie_core_init(void) diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c index 59cb16691322..23ca75f09277 100644 --- a/arch/sparc/kernel/iommu-common.c +++ b/arch/sparc/kernel/iommu-common.c @@ -166,13 +166,6 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << iommu->table_shift); - else - boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); - - boundary_size = boundary_size >> iommu->table_shift; /* * if the skip_span_boundary_check had been set during init, we set * things up so that iommu_is_span_boundary() merely checks if the @@ -181,6 +174,9 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { shift = 0; boundary_size = iommu->poolsize * iommu->nr_pools; + } else { + boundary_size = dma_get_seg_boundary_nr_pages(dev, + iommu->table_shift); } n = iommu_area_alloc(iommu->map, limit, start, npages, shift, boundary_size, align_mask); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 4ae7388b1bff..c3e4e2df26a8 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -472,8 +472,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, outs->dma_length = 0; max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 14b93c5564e3..6b92dd51c002 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -508,8 +508,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu_batch_start(dev, prot, ~0UL); max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); mask = *dev->dma_mask; if (!iommu_use_atu(iommu, mask)) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index e89031e9c847..153374b996a2 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -96,8 +96,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, @@ -468,7 +467,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc_pages(dev, size, 
dma_addr, flag, attrs); + vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -480,7 +479,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -490,7 +489,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int no_agp; diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index c313d784efab..324a207f9995 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -133,7 +133,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev) struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); struct device *dev = &pdev->dev; u32 amba_base, max_amba_addr; - int i; + int i, ret; if (!instance) return; @@ -141,7 +141,9 @@ static void sta2x11_map_ep(struct pci_dev *pdev) pci_read_config_dword(pdev, AHB_BASE(0), &amba_base); max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1; - dev->dma_pfn_offset = PFN_DOWN(-amba_base); + ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE); + if (ret) + dev_err(dev, "sta2x11: could not set DMA offset\n"); dev->bus_dma_limit = max_amba_addr; pci_set_consistent_dma_mask(pdev, max_amba_addr); diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ec782e4a0fe4..de18c07ca02c 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -18,6 +18,7 @@ #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/dma-mapping.h> #define IORT_TYPE_MASK(type) (1 << (type)) #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP) @@ -1184,8 +1185,9 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) *dma_addr = dmaaddr; *dma_size = size; - dev->dma_pfn_offset = PFN_DOWN(offset); - dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset); + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/base/core.c b/drivers/base/core.c index bb5806a2bd4c..d00ff3ec8f0f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1788,6 +1788,8 @@ static void device_release(struct kobject *kobj) */ devres_release_all(dev); + kfree(dev->dma_range_map); + if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index ed5d86617802..9cf6473032f7 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/of_device.h> #include <linux/of_graph.h> +#include <linux/dma-mapping.h> #include <linux/platform_device.h> #include <linux/reset.h> @@ -810,8 +811,13 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, * because of an old DT, we need to set the DMA offset by hand * on our device since the RAM mapping is at 0 for the DMA bus, * unlike the CPU. 
+ * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ - drm->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(drm->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; } backend->engine.node = dev->of_node; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 87b17bac04c2..ed5c57e96e8b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3779,7 +3779,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, */ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { tlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, aligned_size, dir, attrs); if (tlb_addr == DMA_MAPPING_ERROR) { goto swiotlb_error; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index dc7bcf858b6d..f87cbb822a3d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -751,11 +751,6 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) return NULL; - if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) { - dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n"); - return NULL; - } - data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c index 5319eb1ab309..307997ee7f96 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c @@ -7,6 +7,7 @@ */ #include <linux/clk.h> +#include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/mutex.h> @@ -182,8 +183,14 @@ static int sun4i_csi_probe(struct platform_device *pdev) if (ret) return ret; } else { + /* + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. + */ #ifdef PHYS_PFN_OFFSET - csi->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(csi->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; #endif } diff --git a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c index 28e89340fed9..e69e14379fc6 100644 --- a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c +++ b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c @@ -899,8 +899,15 @@ static int sun6i_csi_probe(struct platform_device *pdev) return -ENOMEM; sdev->dev = &pdev->dev; - /* The DMA bus has the memory mapped at 0 */ - sdev->dev->dma_pfn_offset = PHYS_OFFSET >> PAGE_SHIFT; + /* + * The DMA bus has the memory mapped at 0. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. 
+ */ + ret = dma_direct_set_offset(sdev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; ret = sun6i_csi_resource_request(sdev, pdev); if (ret) diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index b9bb086785db..8a7c2c5711d5 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -35,6 +35,7 @@ config SCIF_BUS config VOP_BUS tristate "VOP Bus Driver" + depends on HAS_DMA select DMA_OPS help This option is selected by any driver which registers a diff --git a/drivers/of/address.c b/drivers/of/address.c index da4f7341323f..eb9ab4f1e80b 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -13,6 +13,7 @@ #include <linux/sizes.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/dma-direct.h> /* for bus_dma_region */ #include "of_private.h" @@ -937,33 +938,33 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index, } EXPORT_SYMBOL(of_io_request_and_map); +#ifdef CONFIG_HAS_DMA /** - * of_dma_get_range - Get DMA range info + * of_dma_get_range - Get DMA range info and put it into a map array * @np: device node to get DMA range info - * @dma_addr: pointer to store initial DMA address of DMA range - * @paddr: pointer to store initial CPU address of DMA range - * @size: pointer to store size of DMA range + * @map: dma range structure to return * * Look in bottom up direction for the first "dma-ranges" property - * and parse it. - * dma-ranges format: + * and parse it. Put the information into a DMA offset map array. + * + * dma-ranges format: * DMA addr (dma_addr) : naddr cells * CPU addr (phys_addr_t) : pna cells * size : nsize cells * - * It returns -ENODEV if "dma-ranges" property was not found - * for this device in DT. + * It returns -ENODEV if "dma-ranges" property was not found for this + * device in the DT. */ -int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size) +int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { struct device_node *node = of_node_get(np); const __be32 *ranges = NULL; - int len; - int ret = 0; bool found_dma_ranges = false; struct of_range_parser parser; struct of_range range; - u64 dma_start = U64_MAX, dma_end = 0, dma_offset = 0; + struct bus_dma_region *r; + int len, num_ranges = 0; + int ret = 0; while (node) { ranges = of_get_property(node, "dma-ranges", &len); @@ -989,49 +990,39 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz } of_dma_range_parser_init(&parser, node); + for_each_of_range(&parser, &range) + num_ranges++; + + r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + goto out; + } + /* + * Record all info in the generic DMA ranges array for struct device. 
+ */ + *map = r; + of_dma_range_parser_init(&parser, node); for_each_of_range(&parser, &range) { pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", range.bus_addr, range.cpu_addr, range.size); - - if (dma_offset && range.cpu_addr - range.bus_addr != dma_offset) { - pr_warn("Can't handle multiple dma-ranges with different offsets on node(%pOF)\n", node); - /* Don't error out as we'd break some existing DTs */ - continue; - } if (range.cpu_addr == OF_BAD_ADDR) { pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", range.bus_addr, node); continue; } - dma_offset = range.cpu_addr - range.bus_addr; - - /* Take lower and upper limits */ - if (range.bus_addr < dma_start) - dma_start = range.bus_addr; - if (range.bus_addr + range.size > dma_end) - dma_end = range.bus_addr + range.size; - } - - if (dma_start >= dma_end) { - ret = -EINVAL; - pr_debug("Invalid DMA ranges configuration on node(%pOF)\n", - node); - goto out; + r->cpu_start = range.cpu_addr; + r->dma_start = range.bus_addr; + r->size = range.size; + r->offset = range.cpu_addr - range.bus_addr; + r++; } - - *dma_addr = dma_start; - *size = dma_end - dma_start; - *paddr = dma_start + dma_offset; - - pr_debug("final: dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", - *dma_addr, *paddr, *size); - out: of_node_put(node); - return ret; } +#endif /* CONFIG_HAS_DMA */ /** * of_dma_is_coherent - Check if device is coherent diff --git a/drivers/of/device.c b/drivers/of/device.c index b439c1e05434..6e3ae7ebc33e 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -5,7 +5,7 @@ #include <linux/of_device.h> #include <linux/of_address.h> #include <linux/of_iommu.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> /* for bus_dma_region */ #include <linux/init.h> #include <linux/module.h> #include <linux/mod_devicetable.h> @@ -90,14 +90,14 @@ int of_device_add(struct platform_device *ofdev) int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { - u64 dma_addr, paddr, size = 0; - int ret; - bool coherent; - unsigned long offset; const struct iommu_ops *iommu; - u64 mask, end; + const struct bus_dma_region *map = NULL; + dma_addr_t dma_start = 0; + u64 mask, end, size = 0; + bool coherent; + int ret; - ret = of_dma_get_range(np, &dma_addr, &paddr, &size); + ret = of_dma_get_range(np, &map); if (ret < 0) { /* * For legacy reasons, we have to assume some devices need @@ -106,26 +106,35 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, */ if (!force_dma) return ret == -ENODEV ? 0 : ret; - - dma_addr = offset = 0; } else { - offset = PFN_DOWN(paddr - dma_addr); + const struct bus_dma_region *r = map; + dma_addr_t dma_end = 0; + + /* Determine the overall bounds of all DMA regions */ + for (dma_start = ~(dma_addr_t)0; r->size; r++) { + /* Take lower and upper limits */ + if (r->dma_start < dma_start) + dma_start = r->dma_start; + if (r->dma_start + r->size > dma_end) + dma_end = r->dma_start + r->size; + } + size = dma_end - dma_start; /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. 
*/ if (size & 1) { - dev_warn(dev, "Invalid size 0x%llx for dma-range\n", + dev_warn(dev, "Invalid size 0x%llx for dma-range(s)\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); + kfree(map); return -EINVAL; } - dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset); } /* @@ -144,13 +153,11 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, else if (!size) size = 1ULL << 32; - dev->dma_pfn_offset = offset; - /* * Limit coherent and dma mask based on size and default mask * set by the driver. */ - end = dma_addr + size - 1; + end = dma_start + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; @@ -163,14 +170,17 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, coherent ? " " : " not "); iommu = of_iommu_configure(dev, np, id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) + if (PTR_ERR(iommu) == -EPROBE_DEFER) { + kfree(map); return -EPROBE_DEFER; + } dev_dbg(dev, "device is%sbehind an iommu\n", iommu ? " " : " not "); - arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent); + arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + dev->dma_range_map = map; return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index edc682249c00..d9e6a324de0a 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -157,12 +157,13 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np, extern int of_bus_n_addr_cells(struct device_node *np); extern int of_bus_n_size_cells(struct device_node *np); -#ifdef CONFIG_OF_ADDRESS -extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size); +struct bus_dma_region; +#if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) +int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map); #else -static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size) +static inline int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map) { return -ENODEV; } diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 9b7e84bdc7d4..06cc988faf78 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -7,6 +7,7 @@ #include <linux/memblock.h> #include <linux/clk.h> +#include <linux/dma-direct.h> /* to test phys_to_dma/dma_to_phys */ #include <linux/err.h> #include <linux/errno.h> #include <linux/hashtable.h> @@ -869,10 +870,11 @@ static void __init of_unittest_changeset(void) } static void __init of_unittest_dma_ranges_one(const char *path, - u64 expect_dma_addr, u64 expect_paddr, u64 expect_size) + u64 expect_dma_addr, u64 expect_paddr) { +#ifdef CONFIG_HAS_DMA struct device_node *np; - u64 dma_addr, paddr, size; + const struct bus_dma_region *map = NULL; int rc; np = of_find_node_by_path(path); @@ -881,28 +883,40 @@ static void __init of_unittest_dma_ranges_one(const char *path, return; } - rc = of_dma_get_range(np, &dma_addr, &paddr, &size); + rc = of_dma_get_range(np, &map); unittest(!rc, "of_dma_get_range failed on node %pOF rc=%i\n", np, rc); + if (!rc) { - unittest(size == expect_size, - "of_dma_get_range wrong size on node %pOF size=%llx\n", np, size); + phys_addr_t paddr; + dma_addr_t dma_addr; + struct device dev_bogus; + + dev_bogus.dma_range_map = map; + paddr = dma_to_phys(&dev_bogus, expect_dma_addr); + dma_addr = phys_to_dma(&dev_bogus, expect_paddr); + unittest(paddr == expect_paddr, - "of_dma_get_range wrong phys addr (%llx) 
on node %pOF", paddr, np); + "of_dma_get_range: wrong phys addr %pap (expecting %llx) on node %pOF\n", + &paddr, expect_paddr, np); unittest(dma_addr == expect_dma_addr, - "of_dma_get_range wrong DMA addr (%llx) on node %pOF", dma_addr, np); + "of_dma_get_range: wrong DMA addr %pad (expecting %llx) on node %pOF\n", + &dma_addr, expect_dma_addr, np); + + kfree(map); } of_node_put(np); +#endif } static void __init of_unittest_parse_dma_ranges(void) { of_unittest_dma_ranges_one("/testcase-data/address-tests/device@70000000", - 0x0, 0x20000000, 0x40000000); + 0x0, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000", - 0x100000000, 0x20000000, 0x2000000000); + 0x100000000, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/pci@90000000", - 0x80000000, 0x20000000, 0x10000000); + 0x80000000, 0x20000000); } static void __init of_unittest_pci_dma_ranges(void) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a5507f75b524..ba16b7f8f806 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -356,8 +356,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size) ** ggg sacrifices another 710 to the computer gods. */ - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); if (pages_needed <= 8) { /* diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index d4314fba0269..959bda193b96 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -342,8 +342,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, unsigned long shift; int ret; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); #if defined(ZX1_SUPPORT) BUG_ON(ioc->ibase & ~IOVP_MASK); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 09d06b082f8b..72114907c0e4 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -516,7 +516,7 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, p_dev->dev.parent = s->dev.parent; p_dev->dev.release = pcmcia_release_dev; /* by default don't allow DMA */ - p_dev->dma_mask = DMA_MASK_NONE; + p_dev->dma_mask = 0; p_dev->dev.dma_mask = &p_dev->dma_mask; dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); if (!dev_name(&p_dev->dev)) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 7f90eeea67e2..8157dd491d28 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/dma-mapping.h> +#include <linux/dma-direct.h> /* XXX: pokes into bus_dma_range */ #include <linux/firmware.h> #include <linux/string.h> #include <linux/debugfs.h> @@ -458,6 +459,25 @@ static void rproc_rvdev_release(struct device *dev) kfree(rvdev); } +static int copy_dma_range_map(struct device *to, struct device *from) +{ + const struct bus_dma_region *map = from->dma_range_map, *new_map, *r; + int num_ranges = 0; + + if (!map) + return 0; + + for (r = map; r->size; r++) + num_ranges++; + + new_map = kmemdup(map, array_size(num_ranges + 1, sizeof(*map)), + GFP_KERNEL); + if (!new_map) + return -ENOMEM; + to->dma_range_map = new_map; + return 0; +} + /** * rproc_handle_vdev() - handle a vdev fw resource * @rproc: the remote processor 
@@ -529,7 +549,9 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, /* Initialise vdev subdevice */ snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index); rvdev->dev.parent = &rproc->dev; - rvdev->dev.dma_pfn_offset = rproc->dev.parent->dma_pfn_offset; + ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent); + if (ret) + return ret; rvdev->dev.release = rproc_rvdev_release; dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name); dev_set_drvdata(&rvdev->dev, rvdev); diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index 1744e6fcc999..bcf050a04ffc 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c @@ -227,11 +227,17 @@ int cedrus_hw_probe(struct cedrus_dev *dev) * the RAM offset to the physcal addresses. * * This information will eventually be obtained from device-tree. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ #ifdef PHYS_PFN_OFFSET - if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) - dev->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) { + ret = dma_direct_set_offset(dev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; + } #endif ret = of_reserved_mem_device_init(dev->dev); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index ae1de9cc4b09..7d6bbbf4a916 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1947,12 +1947,6 @@ free_interfaces: intf->dev.bus = &usb_bus_type; intf->dev.type = &usb_if_device_type; intf->dev.groups = usb_interface_groups; - /* - * Please refer to usb_alloc_dev() to see why we set - * dma_mask and dma_pfn_offset. - */ - intf->dev.dma_mask = dev->dev.dma_mask; - intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); intf->minor = -1; device_initialize(&intf->dev); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index bafc113f2b3e..9b4ac4415f1a 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -599,18 +599,6 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->dev.bus = &usb_bus_type; dev->dev.type = &usb_device_type; dev->dev.groups = usb_device_groups; - /* - * Fake a dma_mask/offset for the USB device: - * We cannot really use the dma-mapping API (dma_alloc_* and - * dma_map_*) for USB devices but instead need to use - * usb_alloc_coherent and pass data in 'urb's, but some subsystems - * manually look into the mask/offset pair to determine whether - * they need bounce buffers. - * Note: calling dma_set_mask() on a USB device would set the - * mask for the entire HCD, so don't do that. - */ - dev->dev.dma_mask = bus->sysdev->dma_mask; - dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset; set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; diff --git a/include/linux/cma.h b/include/linux/cma.h index 6ff79fefd01f..217999c8a762 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,6 +18,8 @@ #endif +#define CMA_MAX_NAME 64 + struct cma; extern unsigned long totalcma_pages; diff --git a/include/linux/device.h b/include/linux/device.h index 9e6ea8931a52..f85163701322 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -467,7 +467,7 @@ struct dev_links_info { * such descriptors. 
* @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. - * @dma_pfn_offset: offset of DMA memory range relatively of RAM + * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). @@ -562,7 +562,7 @@ struct device { 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ - unsigned long dma_pfn_offset; + const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 03f8e98e3bcc..fe55e004f1f4 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -171,6 +171,12 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif + #endif #endif diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6e87225600ae..83f797e0cb78 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -14,57 +14,95 @@ extern unsigned int zone_dma_bits; -#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA -#include <asm/dma-direct.h> -#else -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +/* + * Record the mapping of CPU physical to DMA addresses for a given region. + */ +struct bus_dma_region { + phys_addr_t cpu_start; + dma_addr_t dma_start; + u64 size; + u64 offset; +}; + +static inline dma_addr_t translate_phys_to_dma(struct device *dev, + phys_addr_t paddr) { - dma_addr_t dev_addr = (dma_addr_t)paddr; + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) + return (dma_addr_t)paddr - m->offset; - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + /* make sure dma_capable fails when no translation is available */ + return DMA_MAPPING_ERROR; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t translate_dma_to_phys(struct device *dev, + dma_addr_t dma_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr; + const struct bus_dma_region *m; - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + for (m = dev->dma_range_map; m->size; m++) + if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) + return (phys_addr_t)dma_addr + m->offset; + + return (phys_addr_t)-1; } -#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ -#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED -bool force_dma_unencrypted(struct device *dev); +#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA +#include <asm/dma-direct.h> +#ifndef phys_to_dma_unencrypted +#define phys_to_dma_unencrypted phys_to_dma +#endif #else -static inline bool force_dma_unencrypted(struct device *dev) +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) { - return false; + if (dev->dma_range_map) + return translate_phys_to_dma(dev, paddr); + return paddr; } -#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ /* * If memory encryption is supported, phys_to_dma will set the memory encryption - * bit in the DMA address, and dma_to_phys will clear it. 
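The two translate helpers above are plain offset arithmetic over that table; a short worked example with the same illustrative range may make the sign of .offset clearer:

  /*
   * With cpu_start = 0x80000000, dma_start = 0x0, size = 1 GiB,
   * offset = cpu_start - dma_start = 0x80000000:
   *
   *   translate_phys_to_dma(dev, 0x80001000) == 0x80001000 - 0x80000000 == 0x1000
   *   translate_dma_to_phys(dev, 0x1000)     == 0x1000 + 0x80000000 == 0x80001000
   *
   * A physical address outside [cpu_start, cpu_start + size) matches no
   * entry and yields DMA_MAPPING_ERROR instead of a bogus translation.
   */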
The raw __phys_to_dma - * and __dma_to_phys versions should only be used on non-encrypted memory for - * special occasions like DMA coherent buffers. + * bit in the DMA address, and dma_to_phys will clear it. + * phys_to_dma_unencrypted is for use on special unencrypted memory like swiotlb + * buffers. */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return __sme_set(__phys_to_dma(dev, paddr)); + return __sme_set(phys_to_dma_unencrypted(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { - return __sme_clr(__dma_to_phys(dev, daddr)); + phys_addr_t paddr; + + if (dev->dma_range_map) + paddr = translate_dma_to_phys(dev, dma_addr); + else + paddr = dma_addr; + + return __sme_clr(paddr); } +#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ + +#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED +bool force_dma_unencrypted(struct device *dev); +#else +static inline bool force_dma_unencrypted(struct device *dev) +{ + return false; +} +#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { dma_addr_t end = addr + size - 1; - if (!dev->dma_mask) + if (addr == DMA_MAPPING_ERROR) return false; - if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; @@ -77,10 +115,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -void *dma_direct_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -150,6 +184,9 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_page(struct device *dev, diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h index 9c96e30e6a0b..a2fe4571bc92 100644 --- a/include/linux/dma-direction.h +++ b/include/linux/dma-direction.h @@ -9,4 +9,10 @@ enum dma_data_direction { DMA_NONE = 3, }; -#endif +static inline int valid_dma_direction(enum dma_data_direction dir) +{ + return dir == DMA_BIDIRECTIONAL || dir == DMA_TO_DEVICE || + dir == DMA_FROM_DEVICE; +} + +#endif /* _LINUX_DMA_DIRECTION_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 52635e91143b..943479fb77f6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -67,12 +67,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * A dma_addr_t can hold any valid DMA or bus address for the platform. - * It can be given to a device to use as a DMA source or target. A CPU cannot - * reference a dma_addr_t directly because there may be translation between - * its physical address space and the bus address space. 
- */ struct dma_map_ops { void* (*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -131,6 +125,16 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; +/* + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. + */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) extern const struct dma_map_ops dma_virt_ops; @@ -138,15 +142,6 @@ extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#define DMA_MASK_NONE 0x0ULL - -static inline int valid_dma_direction(int dma_direction) -{ - return ((dma_direction == DMA_BIDIRECTIONAL) || - (dma_direction == DMA_TO_DEVICE) || - (dma_direction == DMA_FROM_DEVICE)); -} - #ifdef CONFIG_DMA_DECLARE_COHERENT /* * These three functions are only for dma allocator. @@ -629,7 +624,26 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; - return DMA_BIT_MASK(32); + return ULONG_MAX; +} + +/** + * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units + * @dev: device to guery the boundary for + * @page_shift: ilog() of the IOMMU page size + * + * Return the segment boundary in IOMMU page units (which may be different from + * the CPU page size) for the passed in device. + * + * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for + * non-DMA API callers. + */ +static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, + unsigned int page_shift) +{ + if (!dev) + return (U32_MAX >> page_shift) + 1; + return (dma_get_seg_boundary(dev) >> page_shift) + 1; } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) @@ -711,4 +725,11 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -#endif +/* + * Legacy interface to set up the dma offset map. Drivers really should not + * actually use it, but we have a few legacy cases left. 
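dma_get_seg_boundary_nr_pages() is what the alpha and parisc hunks earlier in this diff were converted to. A hedged sketch of how an IOMMU page allocator typically consumes the value; the wrapper and its parameter names are made up for illustration, while iommu_area_alloc() is the existing helper from lib/iommu-helper.c:

  #include <linux/dma-mapping.h>
  #include <linux/iommu-helper.h>

  /*
   * Allocate @nr_pages IOMMU pages from @bitmap without letting the
   * allocation straddle the device's segment boundary.  @iovp_shift is
   * the IOMMU page shift, which may differ from PAGE_SHIFT.
   */
  static unsigned long example_alloc_iommu_pages(struct device *dev,
                                                 unsigned long *bitmap,
                                                 unsigned long bitmap_bits,
                                                 unsigned int nr_pages,
                                                 unsigned int iovp_shift)
  {
          unsigned long boundary_size =
                  dma_get_seg_boundary_nr_pages(dev, iovp_shift);

          return iommu_area_alloc(bitmap, bitmap_bits, 0, nr_pages,
                                  0, boundary_size, 0);
  }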
+ */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + +#endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index ca09a4e07d2d..b9bc6c557ea4 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -108,6 +108,14 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 847a9d1fa634..281785feb874 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -9,6 +9,7 @@ config HAS_DMA default y config DMA_OPS + depends on HAS_DMA bool # @@ -43,6 +44,12 @@ config ARCH_HAS_DMA_SET_MASK config ARCH_HAS_DMA_WRITE_COMBINE bool +# +# Select if the architectures provides the arch_dma_mark_clean hook +# +config ARCH_HAS_DMA_MARK_CLEAN + bool + config DMA_DECLARE_COHERENT bool @@ -118,6 +125,17 @@ config DMA_CMA If unsure, say "n". if DMA_CMA + +config DMA_PERNUMA_CMA + bool "Enable separate DMA Contiguous Memory Area for each NUMA Node" + default NUMA && ARM64 + help + Enable this option to get pernuma CMA areas so that devices like + ARM64 SMMU can get local memory by DMA coherent APIs. + + You can set the size of pernuma CMA by specifying "cma_pernuma=size" + on the kernel's command line. + comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 32c7c1942bbd..dc755ab68aab 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o +obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 2a0c4985f38e..c0685196fb6d 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -7,7 +7,7 @@ #include <linux/slab.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> struct dma_coherent_mem { void *virt_base; @@ -32,9 +32,8 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, struct dma_coherent_mem * mem) { if (mem->use_dev_dma_pfn_offset) - return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; - else - return mem->device_base; + return phys_to_dma(dev, PFN_PHYS(mem->pfn_base)); + return mem->device_base; } static int dma_init_coherent_memory(phys_addr_t phys_addr, @@ -107,6 +106,23 @@ static int dma_assign_coherent_memory(struct device *dev, return 0; } +/* + * Declare a region of memory to be handed out by dma_alloc_coherent() when it + * is asked for coherent memory for this device. This shall only be used + * from platform code, usually based on the device tree description. + * + * phys_addr is the CPU physical address to which the memory is currently + * assigned (this will be ioremapped so the CPU can access the region). 
+ * + * device_addr is the DMA address the device needs to be programmed with to + * actually address this memory (this will be handed out as the dma_addr_t in + * dma_alloc_coherent()). + * + * size is the size of the area (must be a multiple of PAGE_SIZE). + * + * As a simplification for the platforms, only *one* such region of memory may + * be declared per device. + */ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) { diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index cff7e60968b9..f4c150810fd2 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -69,6 +69,19 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); +#ifdef CONFIG_DMA_PERNUMA_CMA + +static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES]; +static phys_addr_t pernuma_size_bytes __initdata; + +static int __init early_cma_pernuma(char *p) +{ + pernuma_size_bytes = memparse(p, &p); + return 0; +} +early_param("cma_pernuma", early_cma_pernuma); +#endif + #ifdef CONFIG_CMA_SIZE_PERCENTAGE static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) @@ -96,6 +109,34 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void __init dma_pernuma_cma_reserve(void) +{ + int nid; + + if (!pernuma_size_bytes) + return; + + for_each_online_node(nid) { + int ret; + char name[CMA_MAX_NAME]; + struct cma **cma = &dma_contiguous_pernuma_area[nid]; + + snprintf(name, sizeof(name), "pernuma%d", nid); + ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, + 0, false, name, cma, nid); + if (ret) { + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + continue; + } + + pr_debug("%s: reserved %llu MiB on node %d\n", __func__, + (unsigned long long)pernuma_size_bytes / SZ_1M, nid); + } +} +#endif + /** * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). @@ -228,23 +269,44 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) * @size: Requested allocation size. * @gfp: Allocation flags. * - * This function allocates contiguous memory buffer for specified device. It - * tries to use device specific contiguous memory area if available, or the - * default global one. + * tries to use device specific contiguous memory area if available, or it + * tries to use per-numa cma, if the allocation fails, it will fallback to + * try default global one. * - * Note that it byapss one-page size of allocations from the global area as - * the addresses within one page are always contiguous, so there is no need - * to waste CMA pages for that kind; it also helps reduce fragmentations. + * Note that it bypass one-page size of allocations from the per-numa and + * global area as the addresses within one page are always contiguous, so + * there is no need to waste CMA pages for that kind; it also helps reduce + * fragmentations. 
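Tying the relocated dma_declare_coherent_memory() comment above to code, a hedged sketch of the kind of platform-code call it describes; the device and addresses are made up:

  #include <linux/dma-mapping.h>
  #include <linux/sizes.h>

  /*
   * Hypothetical platform setup: a 1 MiB on-chip SRAM at CPU physical
   * 0x30000000 that the device addresses from 0.  The size must be a
   * multiple of PAGE_SIZE and only one region may be declared per device.
   */
  static int example_declare_device_sram(struct device *dev)
  {
          return dma_declare_coherent_memory(dev, 0x30000000, 0, SZ_1M);
  }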
*/ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { +#ifdef CONFIG_DMA_PERNUMA_CMA + int nid = dev_to_node(dev); +#endif + /* CMA can be used only in the context which permits sleeping */ if (!gfpflags_allow_blocking(gfp)) return NULL; if (dev->cma_area) return cma_alloc_aligned(dev->cma_area, size, gfp); - if (size <= PAGE_SIZE || !dma_contiguous_default_area) + if (size <= PAGE_SIZE) + return NULL; + +#ifdef CONFIG_DMA_PERNUMA_CMA + if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) { + struct cma *cma = dma_contiguous_pernuma_area[nid]; + struct page *page; + + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } + } +#endif + if (!dma_contiguous_default_area) return NULL; + return cma_alloc_aligned(dma_contiguous_default_area, size, gfp); } @@ -261,9 +323,27 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) */ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { - if (!cma_release(dev_get_cma_area(dev), page, - PAGE_ALIGN(size) >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + /* if dev has its own cma, free page from there */ + if (dev->cma_area) { + if (cma_release(dev->cma_area, page, count)) + return; + } else { + /* + * otherwise, page is from either per-numa cma or default cma + */ +#ifdef CONFIG_DMA_PERNUMA_CMA + if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)], + page, count)) + return; +#endif + if (cma_release(dma_contiguous_default_area, page, count)) + return; + } + + /* not in any cma, free from buddy */ + __free_pages(page, get_order(size)); } /* diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 8e9f7b301c6d..4211800d9f3e 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1219,7 +1219,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; entry->pfn = page_to_pfn(page); - entry->offset = offset, + entry->offset = offset; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1235,7 +1235,6 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_page); void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -1290,7 +1289,6 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, return; check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_page); void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int mapped_ents, int direction) @@ -1310,7 +1308,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, + entry->offset = s->offset; entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1328,7 +1326,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, add_dma_entry(entry); } } -EXPORT_SYMBOL(debug_dma_map_sg); static int get_nr_mapped_entries(struct device *dev, struct dma_debug_entry *ref) @@ -1380,7 +1377,6 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, check_unmap(&ref); } } -EXPORT_SYMBOL(debug_dma_unmap_sg); void debug_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t dma_addr, void *virt) @@ -1466,7 +1462,6 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, 
add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_resource); void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) @@ -1484,7 +1479,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_resource); void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) @@ -1503,7 +1497,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, check_sync(dev, &ref, true); } -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -1523,7 +1516,6 @@ void debug_dma_sync_single_for_device(struct device *dev, check_sync(dev, &ref, false); } -EXPORT_SYMBOL(debug_dma_sync_single_for_device); void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1556,7 +1548,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, true); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1588,7 +1579,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, false); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); static int __init dma_debug_driver_setup(char *str) { diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index db6ef07aec3b..750659f7447c 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include <linux/pfn.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> +#include <linux/slab.h> /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it @@ -25,7 +26,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { if (force_dma_unencrypted(dev)) - return __phys_to_dma(dev, phys); + return phys_to_dma_unencrypted(dev, phys); return phys_to_dma(dev, phys); } @@ -48,11 +49,6 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, { u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); - if (force_dma_unencrypted(dev)) - *phys_limit = __dma_to_phys(dev, dma_limit); - else - *phys_limit = dma_to_phys(dev, dma_limit); - /* * Optimistically try the zone that the physical address mask falls * into first. If that returns memory that isn't actually addressable @@ -61,6 +57,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. 
*/ + *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) @@ -70,8 +67,12 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma_direct(dev, phys) + size - 1 <= - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); + + if (dma_addr == DMA_MAPPING_ERROR) + return false; + return dma_addr + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } /* @@ -108,7 +109,7 @@ static inline bool dma_should_free_from_pool(struct device *dev, } static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs) + gfp_t gfp) { int node = dev_to_node(dev); struct page *page = NULL; @@ -116,11 +117,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); @@ -151,14 +147,21 @@ again: return page; } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page *page; void *ret; int err; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + dma_alloc_need_uncached(dev, attrs)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + size = PAGE_ALIGN(size); + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; if (dma_should_alloc_from_pool(dev, gfp, attrs)) { u64 phys_mask; @@ -172,7 +175,8 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, goto done; } - page = __dma_direct_alloc_pages(dev, size, gfp, attrs); + /* we always manually zero the memory once we are done */ + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; @@ -237,10 +241,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, goto out_encrypt_pages; } done: - if (force_dma_unencrypted(dev)) - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - else - *dma_handle = phys_to_dma(dev, page_to_phys(page)); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: @@ -256,11 +257,18 @@ out_free_pages: return NULL; } -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int page_order = get_order(size); + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + dma_alloc_need_uncached(dev, attrs)) { + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + return; + } + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ if (dma_should_free_from_pool(dev, attrs) && dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) @@ -284,27 +292,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); } -void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, 
unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} - -void dma_direct_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); - else - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -} - #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, @@ -345,6 +332,9 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, sg->length); } if (!dev_is_dma_coherent(dev)) @@ -453,13 +443,13 @@ int dma_direct_supported(struct device *dev, u64 mask) return 1; /* - * This check needs to be against the actual bit mask value, so - * use __phys_to_dma() here so that the SME encryption mask isn't + * This check needs to be against the actual bit mask value, so use + * phys_to_dma_unencrypted() here so that the SME encryption mask isn't * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); - return mask >= __phys_to_dma(dev, min_mask); + return mask >= phys_to_dma_unencrypted(dev, min_mask); } size_t dma_direct_max_mapping_size(struct device *dev) @@ -476,3 +466,45 @@ bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) return !dev_is_dma_coherent(dev) || is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); } + +/** + * dma_direct_set_offset - Assign scalar offset for a single DMA range. + * @dev: device pointer; needed to "own" the alloced memory. + * @cpu_start: beginning of memory region covered by this offset. + * @dma_start: beginning of DMA/PCI region covered by this offset. + * @size: size of the region. + * + * This is for the simple case of a uniform offset which cannot + * be discovered by "dma-ranges". + * + * It returns -ENOMEM if out of memory, -EINVAL if a map + * already exists, 0 otherwise. + * + * Note: any call to this from a driver is a bug. The mapping needs + * to be described by the device tree or other firmware interfaces. 
+ */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size) +{ + struct bus_dma_region *map; + u64 offset = (u64)cpu_start - (u64)dma_start; + + if (dev->dma_range_map) { + dev_err(dev, "attempt to add DMA range to existing map\n"); + return -EINVAL; + } + + if (!offset) + return 0; + + map = kcalloc(2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map[0].cpu_start = cpu_start; + map[0].dma_start = dma_start; + map[0].offset = offset; + map[0].size = size; + dev->dma_range_map = map; + return 0; +} +EXPORT_SYMBOL_GPL(dma_direct_set_offset); diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index 05607642c888..6974b1bd7d0b 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -36,4 +36,3 @@ const struct dma_map_ops dma_dummy_ops = { .map_sg = dma_dummy_map_sg, .dma_supported = dma_dummy_supported, }; -EXPORT_SYMBOL(dma_dummy_ops); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 0d129421e75f..9a045e51df17 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -8,7 +8,7 @@ #include <linux/memblock.h> /* for max_pfn */ #include <linux/acpi.h> #include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> +#include <linux/dma-mapping.h> #include <linux/export.h> #include <linux/gfp.h> #include <linux/of_device.h> @@ -144,6 +144,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else @@ -179,6 +183,10 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, int ents; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return 0; + if (dma_map_direct(dev, ops)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); else @@ -213,6 +221,9 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, BUG_ON(!valid_dma_direction(dir)); + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + /* Don't allow RAM to be mapped */ if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) return DMA_MAPPING_ERROR; @@ -296,22 +307,6 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, EXPORT_SYMBOL(dma_sync_sg_for_device); /* - * Create scatter-list for the already allocated DMA buffer. - */ -int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - struct page *page = virt_to_page(cpu_addr); - int ret; - - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return ret; -} - -/* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems * that the intention is to allow exporting memory allocated via the * coherent DMA APIs through the dma_buf API, which only accepts a @@ -358,35 +353,6 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) } #endif /* CONFIG_MMU */ -/* - * Create userspace mapping for the DMA-coherent memory. 
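With dma_map_page_attrs(), dma_map_sg_attrs() and dma_map_resource() above now refusing devices that have no dma_mask, the driver-visible contract is unchanged: the only supported failure check is dma_mapping_error(), never a direct comparison against DMA_MAPPING_ERROR. A brief sketch of that check, using a hypothetical receive buffer:

  #include <linux/dma-mapping.h>

  /* map @buf for device writes; illustrative error handling only */
  static int example_map_rx_buffer(struct device *dev, void *buf, size_t len,
                                   dma_addr_t *handle)
  {
          *handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
          if (dma_mapping_error(dev, *handle))
                  return -ENOMEM;
          return 0;
  }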
- */ -int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off >= count || user_count > count - off) - return -ENXIO; - - return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, - user_count << PAGE_SHIFT, vma->vm_page_prot); -#else - return -ENXIO; -#endif /* CONFIG_MMU */ -} - /** * dma_can_mmap - check if a given device supports dma_mmap_* * @dev: device to check diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c new file mode 100644 index 000000000000..e443c69be429 --- /dev/null +++ b/kernel/dma/ops_helpers.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for DMA ops implementations. These generally rely on the fact that + * the allocated memory contains normal pages in the direct kernel mapping. + */ +#include <linux/dma-noncoherent.h> + +/* + * Create scatter-list for the already allocated DMA buffer. + */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + struct page *page = virt_to_page(cpu_addr); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; +} + +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + int ret = -ENXIO; + + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off >= count || user_count > count - off) + return -ENXIO; + + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* CONFIG_MMU */ +} diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 1281c0f0442b..fe11643ff9cc 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -115,7 +115,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #endif /* * Memory in the atomic DMA pools must be unencrypted, the pools do not - * shrink so no re-encryption occurs in dma_direct_free_pages(). + * shrink so no re-encryption occurs in dma_direct_free(). 
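The new kernel/dma/ops_helpers.c above exists so that dma_map_ops implementations whose buffers are ordinary pages in the kernel direct mapping can reuse the two generic helpers instead of duplicating them. A hedged skeleton of how an ops structure would typically wire them up, assuming the helper declarations remain visible through linux/dma-mapping.h:

  #include <linux/dma-mapping.h>

  /*
   * Skeleton only: a real implementation would also provide .alloc, .free,
   * .map_page, .map_sg and friends.  The point is that .mmap and
   * .get_sgtable can simply reuse the generic helpers.
   */
  static const struct dma_map_ops example_dma_ops = {
          .mmap           = dma_common_mmap,
          .get_sgtable    = dma_common_get_sgtable,
  };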
*/ ret = set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index c19379fabd20..4ea72d145cd2 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -670,13 +670,13 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, swiotlb_force); swiotlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, size, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ - dma_addr = __phys_to_dma(dev, swiotlb_addr); + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); diff --git a/mm/Kconfig b/mm/Kconfig index 6c974888f86f..d75a0107f61f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -516,13 +516,14 @@ config CMA_DEBUGFS config CMA_AREAS int "Maximum count of the CMA areas" depends on CMA + default 19 if NUMA default 7 help CMA allows to create CMA areas for particular purpose, mainly, used as device private area. This parameter sets the maximum number of CMA area in the system. - If unsure, leave the default value "7". + If unsure, leave the default value "7" in UMA and "19" in NUMA. config MEM_SOFT_DIRTY bool "Track memory changes" @@ -4,8 +4,6 @@ #include <linux/debugfs.h> -#define CMA_MAX_NAME 64 - struct cma { unsigned long base_pfn; unsigned long count; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 67fc6383995b..1cc743d52565 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5708,12 +5708,12 @@ void __init hugetlb_cma_reserve(int order) reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; - char name[20]; + char name[CMA_MAX_NAME]; size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); - snprintf(name, 20, "hugetlb%d", nid); + snprintf(name, sizeof(name), "hugetlb%d", nid); res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, 0, false, name, &hugetlb_cma[nid], nid); |
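Taken together with the CMA_AREAS bump above (per-node areas consume extra slots on NUMA systems), the per-NUMA pieces of this series are enabled roughly as follows; the 16M figure is only an example:

  # Kconfig
  CONFIG_DMA_PERNUMA_CMA=y

  # kernel command line
  cma_pernuma=16M

With that in place, dma_alloc_contiguous() for a device on node N first tries the node's "pernumaN" area and only falls back to the global default area if that allocation fails, as described in the updated kernel-doc earlier in this diff.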