summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/netdev_rx_queue.h5
-rw-r--r--include/net/netmem.h132
-rw-r--r--include/net/page_pool/helpers.h39
-rw-r--r--include/net/page_pool/types.h23
-rw-r--r--include/net/sock.h2
-rw-r--r--include/net/tcp.h3
6 files changed, 162 insertions, 42 deletions
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index aa1716fb0e53..ac34f5fb4f71 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <net/xdp.h>
+#include <net/page_pool/types.h>
/* This structure contains an instance of an RX queue. */
struct netdev_rx_queue {
@@ -25,6 +26,7 @@ struct netdev_rx_queue {
* Readers and writers must hold RTNL
*/
struct napi_struct *napi;
+ struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;
/*
@@ -54,4 +56,7 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
return index;
}
#endif
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
+
#endif
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 46cc9b89ac79..8a6e20be4b9d 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -8,6 +8,54 @@
#ifndef _NET_NETMEM_H
#define _NET_NETMEM_H
+#include <linux/mm.h>
+#include <net/net_debug.h>
+
+/* net_iov */
+
+DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
+/* We overload the LSB of the struct page pointer to indicate whether it's
+ * a page or net_iov.
+ */
+#define NET_IOV 0x01UL
+
+struct net_iov {
+ unsigned long __unused_padding;
+ unsigned long pp_magic;
+ struct page_pool *pp;
+ struct dmabuf_genpool_chunk_owner *owner;
+ unsigned long dma_addr;
+ atomic_long_t pp_ref_count;
+};
+
+/* These fields in struct page are used by the page_pool and net stack:
+ *
+ * struct {
+ * unsigned long pp_magic;
+ * struct page_pool *pp;
+ * unsigned long _pp_mapping_pad;
+ * unsigned long dma_addr;
+ * atomic_long_t pp_ref_count;
+ * };
+ *
+ * We mirror the page_pool fields here so the page_pool can access these fields
+ * without worrying whether the underlying fields belong to a page or net_iov.
+ *
+ * The non-net stack fields of struct page are private to the mm stack and must
+ * never be mirrored to net_iov.
+ */
+#define NET_IOV_ASSERT_OFFSET(pg, iov) \
+ static_assert(offsetof(struct page, pg) == \
+ offsetof(struct net_iov, iov))
+NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
+NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
+NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
+#undef NET_IOV_ASSERT_OFFSET
+
+/* netmem */
+
/**
* typedef netmem_ref - a nonexistent type marking a reference to generic
* network memory.
@@ -19,20 +67,37 @@
*/
typedef unsigned long __bitwise netmem_ref;
+static inline bool netmem_is_net_iov(const netmem_ref netmem)
+{
+ return (__force unsigned long)netmem & NET_IOV;
+}
+
/* This conversion fails (returns NULL) if the netmem_ref is not struct page
* backed.
- *
- * Currently struct page is the only possible netmem, and this helper never
- * fails.
*/
static inline struct page *netmem_to_page(netmem_ref netmem)
{
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return NULL;
+
return (__force struct page *)netmem;
}
-/* Converting from page to netmem is always safe, because a page can always be
- * a netmem.
- */
+static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return (struct net_iov *)((__force unsigned long)netmem &
+ ~NET_IOV);
+
+ DEBUG_NET_WARN_ON_ONCE(true);
+ return NULL;
+}
+
+static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
+{
+ return (__force netmem_ref)((unsigned long)niov | NET_IOV);
+}
+
static inline netmem_ref page_to_netmem(struct page *page)
{
return (__force netmem_ref)page;
@@ -40,17 +105,70 @@ static inline netmem_ref page_to_netmem(struct page *page)
static inline int netmem_ref_count(netmem_ref netmem)
{
+ /* The non-pp refcount of net_iov is always 1. On net_iov, we only
+ * support pp refcounting which uses the pp_ref_count field.
+ */
+ if (netmem_is_net_iov(netmem))
+ return 1;
+
return page_ref_count(netmem_to_page(netmem));
}
-static inline unsigned long netmem_to_pfn(netmem_ref netmem)
+static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
{
+ if (netmem_is_net_iov(netmem))
+ return 0;
+
return page_to_pfn(netmem_to_page(netmem));
}
+static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
+{
+ return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
+}
+
+static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp;
+}
+
+static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
+{
+ return &__netmem_clear_lsb(netmem)->pp_ref_count;
+}
+
+static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
+{
+ /* NUMA node preference only makes sense if we're allocating
+ * system memory. Memory providers (which give us net_iovs)
+ * choose for us.
+ */
+ if (netmem_is_net_iov(netmem))
+ return true;
+
+ return page_to_nid(netmem_to_page(netmem)) == pref_nid;
+}
+
static inline netmem_ref netmem_compound_head(netmem_ref netmem)
{
+ /* niov are never compounded */
+ if (netmem_is_net_iov(netmem))
+ return netmem;
+
return page_to_netmem(compound_head(netmem_to_page(netmem)));
}
+static inline void *netmem_address(netmem_ref netmem)
+{
+ if (netmem_is_net_iov(netmem))
+ return NULL;
+
+ return page_address(netmem_to_page(netmem));
+}
+
+static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->dma_addr;
+}
+
#endif /* _NET_NETMEM_H */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 2b43a893c619..793e6fd78bc5 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -216,7 +216,7 @@ page_pool_get_dma_dir(const struct page_pool *pool)
static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
{
- atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
+ atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr);
}
/**
@@ -244,7 +244,7 @@ static inline void page_pool_fragment_page(struct page *page, long nr)
static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
{
- struct page *page = netmem_to_page(netmem);
+ atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem);
long ret;
/* If nr == pp_ref_count then we have cleared all remaining
@@ -261,19 +261,19 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
* initially, and only overwrite it when the page is partitioned into
* more than one piece.
*/
- if (atomic_long_read(&page->pp_ref_count) == nr) {
+ if (atomic_long_read(pp_ref_count) == nr) {
/* As we have ensured nr is always one for constant case using
* the BUILD_BUG_ON(), only need to handle the non-constant case
* here for pp_ref_count draining, which is a rare case.
*/
BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
if (!__builtin_constant_p(nr))
- atomic_long_set(&page->pp_ref_count, 1);
+ atomic_long_set(pp_ref_count, 1);
return 0;
}
- ret = atomic_long_sub_return(nr, &page->pp_ref_count);
+ ret = atomic_long_sub_return(nr, pp_ref_count);
WARN_ON(ret < 0);
/* We are the last user here too, reset pp_ref_count back to 1 to
@@ -282,7 +282,7 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
* page_pool_unref_page() currently.
*/
if (unlikely(!ret))
- atomic_long_set(&page->pp_ref_count, 1);
+ atomic_long_set(pp_ref_count, 1);
return ret;
}
@@ -401,9 +401,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
{
- struct page *page = netmem_to_page(netmem);
-
- dma_addr_t ret = page->dma_addr;
+ dma_addr_t ret = netmem_get_dma_addr(netmem);
if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
ret <<= PAGE_SHIFT;
@@ -423,24 +421,6 @@ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
}
-static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
- dma_addr_t addr)
-{
- struct page *page = netmem_to_page(netmem);
-
- if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
- page->dma_addr = addr >> PAGE_SHIFT;
-
- /* We assume page alignment to shave off bottom bits,
- * if this "compression" doesn't work we need to drop.
- */
- return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
- }
-
- page->dma_addr = addr;
- return false;
-}
-
/**
* page_pool_dma_sync_for_cpu - sync Rx page for CPU after it's written by HW
* @pool: &page_pool the @page belongs to
@@ -463,11 +443,6 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
page_pool_get_dma_dir(pool));
}
-static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
-{
- return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
-}
-
static inline bool page_pool_put(struct page_pool *pool)
{
return refcount_dec_and_test(&pool->user_cnt);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 50569fed7868..c022c410abe3 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -20,8 +20,18 @@
* device driver responsibility
*/
#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */
+
+/* Allow unreadable (net_iov backed) netmem in this page_pool. Drivers setting
+ * this must be able to support unreadable netmem, where netmem_address() would
+ * return NULL. This flag should not be set for header page_pools.
+ *
+ * If the driver sets PP_FLAG_ALLOW_UNREADABLE_NETMEM, it should also set
+ * page_pool_params.slow.queue_idx.
+ */
+#define PP_FLAG_ALLOW_UNREADABLE_NETMEM BIT(3)
+
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
- PP_FLAG_SYSTEM_POOL)
+ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
/*
* Fast allocation side cache array/stack
@@ -57,7 +67,9 @@ struct pp_alloc_cache {
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
* @slow: params with slowpath access only (initialization and Netlink)
* @netdev: netdev this pool will serve (leave as NULL if none or multiple)
- * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
+ * @queue_idx: queue idx this page_pool is being created for.
+ * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL,
+ * PP_FLAG_ALLOW_UNREADABLE_NETMEM.
*/
struct page_pool_params {
struct_group_tagged(page_pool_params_fast, fast,
@@ -72,6 +84,7 @@ struct page_pool_params {
);
struct_group_tagged(page_pool_params_slow, slow,
struct net_device *netdev;
+ unsigned int queue_idx;
unsigned int flags;
/* private: used by test code only */
void (*init_callback)(netmem_ref netmem, void *arg);
@@ -139,6 +152,10 @@ struct page_pool_stats {
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
+struct pp_memory_provider_params {
+ void *mp_priv;
+};
+
struct page_pool {
struct page_pool_params_fast p;
@@ -197,6 +214,8 @@ struct page_pool {
*/
struct ptr_ring ring;
+ void *mp_priv;
+
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
struct page_pool_recycle_stats __percpu *recycle_stats;
diff --git a/include/net/sock.h b/include/net/sock.h
index f51d61fab059..c58ca8dd561b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -337,6 +337,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
+ * @sk_user_frags: xarray of pages the user is holding a reference on.
*/
struct sock {
/*
@@ -542,6 +543,7 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+ struct xarray sk_user_frags;
};
struct sock_bh_locked {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2aac11e7e1cc..f77f812bfbe7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1069,7 +1069,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
/* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */
return likely(tcp_skb_can_collapse_to(to) &&
mptcp_skb_can_collapse(to, from) &&
- skb_pure_zcopy_same(to, from));
+ skb_pure_zcopy_same(to, from) &&
+ skb_frags_readable(to) == skb_frags_readable(from));
}
static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to,