summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/netdevice.h 38
-rw-r--r-- include/net/rps.h 28
-rw-r--r-- net/core/dev.c 73
-rw-r--r-- net/core/dev.h 23
-rw-r--r-- net/core/net-procfs.c 3
5 files changed, 95 insertions, 70 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55c7cf9404a4..7d12b5a9380f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3204,6 +3204,7 @@ struct softnet_data {
struct softnet_data *rps_ipi_list;
#endif
+ unsigned int received_rps;
bool in_net_rx_action;
bool in_napi_threaded_poll;
@@ -3236,11 +3237,11 @@ struct softnet_data {
unsigned int cpu;
unsigned int input_queue_tail;
#endif
- unsigned int received_rps;
- unsigned int dropped;
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
+ atomic_t dropped ____cacheline_aligned_in_smp;
+
/* Another possibly contended cache line */
spinlock_t defer_lock ____cacheline_aligned_in_smp;
int defer_count;
@@ -3249,21 +3250,6 @@ struct softnet_data {
call_single_data_t defer_csd;
};
-static inline void input_queue_head_incr(struct softnet_data *sd)
-{
-#ifdef CONFIG_RPS
- sd->input_queue_head++;
-#endif
-}
-
-static inline void input_queue_tail_incr_save(struct softnet_data *sd,
- unsigned int *qtail)
-{
-#ifdef CONFIG_RPS
- *qtail = ++sd->input_queue_tail;
-#endif
-}
-
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
static inline int dev_recursion_level(void)
@@ -3271,24 +3257,6 @@ static inline int dev_recursion_level(void)
return this_cpu_read(softnet_data.xmit.recursion);
}
-#define XMIT_RECURSION_LIMIT 8
-static inline bool dev_xmit_recursion(void)
-{
- return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
- XMIT_RECURSION_LIMIT);
-}
-
-static inline void dev_xmit_recursion_inc(void)
-{
- __this_cpu_inc(softnet_data.xmit.recursion);
-}
-
-static inline void dev_xmit_recursion_dec(void)
-{
- __this_cpu_dec(softnet_data.xmit.recursion);
-}
-
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
void __netif_schedule(struct Qdisc *q);
void netif_schedule_queue(struct netdev_queue *txq);
diff --git a/include/net/rps.h b/include/net/rps.h
index 7660243e905b..a93401d23d66 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -122,4 +122,32 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}
+static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ return ++sd->input_queue_tail;
+#else
+ return 0;
+#endif
+}
+
+static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
+{
+#ifdef CONFIG_RPS
+ WRITE_ONCE(*dest, tail);
+#endif
+}
+
+static inline void rps_input_queue_head_add(struct softnet_data *sd, int val)
+{
+#ifdef CONFIG_RPS
+ WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val);
+#endif
+}
+
+static inline void rps_input_queue_head_incr(struct softnet_data *sd)
+{
+ rps_input_queue_head_add(sd, 1);
+}
+
#endif /* _NET_RPS_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index c136e80dea61..818699dea9d7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4528,7 +4528,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
out:
#endif
rflow->last_qtail =
- per_cpu(softnet_data, next_cpu).input_queue_head;
+ READ_ONCE(per_cpu(softnet_data, next_cpu).input_queue_head);
}
rflow->cpu = next_cpu;
@@ -4610,8 +4610,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
*/
if (unlikely(tcpu != next_cpu) &&
(tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
- ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0)) {
+ ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) -
+ READ_ONCE(rflow->last_qtail))) >= 0)) {
tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
}
@@ -4665,8 +4665,8 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
rflow = &flow_table->flows[flow_id];
cpu = READ_ONCE(rflow->cpu);
if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
- ((int)(per_cpu(softnet_data, cpu).input_queue_head -
- rflow->last_qtail) <
+ ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
+ READ_ONCE(rflow->last_qtail)) <
(int)(10 * flow_table->mask)))
expire = false;
}
@@ -4800,37 +4800,45 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
struct softnet_data *sd;
unsigned long flags;
unsigned int qlen;
+ int max_backlog;
+ u32 tail;
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ reason = SKB_DROP_REASON_DEV_READY;
+ if (!netif_running(skb->dev))
+ goto bad_dev;
+
+ reason = SKB_DROP_REASON_CPU_BACKLOG;
sd = &per_cpu(softnet_data, cpu);
+ qlen = skb_queue_len_lockless(&sd->input_pkt_queue);
+ max_backlog = READ_ONCE(net_hotdata.max_backlog);
+ if (unlikely(qlen > max_backlog))
+ goto cpu_backlog_drop;
backlog_lock_irq_save(sd, &flags);
- if (!netif_running(skb->dev))
- goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= READ_ONCE(net_hotdata.max_backlog) &&
- !skb_flow_limit(skb, qlen)) {
- if (qlen) {
-enqueue:
- __skb_queue_tail(&sd->input_pkt_queue, skb);
- input_queue_tail_incr_save(sd, qtail);
- backlog_unlock_irq_restore(sd, &flags);
- return NET_RX_SUCCESS;
+ if (qlen <= max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (!qlen) {
+ /* Schedule NAPI for backlog device. We can use
+ * non atomic operation as we own the queue lock.
+ */
+ if (!__test_and_set_bit(NAPI_STATE_SCHED,
+ &sd->backlog.state))
+ napi_schedule_rps(sd);
}
+ __skb_queue_tail(&sd->input_pkt_queue, skb);
+ tail = rps_input_queue_tail_incr(sd);
+ backlog_unlock_irq_restore(sd, &flags);
- /* Schedule NAPI for backlog device
- * We can use non atomic operation since we own the queue lock
- */
- if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
- napi_schedule_rps(sd);
- goto enqueue;
+ /* save the tail outside of the critical section */
+ rps_input_queue_tail_save(qtail, tail);
+ return NET_RX_SUCCESS;
}
- reason = SKB_DROP_REASON_CPU_BACKLOG;
-drop:
- sd->dropped++;
backlog_unlock_irq_restore(sd, &flags);
+cpu_backlog_drop:
+ atomic_inc(&sd->dropped);
+bad_dev:
dev_core_stats_rx_dropped_inc(skb->dev);
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
@@ -5900,7 +5908,7 @@ static void flush_backlog(struct work_struct *work)
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
dev_kfree_skb_irq(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
backlog_unlock_irq_enable(sd);
@@ -5909,7 +5917,7 @@ static void flush_backlog(struct work_struct *work)
if (skb->dev->reg_state == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
kfree_skb(skb);
- input_queue_head_incr(sd);
+ rps_input_queue_head_incr(sd);
}
}
local_bh_enable();
@@ -6037,9 +6045,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
rcu_read_lock();
__netif_receive_skb(skb);
rcu_read_unlock();
- input_queue_head_incr(sd);
- if (++work >= quota)
+ if (++work >= quota) {
+ rps_input_queue_head_add(sd, work);
return work;
+ }
}
@@ -6062,6 +6071,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
backlog_unlock_irq_enable(sd);
}
+ if (work)
+ rps_input_queue_head_add(sd, work);
return work;
}
@@ -11451,11 +11462,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
- input_queue_head_incr(oldsd);
+ rps_input_queue_head_incr(oldsd);
}
return 0;
diff --git a/net/core/dev.h b/net/core/dev.h
index 2bcaf8eee50c..8572d2c8dc4a 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -4,11 +4,9 @@
#include <linux/types.h>
#include <linux/rwsem.h>
+#include <linux/netdevice.h>
struct net;
-struct net_device;
-struct netdev_bpf;
-struct netdev_phys_item_id;
struct netlink_ext_ack;
struct cpumask;
@@ -150,4 +148,23 @@ static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
struct napi_struct *napi_by_id(unsigned int napi_id);
+void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+
+#define XMIT_RECURSION_LIMIT 8
+static inline bool dev_xmit_recursion(void)
+{
+ return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
+ XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+ __this_cpu_inc(softnet_data.xmit.recursion);
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+ __this_cpu_dec(softnet_data.xmit.recursion);
+}
+
#endif
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index a97eceb84e61..fa6d3969734a 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -144,7 +144,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x\n",
- sd->processed, sd->dropped, sd->time_squeeze, 0,
+ sd->processed, atomic_read(&sd->dropped),
+ sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
sd->received_rps, flow_limit_count,