Diffstat (limited to 'net/core')
-rw-r--r--   net/core/Makefile              1
-rw-r--r--   net/core/datagram.c            3
-rw-r--r--   net/core/dev.c               104
-rw-r--r--   net/core/dev_ioctl.c           2
-rw-r--r--   net/core/devlink.c           825
-rw-r--r--   net/core/filter.c            108
-rw-r--r--   net/core/flow_dissector.c     18
-rw-r--r--   net/core/gen_estimator.c      52
-rw-r--r--   net/core/gen_stats.c         186
-rw-r--r--   net/core/neighbour.c         204
-rw-r--r--   net/core/net-procfs.c         24
-rw-r--r--   net/core/net-sysfs.c          61
-rw-r--r--   net/core/net_namespace.c       4
-rw-r--r--   net/core/of_net.c            170
-rw-r--r--   net/core/page_pool.c          10
-rw-r--r--   net/core/rtnetlink.c          15
-rw-r--r--   net/core/selftests.c           8
-rw-r--r--   net/core/skbuff.c             99
-rw-r--r--   net/core/skmsg.c              57
-rw-r--r--   net/core/sock.c              104
-rw-r--r--   net/core/sock_destructor.h    12
-rw-r--r--   net/core/sock_map.c            6
-rw-r--r--   net/core/stream.c              5
-rw-r--r--   net/core/sysctl_net_core.c     2
-rw-r--r--   net/core/xdp.c                 2
25 files changed, 1284 insertions, 798 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 35ced6201814..4268846f2f47 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
+obj-$(CONFIG_OF) += of_net.o
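
The new of_net.o object corresponds to the net/core/of_net.c entry in the diffstat: the device-tree MAC address helpers are now built here whenever CONFIG_OF is enabled. A minimal, hypothetical sketch of the usual consumer of those helpers (function and variable names are illustrative only):

#include <linux/etherdevice.h>
#include <linux/of_net.h>

/* Sketch only: read the MAC from the DT node, fall back to a random one. */
static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	u8 addr[ETH_ALEN];

	if (!of_get_mac_address(np, addr))
		eth_hw_addr_set(ndev, addr);	/* MAC found in the device tree */
	else
		eth_hw_addr_random(ndev);	/* no usable DT address */
}
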
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..ee290776c661 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
sk_wmem_queued_add(sk, truesize);
- sk_mem_charge(sk, truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ee9fecd3aff..15ac064b5562 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -140,7 +140,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
-#include <linux/netfilter_ingress.h>
+#include <linux/netfilter_netdev.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
@@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
return NULL;
}
+bool netdev_name_in_use(struct net *net, const char *name)
+{
+ return netdev_name_node_lookup(net, name);
+}
+EXPORT_SYMBOL(netdev_name_in_use);
+
int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
struct netdev_name_node *name_node;
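
netdev_name_in_use() wraps the name-node lookup so that code outside dev.c can test whether an interface name is already taken in a namespace; the later hunks in this file convert the open-coded __dev_get_by_name() checks to it. A hedged sketch of a caller (names hypothetical):

	char name[IFNAMSIZ];

	snprintf(name, IFNAMSIZ, "myeth%d", idx);
	if (netdev_name_in_use(net, name))
		/* already taken in this netns, fall back to a wildcard pattern */
		strscpy(name, "myeth%d", IFNAMSIZ);
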
@@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
}
snprintf(buf, IFNAMSIZ, name, i);
- if (!__dev_get_by_name(net, buf))
+ if (!netdev_name_in_use(net, buf))
return i;
/* It is possible to run out of possible slots
@@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
if (strchr(name, '%'))
return dev_alloc_name_ns(net, dev, name);
- else if (__dev_get_by_name(net, name))
+ else if (netdev_name_in_use(net, name))
return -EEXIST;
else if (dev->name != name)
strlcpy(dev->name, name, IFNAMSIZ);
@@ -1290,8 +1296,8 @@ rollback:
old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
- pr_err("%s: name change rollback failed: %d\n",
- dev->name, ret);
+ netdev_err(dev, "name change rollback failed: %d\n",
+ ret);
}
}
@@ -2345,7 +2351,7 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
/* If TC0 is invalidated disable TC mapping */
if (tc->offset + tc->count > txq) {
- pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
+ netdev_warn(dev, "Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
dev->num_tc = 0;
return;
}
@@ -2356,8 +2362,8 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
tc = &dev->tc_to_txq[q];
if (tc->offset + tc->count > txq) {
- pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
- i, q);
+ netdev_warn(dev, "Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
+ i, q);
netdev_set_prio_tc_map(dev, i, 0);
}
}
@@ -2921,6 +2927,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ dev_qdisc_change_real_num_tx(dev, txq);
+
dev->real_num_tx_queues = txq;
if (disabling) {
@@ -3163,6 +3171,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
qoffset = sb_dev->tc_to_txq[tc].offset;
qcount = sb_dev->tc_to_txq[tc].count;
+ if (unlikely(!qcount)) {
+ net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+ sb_dev->name, qoffset, tc);
+ qoffset = 0;
+ qcount = dev->real_num_tx_queues;
+ }
}
if (skb_rx_queue_recorded(skb)) {
@@ -3408,7 +3422,7 @@ EXPORT_SYMBOL(__skb_gso_segment);
#ifdef CONFIG_BUG
static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
- pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ netdev_err(dev, "hw csum failure\n");
skb_dump(KERN_ERR, skb, true);
dump_stack();
}
@@ -3906,7 +3920,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
skb->pkt_type = PACKET_LOOPBACK;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
netif_rx_ni(skb);
@@ -3918,6 +3933,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
+#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
@@ -3953,6 +3969,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
default:
break;
}
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
@@ -4146,13 +4163,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_egress(skb, &rc, dev);
+ if (!skb)
+ goto out;
+ }
+ nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
}
-# endif
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
@@ -5294,6 +5318,7 @@ skip_taps:
if (static_branch_unlikely(&ingress_needed_key)) {
bool another = false;
+ nf_skip_egress(skb, true);
skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
&another);
if (another)
@@ -5301,6 +5326,7 @@ skip_taps:
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto out;
}
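
Taken together, these hunks make the netfilter egress hook run before the tc egress classifier, and they bracket both the tc egress and tc ingress hooks with nf_skip_egress() so the netfilter hook is not invoked again for the same skb from inside those paths. Roughly, the transmit side now behaves like this simplified sketch (not the literal code):

	if (static_branch_unlikely(&egress_needed_key)) {
		if (nf_hook_egress_active())
			skb = nf_hook_egress(skb, &rc, dev);	/* netfilter first */
		nf_skip_egress(skb, true);			/* guard the tc hook */
		skb = sch_handle_egress(skb, &rc, dev);
		nf_skip_egress(skb, false);
	}
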
@@ -5837,7 +5863,7 @@ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int se
gro_normal_list(napi);
}
-static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
__be16 type = skb->protocol;
@@ -5866,12 +5892,11 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
if (err) {
WARN_ON(&ptype->list == head);
kfree_skb(skb);
- return NET_RX_SUCCESS;
+ return;
}
out:
gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
- return NET_RX_SUCCESS;
}
static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
@@ -6898,19 +6923,28 @@ EXPORT_SYMBOL(netif_napi_add);
void napi_disable(struct napi_struct *n)
{
+ unsigned long val, new;
+
might_sleep();
set_bit(NAPI_STATE_DISABLE, &n->state);
- while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
- msleep(1);
- while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
- msleep(1);
+ for ( ; ; ) {
+ val = READ_ONCE(n->state);
+ if (val & (NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC)) {
+ usleep_range(20, 200);
+ continue;
+ }
+
+ new = val | NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC;
+ new &= ~(NAPIF_STATE_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
+
+ if (cmpxchg(&n->state, val, new) == val)
+ break;
+ }
hrtimer_cancel(&n->timer);
- clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
- clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
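
napi_disable() now takes SCHED and NPSVC in a single cmpxchg() transition and clears the THREADED and PREFER_BUSY_POLL bits at the same time, instead of using two test_and_set_bit()/msleep() loops. The driver-facing contract is unchanged; a typical teardown or reconfigure sequence still looks like the following sketch ("priv" is a hypothetical driver private struct):

	napi_disable(&priv->napi);	/* waits for any in-flight poll to finish */
	/* ... resize rings, change settings ... */
	napi_enable(&priv->napi);
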
@@ -6988,8 +7022,8 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
}
if (unlikely(work > weight))
- pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
- n->poll, work, weight);
+ netdev_err_once(n->dev, "NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+ n->poll, work, weight);
if (likely(work < weight))
return work;
@@ -8543,8 +8577,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
dev->flags &= ~IFF_PROMISC;
else {
dev->promiscuity -= inc;
- pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
- dev->name);
+ netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW;
}
}
@@ -8614,8 +8647,7 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
dev->flags &= ~IFF_ALLMULTI;
else {
dev->allmulti -= inc;
- pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
- dev->name);
+ netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW;
}
}
@@ -9152,14 +9184,11 @@ int dev_get_port_parent_id(struct net_device *dev,
}
err = devlink_compat_switch_id_get(dev, ppid);
- if (!err || err != -EOPNOTSUPP)
+ if (!recurse || err != -EOPNOTSUPP)
return err;
- if (!recurse)
- return -EOPNOTSUPP;
-
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+ err = dev_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -9903,6 +9932,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ if ((features & NETIF_F_GRO_HW) && (features & NETIF_F_LRO)) {
+ netdev_dbg(dev, "Dropping LRO feature since HW-GRO is requested.\n");
+ features &= ~NETIF_F_LRO;
+ }
+
if (features & NETIF_F_HW_TLS_TX) {
bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
@@ -10860,7 +10894,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
- nf_hook_ingress_init(dev);
+ nf_hook_netdev_init(dev);
return dev;
@@ -11146,7 +11180,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
* we can use it in the destination network namespace.
*/
err = -EEXIST;
- if (__dev_get_by_name(net, dev->name)) {
+ if (netdev_name_in_use(net, dev->name)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
@@ -11499,7 +11533,7 @@ static void __net_exit default_device_exit(struct net *net)
/* Push remaining network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
- if (__dev_get_by_name(&init_net, fb_name))
+ if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0e87237fd871..cbab5fec64b1 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -518,9 +518,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
case SIOCETHTOOL:
dev_load(net, ifr->ifr_name);
- rtnl_lock();
ret = dev_ethtool(net, ifr, data);
- rtnl_unlock();
if (colon)
*colon = ':';
return ret;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a856ae401ea5..5ba4f9434acd 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -30,6 +30,63 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
+struct devlink {
+ u32 index;
+ struct list_head port_list;
+ struct list_head rate_list;
+ struct list_head sb_list;
+ struct list_head dpipe_table_list;
+ struct list_head resource_list;
+ struct list_head param_list;
+ struct list_head region_list;
+ struct list_head reporter_list;
+ struct mutex reporters_lock; /* protects reporter_list */
+ struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
+ struct list_head trap_policer_list;
+ const struct devlink_ops *ops;
+ u64 features;
+ struct xarray snapshot_ids;
+ struct devlink_dev_stats stats;
+ struct device *dev;
+ possible_net_t _net;
+ /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
+ struct mutex lock;
+ u8 reload_failed:1;
+ refcount_t refcount;
+ struct completion comp;
+ char priv[] __aligned(NETDEV_ALIGN);
+};
+
+void *devlink_priv(struct devlink *devlink)
+{
+ return &devlink->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_priv);
+
+struct devlink *priv_to_devlink(void *priv)
+{
+ return container_of(priv, struct devlink, priv);
+}
+EXPORT_SYMBOL_GPL(priv_to_devlink);
+
+struct device *devlink_to_dev(const struct devlink *devlink)
+{
+ return devlink->dev;
+}
+EXPORT_SYMBOL_GPL(devlink_to_dev);
+
static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
{
.name = "destination mac",
@@ -45,7 +102,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ethernet);
static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
{
@@ -62,7 +119,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv4);
static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
{
@@ -79,7 +136,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
.fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
.global = true,
};
-EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+EXPORT_SYMBOL_GPL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
@@ -95,6 +152,22 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
#define DEVLINK_REGISTERED XA_MARK_1
+/* devlink instances are open to access from user space after the
+ * devlink_register() call. Such a logical barrier allows us to have certain
+ * expectations related to locking.
+ *
+ * Before *_register() - we are in the initialization stage and no parallel
+ * access to the devlink instance is possible. All drivers perform that phase
+ * while implicitly holding the device_lock.
+ *
+ * After *_register() - users and the driver can access the devlink instance
+ * at the same time.
+ */
+#define ASSERT_DEVLINK_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+
/* devlink_mutex
*
* An overall lock guarding every operation coming from userspace.
@@ -109,15 +182,17 @@ struct net *devlink_net(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_net);
-static void devlink_put(struct devlink *devlink)
+void devlink_put(struct devlink *devlink)
{
if (refcount_dec_and_test(&devlink->refcount))
complete(&devlink->comp);
}
-static bool __must_check devlink_try_get(struct devlink *devlink)
+struct devlink *__must_check devlink_try_get(struct devlink *devlink)
{
- return refcount_inc_not_zero(&devlink->refcount);
+ if (refcount_inc_not_zero(&devlink->refcount))
+ return devlink;
+ return NULL;
}
static struct devlink *devlink_get_from_attrs(struct net *net,
@@ -742,6 +817,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
int err;
WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -1040,11 +1116,15 @@ nla_put_failure:
static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1055,19 +1135,22 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_port->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static void devlink_rate_notify(struct devlink_rate *devlink_rate,
enum devlink_command cmd)
{
+ struct devlink *devlink = devlink_rate->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -1078,9 +1161,8 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(devlink_rate->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
@@ -3285,7 +3367,7 @@ void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
kfree(value[value_index].mask);
}
}
-EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_clear);
static int devlink_dpipe_entries_fill(struct genl_info *info,
enum devlink_command cmd, int flags,
@@ -3952,9 +4034,6 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
- if (!devlink->reload_enabled)
- return -EOPNOTSUPP;
-
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -4022,7 +4101,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
u32 actions_performed;
int err;
- if (!devlink_reload_supported(devlink->ops))
+ if (!(devlink->features & DEVLINK_F_RELOAD))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4150,6 +4229,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -4522,8 +4602,6 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
return -EOPNOTSUPP;
param_value[i] = param_item->driverinit_value;
} else {
- if (!param_item->published)
- continue;
ctx.cmode = i;
err = devlink_param_get(devlink, param, &ctx);
if (err)
@@ -4599,6 +4677,7 @@ static void devlink_param_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
cmd != DEVLINK_CMD_PORT_PARAM_DEL);
+ ASSERT_DEVLINK_REGISTERED(devlink);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -4848,47 +4927,6 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
info, DEVLINK_CMD_PARAM_NEW);
}
-static int devlink_param_register_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- if (devlink_param_find_by_name(param_list, param->name))
- return -EEXIST;
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
- param_item->param = param;
-
- list_add_tail(&param_item->list, param_list);
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-static void devlink_param_unregister_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_name(param_list, param->name);
- WARN_ON(!param_item);
- devlink_param_notify(devlink, port_index, param_item, cmd);
- list_del(&param_item->list);
- kfree(param_item);
-}
-
static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
@@ -5070,6 +5108,11 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
+ err = nla_put_u32(msg, DEVLINK_ATTR_REGION_MAX_SNAPSHOTS,
+ region->max_snapshots);
+ if (err)
+ goto nla_put_failure;
+
err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
if (err)
goto nla_put_failure;
@@ -5145,17 +5188,19 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct devlink_snapshot *snapshot,
enum devlink_command cmd)
{
+ struct devlink *devlink = region->devlink;
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(region->devlink), msg, 0,
- DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/**
@@ -6269,23 +6314,21 @@ static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
return 0;
}
-int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
-int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
{
@@ -6296,14 +6339,13 @@ int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
}
EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
{
if (fmsg->putting_binary)
return -EINVAL;
return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
{
@@ -6923,10 +6965,12 @@ genlmsg_cancel:
static void devlink_recover_notify(struct devlink_health_reporter *reporter,
enum devlink_command cmd)
{
+ struct devlink *devlink = reporter->devlink;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -6938,9 +6982,8 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family,
- devlink_net(reporter->devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
void
@@ -8900,6 +8943,25 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
}
/**
+ * devlink_set_features - Set devlink supported features
+ *
+ * @devlink: devlink
+ * @features: devlink supported features
+ *
+ * This interface allows us to set reload ops separately from
+ * the devlink_alloc.
+ */
+void devlink_set_features(struct devlink *devlink, u64 features)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(features & DEVLINK_F_RELOAD &&
+ !devlink_reload_supported(devlink->ops));
+ devlink->features = features;
+}
+EXPORT_SYMBOL_GPL(devlink_set_features);
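
devlink_set_features() has to run before devlink_register() (the assertion above enforces this), and per the reload hunks in this file DEVLINK_F_RELOAD takes over from the removed reload_enabled flag: devlink_nl_cmd_reload() now refuses requests unless the bit is set. A hedged driver-side sketch:

	/* In probe, before devlink_register(); assumes the driver implements the reload ops. */
	devlink_set_features(devlink, DEVLINK_F_RELOAD);
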
+
+/**
* devlink_alloc_ns - Allocate new devlink instance resources
* in specific namespace
*
@@ -8958,18 +9020,104 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
}
EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+static void
+devlink_trap_policer_notify(struct devlink *devlink,
+ const struct devlink_trap_policer_item *policer_item,
+ enum devlink_command cmd);
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+ const struct devlink_trap_group_item *group_item,
+ enum devlink_command cmd);
+static void devlink_trap_notify(struct devlink *devlink,
+ const struct devlink_trap_item *trap_item,
+ enum devlink_command cmd);
+
+static void devlink_notify_register(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_param_item *param_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW);
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW);
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+ list_for_each_entry(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+
+ list_for_each_entry(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ list_for_each_entry(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+}
+
+static void devlink_notify_unregister(struct devlink *devlink)
+{
+ struct devlink_trap_policer_item *policer_item;
+ struct devlink_trap_group_item *group_item;
+ struct devlink_param_item *param_item;
+ struct devlink_trap_item *trap_item;
+ struct devlink_port *devlink_port;
+ struct devlink_rate *rate_node;
+ struct devlink_region *region;
+
+ list_for_each_entry_reverse(param_item, &devlink->param_list, list)
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+
+ list_for_each_entry_reverse(region, &devlink->region_list, list)
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+
+ list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+
+ list_for_each_entry_reverse(trap_item, &devlink->trap_list, list)
+ devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+
+ list_for_each_entry_reverse(group_item, &devlink->trap_group_list, list)
+ devlink_trap_group_notify(devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_DEL);
+ list_for_each_entry_reverse(policer_item, &devlink->trap_policer_list,
+ list)
+ devlink_trap_policer_notify(devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_DEL);
+
+ list_for_each_entry_reverse(devlink_port, &devlink->port_list, list)
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
+}
+
/**
* devlink_register - Register devlink instance
*
* @devlink: devlink
*/
-int devlink_register(struct devlink *devlink)
+void devlink_register(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ /* Make sure that we are in the .probe() routine */
+
mutex_lock(&devlink_mutex);
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- devlink_notify(devlink, DEVLINK_CMD_NEW);
+ devlink_notify_register(devlink);
mutex_unlock(&devlink_mutex);
- return 0;
}
EXPORT_SYMBOL_GPL(devlink_register);
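
devlink_register() no longer returns an error and now sends the NEW notifications for every already-created sub-object through devlink_notify_register(), so a driver is expected to set up ports, params, regions and traps first and register the instance as the very last step of probe (and to unregister it first in remove). A hedged sketch of that ordering (ops and priv struct are hypothetical, error handling omitted):

	devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), dev);
	/* ... devlink_port_register(), devlink_params_register(), traps, regions ... */
	devlink_set_features(devlink, DEVLINK_F_RELOAD);
	devlink_register(devlink);	/* last: opens the instance to user space */
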
@@ -8980,60 +9128,28 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ ASSERT_DEVLINK_REGISTERED(devlink);
+ /* Make sure that we are in the .remove() routine */
+
devlink_put(devlink);
wait_for_completion(&devlink->comp);
mutex_lock(&devlink_mutex);
- WARN_ON(devlink_reload_supported(devlink->ops) &&
- devlink->reload_enabled);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
+ devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
- * devlink_reload_enable - Enable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at end of device initialization
- * process when reload operation is supported.
- */
-void devlink_reload_enable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- devlink->reload_enabled = true;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_enable);
-
-/**
- * devlink_reload_disable - Disable reload of devlink instance
- *
- * @devlink: devlink
- *
- * Should be called at the beginning of device cleanup
- * process when reload operation is supported.
- */
-void devlink_reload_disable(struct devlink *devlink)
-{
- mutex_lock(&devlink_mutex);
- /* Mutex is taken which ensures that no reload operation is in
- * progress while setting up forbidded flag.
- */
- devlink->reload_enabled = false;
- mutex_unlock(&devlink_mutex);
-}
-EXPORT_SYMBOL_GPL(devlink_reload_disable);
-
-/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
*/
void devlink_free(struct devlink *devlink)
{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->trap_policer_list));
@@ -9939,73 +10055,6 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
-static int __devlink_param_register_one(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *param,
- enum devlink_command reg_cmd)
-{
- int err;
-
- err = devlink_param_verify(param);
- if (err)
- return err;
-
- return devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
-}
-
-static int __devlink_params_register(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *params,
- size_t params_count,
- enum devlink_command reg_cmd,
- enum devlink_command unreg_cmd)
-{
- const struct devlink_param *param = params;
- int i;
- int err;
-
- mutex_lock(&devlink->lock);
- for (i = 0; i < params_count; i++, param++) {
- err = __devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
- if (err)
- goto rollback;
- }
-
- mutex_unlock(&devlink->lock);
- return 0;
-
-rollback:
- if (!i)
- goto unlock;
- for (param--; i > 0; i--, param--)
- devlink_param_unregister_one(devlink, port_index, param_list,
- param, unreg_cmd);
-unlock:
- mutex_unlock(&devlink->lock);
- return err;
-}
-
-static void __devlink_params_unregister(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list,
- const struct devlink_param *params,
- size_t params_count,
- enum devlink_command cmd)
-{
- const struct devlink_param *param = params;
- int i;
-
- mutex_lock(&devlink->lock);
- for (i = 0; i < params_count; i++, param++)
- devlink_param_unregister_one(devlink, 0, param_list, param,
- cmd);
- mutex_unlock(&devlink->lock);
-}
-
/**
* devlink_params_register - register configuration parameters
*
@@ -10019,10 +10068,25 @@ int devlink_params_register(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count)
{
- return __devlink_params_register(devlink, 0, &devlink->param_list,
- params, params_count,
- DEVLINK_CMD_PARAM_NEW,
- DEVLINK_CMD_PARAM_DEL);
+ const struct devlink_param *param = params;
+ int i, err;
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ for (i = 0; i < params_count; i++, param++) {
+ err = devlink_param_register(devlink, param);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ if (!i)
+ return err;
+
+ for (param--; i > 0; i--, param--)
+ devlink_param_unregister(devlink, param);
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_params_register);
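
With the publish/unpublish machinery gone, devlink_params_register() is reduced to a loop over devlink_param_register() and must be called before devlink_register(); the PARAM_NEW notifications are emitted later by devlink_notify_register(). A hedged sketch ("my_params" is a hypothetical driver table):

	static const struct devlink_param my_params[] = {
		/* DEVLINK_PARAM_GENERIC()/DEVLINK_PARAM_DRIVER() entries */
	};

	err = devlink_params_register(devlink, my_params, ARRAY_SIZE(my_params));
	if (err)
		return err;
	/* ... devlink_register(devlink) follows as the final probe step ... */
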
@@ -10036,9 +10100,13 @@ void devlink_params_unregister(struct devlink *devlink,
const struct devlink_param *params,
size_t params_count)
{
- return __devlink_params_unregister(devlink, 0, &devlink->param_list,
- params, params_count,
- DEVLINK_CMD_PARAM_DEL);
+ const struct devlink_param *param = params;
+ int i;
+
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ for (i = 0; i < params_count; i++, param++)
+ devlink_param_unregister(devlink, param);
}
EXPORT_SYMBOL_GPL(devlink_params_unregister);
@@ -10054,13 +10122,26 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister);
int devlink_param_register(struct devlink *devlink,
const struct devlink_param *param)
{
- int err;
+ struct devlink_param_item *param_item;
- mutex_lock(&devlink->lock);
- err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
- param, DEVLINK_CMD_PARAM_NEW);
- mutex_unlock(&devlink->lock);
- return err;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
+
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ list_add_tail(&param_item->list, &devlink->param_list);
+ return 0;
}
EXPORT_SYMBOL_GPL(devlink_param_register);
@@ -10072,152 +10153,38 @@ EXPORT_SYMBOL_GPL(devlink_param_register);
void devlink_param_unregister(struct devlink *devlink,
const struct devlink_param *param)
{
- mutex_lock(&devlink->lock);
- devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
- DEVLINK_CMD_PARAM_DEL);
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_param_unregister);
-
-/**
- * devlink_params_publish - publish configuration parameters
- *
- * @devlink: devlink
- *
- * Publish previously registered configuration parameters.
- */
-void devlink_params_publish(struct devlink *devlink)
-{
- struct devlink_param_item *param_item;
-
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->published)
- continue;
- param_item->published = true;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_params_publish);
-
-/**
- * devlink_params_unpublish - unpublish configuration parameters
- *
- * @devlink: devlink
- *
- * Unpublish previously registered configuration parameters.
- */
-void devlink_params_unpublish(struct devlink *devlink)
-{
struct devlink_param_item *param_item;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (!param_item->published)
- continue;
- param_item->published = false;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- }
-}
-EXPORT_SYMBOL_GPL(devlink_params_unpublish);
-
-/**
- * devlink_param_publish - publish one configuration parameter
- *
- * @devlink: devlink
- * @param: one configuration parameter
- *
- * Publish previously registered configuration parameter.
- */
-void devlink_param_publish(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || param_item->published)
- continue;
- param_item->published = true;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
- break;
- }
+ param_item =
+ devlink_param_find_by_name(&devlink->param_list, param->name);
+ WARN_ON(!param_item);
+ list_del(&param_item->list);
+ kfree(param_item);
}
-EXPORT_SYMBOL_GPL(devlink_param_publish);
+EXPORT_SYMBOL_GPL(devlink_param_unregister);
/**
- * devlink_param_unpublish - unpublish one configuration parameter
+ * devlink_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
*
- * @devlink: devlink
- * @param: one configuration parameter
+ * @devlink: devlink
+ * @param_id: parameter ID
+ * @init_val: value of parameter in driverinit configuration mode
*
- * Unpublish previously registered configuration parameter.
+ * This function should be used by the driver to get driverinit
+ * configuration for initialization after reload command.
*/
-void devlink_param_unpublish(struct devlink *devlink,
- const struct devlink_param *param)
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *init_val)
{
struct devlink_param_item *param_item;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (param_item->param != param || !param_item->published)
- continue;
- param_item->published = false;
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(devlink_param_unpublish);
-
-/**
- * devlink_port_params_register - register port configuration parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- *
- * Register the configuration parameters supported by the port.
- */
-int devlink_port_params_register(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_register(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list, params,
- params_count,
- DEVLINK_CMD_PORT_PARAM_NEW,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_register);
-
-/**
- * devlink_port_params_unregister - unregister port configuration
- * parameters
- *
- * @devlink_port: devlink port
- * @params: configuration parameters array
- * @params_count: number of parameters provided
- */
-void devlink_port_params_unregister(struct devlink_port *devlink_port,
- const struct devlink_param *params,
- size_t params_count)
-{
- return __devlink_params_unregister(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- params, params_count,
- DEVLINK_CMD_PORT_PARAM_DEL);
-}
-EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
-
-static int
-__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink_param_item *param_item;
+ if (!devlink_reload_supported(devlink->ops))
+ return -EOPNOTSUPP;
- param_item = devlink_param_find_by_id(param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
if (!param_item)
return -EINVAL;
@@ -10233,54 +10200,6 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
return 0;
}
-
-static int
-__devlink_param_driverinit_value_set(struct devlink *devlink,
- unsigned int port_index,
- struct list_head *param_list, u32 param_id,
- union devlink_param_value init_val,
- enum devlink_command cmd)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(param_list, param_id);
- if (!param_item)
- return -EINVAL;
-
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
- return -EOPNOTSUPP;
-
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, init_val.vstr);
- else
- param_item->driverinit_value = init_val;
- param_item->driverinit_value_valid = true;
-
- devlink_param_notify(devlink, port_index, param_item, cmd);
- return 0;
-}
-
-/**
- * devlink_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink: devlink
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
-{
- if (!devlink_reload_supported(devlink->ops))
- return -EOPNOTSUPP;
-
- return __devlink_param_driverinit_value_get(&devlink->param_list,
- param_id, init_val);
-}
EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
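
The driverinit get/set helpers now work directly on devlink->param_list, and the per-port variants are removed further down. Typical usage is unchanged; a hedged sketch (the parameter id and priv field are hypothetical):

	union devlink_param_value val;

	if (!devlink_param_driverinit_value_get(devlink, MY_DRIVER_PARAM_ID_FOO, &val))
		my_priv->foo = val.vu32;	/* apply the driverinit value */
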
/**
@@ -10298,61 +10217,26 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
union devlink_param_value init_val)
{
- return __devlink_param_driverinit_value_set(devlink, 0,
- &devlink->param_list,
- param_id, init_val,
- DEVLINK_CMD_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+ struct devlink_param_item *param_item;
-/**
- * devlink_port_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
- *
- * This function should be used by the driver to get driverinit
- * configuration for initialization after reload command.
- */
-int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value *init_val)
-{
- struct devlink *devlink = devlink_port->devlink;
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- if (!devlink_reload_supported(devlink->ops))
- return -EOPNOTSUPP;
+ param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ if (!param_item)
+ return -EINVAL;
- return __devlink_param_driverinit_value_get(&devlink_port->param_list,
- param_id, init_val);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT))
+ return -EOPNOTSUPP;
-/**
- * devlink_port_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- * @init_val: value of parameter to set for driverinit configuration mode
- *
- * This function should be used by the driver to set driverinit
- * configuration mode default value.
- */
-int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
- u32 param_id,
- union devlink_param_value init_val)
-{
- return __devlink_param_driverinit_value_set(devlink_port->devlink,
- devlink_port->index,
- &devlink_port->param_list,
- param_id, init_val,
- DEVLINK_CMD_PORT_PARAM_NEW);
+ if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+ strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+ else
+ param_item->driverinit_value = init_val;
+ param_item->driverinit_value_valid = true;
+ return 0;
}
-EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
/**
* devlink_param_value_changed - notify devlink on a parameter's value
@@ -10378,50 +10262,6 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
EXPORT_SYMBOL_GPL(devlink_param_value_changed);
/**
- * devlink_port_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
- *
- * @devlink_port: devlink_port
- * @param_id: parameter ID
- *
- * This function should be used by the driver to notify devlink on value
- * change, excluding driverinit configuration mode.
- * For driverinit configuration mode driver should use the function
- * devlink_port_param_driverinit_value_set() instead.
- */
-void devlink_port_param_value_changed(struct devlink_port *devlink_port,
- u32 param_id)
-{
- struct devlink_param_item *param_item;
-
- param_item = devlink_param_find_by_id(&devlink_port->param_list,
- param_id);
- WARN_ON(!param_item);
-
- devlink_param_notify(devlink_port->devlink, devlink_port->index,
- param_item, DEVLINK_CMD_PORT_PARAM_NEW);
-}
-EXPORT_SYMBOL_GPL(devlink_port_param_value_changed);
-
-/**
- * devlink_param_value_str_fill - Safely fill-up the string preventing
- * from overflow of the preallocated buffer
- *
- * @dst_val: destination devlink_param_value
- * @src: source buffer
- */
-void devlink_param_value_str_fill(union devlink_param_value *dst_val,
- const char *src)
-{
- size_t len;
-
- len = strlcpy(dst_val->vstr, src, __DEVLINK_PARAM_MAX_STRING_VALUE);
- WARN_ON(len >= __DEVLINK_PARAM_MAX_STRING_VALUE);
-}
-EXPORT_SYMBOL_GPL(devlink_param_value_str_fill);
-
-/**
* devlink_region_create - create a new address region
*
* @devlink: devlink
@@ -10839,6 +10679,8 @@ devlink_trap_group_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -10880,6 +10722,8 @@ static void devlink_trap_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
cmd != DEVLINK_CMD_TRAP_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11261,6 +11105,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -11429,45 +11275,36 @@ free_msg:
nlmsg_free(msg);
}
-void devlink_compat_running_version(struct net_device *dev,
- char *buf, size_t len)
+static struct devlink_port *netdev_to_devlink_port(struct net_device *dev)
{
- struct devlink *devlink;
+ if (!dev->netdev_ops->ndo_get_devlink_port)
+ return NULL;
- dev_hold(dev);
- rtnl_unlock();
+ return dev->netdev_ops->ndo_get_devlink_port(dev);
+}
- devlink = netdev_to_devlink(dev);
- if (!devlink || !devlink->ops->info_get)
- goto out;
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
mutex_lock(&devlink->lock);
__devlink_compat_running_version(devlink, buf, len);
mutex_unlock(&devlink->lock);
-
-out:
- rtnl_lock();
- dev_put(dev);
}
-int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
{
struct devlink_flash_update_params params = {};
- struct devlink *devlink;
int ret;
- dev_hold(dev);
- rtnl_unlock();
-
- devlink = netdev_to_devlink(dev);
- if (!devlink || !devlink->ops->flash_update) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
ret = request_firmware(&params.fw, file_name, devlink->dev);
if (ret)
- goto out;
+ return ret;
mutex_lock(&devlink->lock);
devlink_flash_update_begin_notify(devlink);
@@ -11477,10 +11314,6 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
release_firmware(params.fw);
-out:
- rtnl_lock();
- dev_put(dev);
-
return ret;
}
@@ -11538,7 +11371,7 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
if (!net_eq(devlink_net(devlink), net))
goto retry;
- WARN_ON(!devlink_reload_supported(devlink->ops));
+ WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e32cee2c469..e471c9b09670 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7765,6 +7765,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
break;
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
+ if (type == BPF_WRITE || size != sizeof(__u64))
+ return false;
+ break;
case bpf_ctx_range(struct __sk_buff, tstamp):
if (size != sizeof(__u64))
return false;
@@ -7774,6 +7778,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
break;
+ case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+ /* Explicitly prohibit access to padding in __sk_buff. */
+ return false;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -7802,6 +7809,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -7872,6 +7880,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8373,6 +8382,7 @@ static bool sk_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, tstamp):
case bpf_ctx_range(struct __sk_buff, wire_len):
+ case bpf_ctx_range(struct __sk_buff, hwtstamp):
return false;
}
@@ -8884,6 +8894,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_buff, sk));
break;
+ case offsetof(struct __sk_buff, hwtstamp):
+ BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
+ BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
+
+ insn = bpf_convert_shinfo_access(si, insn);
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->dst_reg,
+ bpf_target_off(struct skb_shared_info,
+ hwtstamps, 8,
+ target_size));
+ break;
}
return insn - insn_buf;
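
The new hwtstamp member exposes skb_shinfo(skb)->hwtstamps.hwtstamp as a read-only __u64 to the program types that reach bpf_skb_is_valid_access() without excluding it (the sk_filter, lwt and sk_skb hunks below add it to their deny lists), and the padding in front of it is now rejected explicitly. A hedged, libbpf-style sketch of a tc program reading it:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int read_hw_ts(struct __sk_buff *skb)
{
	__u64 ts = skb->hwtstamp;	/* NIC hardware timestamp, 0 if unset */

	if (!ts)
		return TC_ACT_OK;	/* no hardware timestamp on this packet */
	/* ... account or export ts ... */
	return TC_ACT_OK;
}
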
@@ -9735,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
struct bpf_insn *insn)
{
- /* si->dst_reg = skb->data */
+ int reg;
+ int temp_reg_off = offsetof(struct sk_buff, cb) +
+ offsetof(struct sk_skb_cb, temp_reg);
+
+ if (si->src_reg == si->dst_reg) {
+ /* We need an extra register, choose and save a register. */
+ reg = BPF_REG_9;
+ if (si->src_reg == reg || si->dst_reg == reg)
+ reg--;
+ if (si->src_reg == reg || si->dst_reg == reg)
+ reg--;
+ *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
+ } else {
+ reg = si->dst_reg;
+ }
+
+ /* reg = skb->data */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
- si->dst_reg, si->src_reg,
+ reg, si->src_reg,
offsetof(struct sk_buff, data));
/* AX = skb->len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, len));
- /* si->dst_reg = skb->data + skb->len */
- *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+ /* reg = skb->data + skb->len */
+ *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
/* AX = skb->data_len */
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
BPF_REG_AX, si->src_reg,
offsetof(struct sk_buff, data_len));
- /* si->dst_reg = skb->data + skb->len - skb->data_len */
- *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+ /* reg = skb->data + skb->len - skb->data_len */
+ *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
+
+ if (si->src_reg == si->dst_reg) {
+ /* Restore the saved register */
+ *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
+ *insn++ = BPF_MOV64_REG(si->dst_reg, reg);
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
+ }
return insn;
}
@@ -9761,11 +9806,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
+ int off;
switch (si->off) {
case offsetof(struct __sk_buff, data_end):
insn = bpf_convert_data_end_access(si, insn);
break;
+ case offsetof(struct __sk_buff, cb[0]) ...
+ offsetofend(struct __sk_buff, cb[4]) - 1:
+ BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
+ BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+ offsetof(struct sk_skb_cb, data)) %
+ sizeof(__u64));
+
+ prog->cb_access = 1;
+ off = si->off;
+ off -= offsetof(struct __sk_buff, cb[0]);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct sk_skb_cb, data);
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
+ else
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+ si->src_reg, off);
+ break;
+
default:
return bpf_convert_ctx_access(type, si, insn_buf, prog,
target_size);
@@ -10402,8 +10469,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
return -EINVAL;
if (unlikely(sk && sk_is_refcounted(sk)))
return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
- if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
- return -ESOCKTNOSUPPORT; /* reject connected sockets */
+ if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+ return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+ if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+ return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
/* Check if socket is suitable for packet L3/L4 protocol */
if (sk && sk->sk_protocol != ctx->protocol)
@@ -10702,6 +10771,26 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
+BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
+{
+ /* unix_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct unix_sock);
+ if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
+ .func = bpf_skc_to_unix_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
+};
+
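
bpf_skc_to_unix_sock() is the AF_UNIX counterpart of the existing skc_to_*_sock casts: it returns a BTF-typed struct unix_sock pointer, or NULL when the socket is not a full AF_UNIX socket. A hedged sketch of calling it from a tracing program (attach point and field access are illustrative only):

/* Sketch; assumes vmlinux.h, bpf_helpers.h and bpf_tracing.h (CO-RE build). */
SEC("fentry/unix_stream_sendmsg")
int BPF_PROG(trace_unix_send, struct socket *sock)
{
	struct unix_sock *us = bpf_skc_to_unix_sock(sock->sk);

	if (!us)
		return 0;
	/* ... inspect *us via BTF/CO-RE reads ... */
	return 0;
}
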
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -10741,6 +10830,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_udp6_sock:
func = &bpf_skc_to_udp6_sock_proto;
break;
+ case BPF_FUNC_skc_to_unix_sock:
+ func = &bpf_skc_to_unix_sock_proto;
+ break;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index bac0184cf3de..3255f57f5131 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1196,9 +1196,8 @@ proto_again:
break;
}
- proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
- switch (proto) {
+ switch (hdr->proto) {
case htons(PPP_IP):
proto = htons(ETH_P_IP);
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
@@ -1307,6 +1306,11 @@ ip_proto_again:
switch (ip_proto) {
case IPPROTO_GRE:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
target_container, data,
&proto, &nhoff, &hlen, flags);
@@ -1364,6 +1368,11 @@ ip_proto_again:
break;
}
case IPPROTO_IPIP:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
proto = htons(ETH_P_IP);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
@@ -1376,6 +1385,11 @@ ip_proto_again:
break;
case IPPROTO_IPV6:
+ if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
proto = htons(ETH_P_IPV6);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 8e582e29a41e..4fcbdd71c59f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -40,10 +40,10 @@
*/
struct net_rate_estimator {
- struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_basic_sync *bstats;
spinlock_t *stats_lock;
- seqcount_t *running;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+ bool running;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
u8 ewma_log;
u8 intvl_log; /* period : (250ms << intvl_log) */
@@ -60,13 +60,13 @@ struct net_rate_estimator {
};
static void est_fetch_counters(struct net_rate_estimator *e,
- struct gnet_stats_basic_packed *b)
+ struct gnet_stats_basic_sync *b)
{
- memset(b, 0, sizeof(*b));
+ gnet_stats_basic_sync_init(b);
if (e->stats_lock)
spin_lock(e->stats_lock);
- __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
+ gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
if (e->stats_lock)
spin_unlock(e->stats_lock);
@@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e,
static void est_timer(struct timer_list *t)
{
struct net_rate_estimator *est = from_timer(est, t, timer);
- struct gnet_stats_basic_packed b;
+ struct gnet_stats_basic_sync b;
+ u64 b_bytes, b_packets;
u64 rate, brate;
est_fetch_counters(est, &b);
- brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+ b_bytes = u64_stats_read(&b.bytes);
+ b_packets = u64_stats_read(&b.packets);
+
+ brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
- rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+ rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
@@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t)
est->avpps += rate;
write_seqcount_end(&est->seq);
- est->last_bytes = b.bytes;
- est->last_packets = b.packets;
+ est->last_bytes = b_bytes;
+ est->last_packets = b_packets;
est->next_jiffies += ((HZ/4) << est->intvl_log);
@@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t)
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
- * @running: qdisc running seqcount
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ * internal values might change during basic reads. Only used
+ * if @cpu_bstats is NULL
* @opt: rate estimator configuration TLV
*
* Creates a new rate estimator with &bstats as source and &rate_est
@@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t)
* Returns 0 on success or a negative error code.
*
*/
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running,
+ bool running,
struct nlattr *opt)
{
struct gnet_estimator *parm = nla_data(opt);
struct net_rate_estimator *old, *est;
- struct gnet_stats_basic_packed b;
+ struct gnet_stats_basic_sync b;
int intvl_log;
if (nla_len(opt) < sizeof(*parm))
@@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
est_fetch_counters(est, &b);
if (lock)
local_bh_enable();
- est->last_bytes = b.bytes;
- est->last_packets = b.packets;
+ est->last_bytes = u64_stats_read(&b.bytes);
+ est->last_packets = u64_stats_read(&b.packets);
if (lock)
spin_lock_bh(lock);
@@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
* @cpu_bstats: bstats per cpu
* @rate_est: rate estimator statistics
* @lock: lock for statistics and control path
- * @running: qdisc running seqcount (might be NULL)
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ * internal values might change during basic reads. Only used
+ * if @cpu_bstats is NULL
* @opt: rate estimator configuration TLV
*
* Replaces the configuration of a rate estimator by calling
@@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
*
* Returns 0 on success or a negative error code.
*/
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt)
+ bool running, struct nlattr *opt)
{
return gen_new_estimator(bstats, cpu_bstats, rate_est,
lock, running, opt);
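
A hedged sketch of how a caller adapts to the new prototype, where the seqcount pointer argument becomes a plain bool (the qdisc and netlink attribute names below are illustrative):

	err = gen_new_estimator(&sch->bstats, sch->cpu_bstats,
				&sch->rate_est, NULL,
				true,	/* bstats updated while the qdisc runs */
				tca[TCA_RATE]);
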
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index e491b083b348..a10335b4ba2d 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -18,7 +18,7 @@
#include <linux/gen_stats.h>
#include <net/netlink.h>
#include <net/gen_stats.h>
-
+#include <net/sch_generic.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
@@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
}
EXPORT_SYMBOL(gnet_stats_start_copy);
-static void
-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu)
+/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
{
+ u64_stats_set(&b->bytes, 0);
+ u64_stats_set(&b->packets, 0);
+ u64_stats_init(&b->syncp);
+}
+EXPORT_SYMBOL(gnet_stats_basic_sync_init);
+
+static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu)
+{
+ u64 t_bytes = 0, t_packets = 0;
int i;
for_each_possible_cpu(i) {
- struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
unsigned int start;
u64 bytes, packets;
do {
start = u64_stats_fetch_begin_irq(&bcpu->syncp);
- bytes = bcpu->bstats.bytes;
- packets = bcpu->bstats.packets;
+ bytes = u64_stats_read(&bcpu->bytes);
+ packets = u64_stats_read(&bcpu->packets);
} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
- bstats->bytes += bytes;
- bstats->packets += packets;
+ t_bytes += bytes;
+ t_packets += packets;
+ }
+ _bstats_update(bstats, t_bytes, t_packets);
+}
+
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running)
+{
+ unsigned int start;
+ u64 bytes = 0;
+ u64 packets = 0;
+
+ WARN_ON_ONCE((cpu || running) && in_hardirq());
+
+ if (cpu) {
+ gnet_stats_add_basic_cpu(bstats, cpu);
+ return;
}
+ do {
+ if (running)
+ start = u64_stats_fetch_begin_irq(&b->syncp);
+ bytes = u64_stats_read(&b->bytes);
+ packets = u64_stats_read(&b->packets);
+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+ _bstats_update(bstats, bytes, packets);
}
+EXPORT_SYMBOL(gnet_stats_add_basic);
-void
-__gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running)
{
- unsigned int seq;
+ unsigned int start;
if (cpu) {
- __gnet_stats_copy_basic_cpu(bstats, cpu);
+ u64 t_bytes = 0, t_packets = 0;
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+ unsigned int start;
+ u64 bytes, packets;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+ bytes = u64_stats_read(&bcpu->bytes);
+ packets = u64_stats_read(&bcpu->packets);
+ } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+ t_bytes += bytes;
+ t_packets += packets;
+ }
+ *ret_bytes = t_bytes;
+ *ret_packets = t_packets;
return;
}
do {
if (running)
- seq = read_seqcount_begin(running);
- bstats->bytes = b->bytes;
- bstats->packets = b->packets;
- } while (running && read_seqcount_retry(running, seq));
+ start = u64_stats_fetch_begin_irq(&b->syncp);
+ *ret_bytes = u64_stats_read(&b->bytes);
+ *ret_packets = u64_stats_read(&b->packets);
+ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
}
-EXPORT_SYMBOL(__gnet_stats_copy_basic);
static int
-___gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b,
- int type)
+___gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ int type, bool running)
{
- struct gnet_stats_basic_packed bstats = {0};
+ u64 bstats_bytes, bstats_packets;
- __gnet_stats_copy_basic(running, &bstats, cpu, b);
+ gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
- d->tc_stats.bytes = bstats.bytes;
- d->tc_stats.packets = bstats.packets;
+ d->tc_stats.bytes = bstats_bytes;
+ d->tc_stats.packets = bstats_packets;
}
if (d->tail) {
@@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
int res;
memset(&sb, 0, sizeof(sb));
- sb.bytes = bstats.bytes;
- sb.packets = bstats.packets;
+ sb.bytes = bstats_bytes;
+ sb.packets = bstats_packets;
res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
- if (res < 0 || sb.packets == bstats.packets)
+ if (res < 0 || sb.packets == bstats_packets)
return res;
/* emit 64bit stats only if needed */
- return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
- sizeof(bstats.packets), TCA_STATS_PAD);
+ return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
+ sizeof(bstats_packets), TCA_STATS_PAD);
}
return 0;
}
/**
* gnet_stats_copy_basic - copy basic statistics into statistic TLV
- * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ * internal values might change during basic reads.
+ * Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ bool running)
{
- return ___gnet_stats_copy_basic(running, d, cpu, b,
- TCA_STATS_BASIC);
+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic);
/**
* gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
- * @running: seqcount_t pointer
* @d: dumping handle
* @cpu: copy statistic per cpu
* @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ * internal values might change during basic reads.
+ * Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
*
* Appends the basic statistics to the top level TLV created by
* gnet_stats_start_copy().
@@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
* if the room in the socket buffer was not sufficient.
*/
int
-gnet_stats_copy_basic_hw(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+ bool running)
{
- return ___gnet_stats_copy_basic(running, d, cpu, b,
- TCA_STATS_BASIC_HW);
+ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
}
EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
@@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
}
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-static void
-__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *q)
+static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *q)
{
int i;
for_each_possible_cpu(i) {
const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
- qstats->qlen = 0;
+ qstats->qlen += qcpu->backlog;
qstats->backlog += qcpu->backlog;
qstats->drops += qcpu->drops;
qstats->requeues += qcpu->requeues;
@@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
}
}
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu,
- const struct gnet_stats_queue *q,
- __u32 qlen)
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu,
+ const struct gnet_stats_queue *q)
{
if (cpu) {
- __gnet_stats_copy_queue_cpu(qstats, cpu);
+ gnet_stats_add_queue_cpu(qstats, cpu);
} else {
- qstats->qlen = q->qlen;
- qstats->backlog = q->backlog;
- qstats->drops = q->drops;
- qstats->requeues = q->requeues;
- qstats->overlimits = q->overlimits;
+ qstats->qlen += q->qlen;
+ qstats->backlog += q->backlog;
+ qstats->drops += q->drops;
+ qstats->requeues += q->requeues;
+ qstats->overlimits += q->overlimits;
}
-
- qstats->qlen = qlen;
}
-EXPORT_SYMBOL(__gnet_stats_copy_queue);
+EXPORT_SYMBOL(gnet_stats_add_queue);
/**
* gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
{
struct gnet_stats_queue qstats = {0};
- __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
+ gnet_stats_add_queue(&qstats, cpu_q, q);
+ qstats.qlen = qlen;
if (d->compat_tc_stats) {
d->tc_stats.drops = qstats.drops;
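
For illustration, the read-side pattern under the new API: initialise a synced counter pair, then fold per-CPU and/or global counters into it. The qdisc-like object "q" and the "running" flag are assumptions for the example, not part of this diff:

	struct gnet_stats_basic_sync total;

	gnet_stats_basic_sync_init(&total);
	gnet_stats_add_basic(&total, q->cpu_bstats, &q->bstats, running);
	pr_info("bytes=%llu packets=%llu\n",
		(unsigned long long)u64_stats_read(&total.bytes),
		(unsigned long long)u64_stats_read(&total.packets));
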
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2d5bc3a75fae..47931c8be04b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,6 +122,8 @@ static void neigh_mark_dead(struct neighbour *n)
list_del_init(&n->gc_list);
atomic_dec(&n->tbl->gc_entries);
}
+ if (!list_empty(&n->managed_list))
+ list_del_init(&n->managed_list);
}
static void neigh_update_gc_list(struct neighbour *n)
@@ -130,7 +132,6 @@ static void neigh_update_gc_list(struct neighbour *n)
write_lock_bh(&n->tbl->lock);
write_lock(&n->lock);
-
if (n->dead)
goto out;
@@ -149,32 +150,59 @@ static void neigh_update_gc_list(struct neighbour *n)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
atomic_inc(&n->tbl->gc_entries);
}
+out:
+ write_unlock(&n->lock);
+ write_unlock_bh(&n->tbl->lock);
+}
+static void neigh_update_managed_list(struct neighbour *n)
+{
+ bool on_managed_list, add_to_managed;
+
+ write_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+ if (n->dead)
+ goto out;
+
+ add_to_managed = n->flags & NTF_MANAGED;
+ on_managed_list = !list_empty(&n->managed_list);
+
+ if (!add_to_managed && on_managed_list)
+ list_del_init(&n->managed_list);
+ else if (add_to_managed && !on_managed_list)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
write_unlock(&n->lock);
write_unlock_bh(&n->tbl->lock);
}
-static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
- int *notify)
+static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
+ bool *gc_update, bool *managed_update)
{
- bool rc = false;
- u8 ndm_flags;
+ u32 ndm_flags, old_flags = neigh->flags;
if (!(flags & NEIGH_UPDATE_F_ADMIN))
- return rc;
+ return;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
- ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
- if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
neigh->flags |= NTF_EXT_LEARNED;
else
neigh->flags &= ~NTF_EXT_LEARNED;
- rc = true;
*notify = 1;
+ *gc_update = true;
+ }
+ if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
+ if (ndm_flags & NTF_MANAGED)
+ neigh->flags |= NTF_MANAGED;
+ else
+ neigh->flags &= ~NTF_MANAGED;
+ *notify = 1;
+ *managed_update = true;
}
-
- return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
@@ -379,7 +407,7 @@ EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
struct net_device *dev,
- bool exempt_from_gc)
+ u32 flags, bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
@@ -412,6 +440,7 @@ do_alloc:
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
+ n->flags = flags;
seqlock_init(&n->hh.hh_lock);
n->parms = neigh_parms_clone(&tbl->parms);
timer_setup(&n->timer, neigh_timer_handler, 0);
@@ -421,6 +450,7 @@ do_alloc:
refcount_set(&n->refcnt, 1);
n->dead = 1;
INIT_LIST_HEAD(&n->gc_list);
+ INIT_LIST_HEAD(&n->managed_list);
atomic_inc(&tbl->entries);
out:
@@ -575,19 +605,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-static struct neighbour *___neigh_create(struct neigh_table *tbl,
- const void *pkey,
- struct net_device *dev,
- bool exempt_from_gc, bool want_ref)
+static struct neighbour *
+___neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, u32 flags,
+ bool exempt_from_gc, bool want_ref)
{
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
- u32 hash_val;
- unsigned int key_len = tbl->key_len;
- int error;
+ u32 hash_val, key_len = tbl->key_len;
+ struct neighbour *n1, *rc, *n;
struct neigh_hash_table *nht;
+ int error;
+ n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
-
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
@@ -650,7 +679,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
n->dead = 0;
if (!exempt_from_gc)
list_add_tail(&n->gc_list, &n->tbl->gc_list);
-
+ if (n->flags & NTF_MANAGED)
+ list_add_tail(&n->managed_list, &n->tbl->managed_list);
if (want_ref)
neigh_hold(n);
rcu_assign_pointer(n->next,
@@ -674,7 +704,7 @@ out_neigh_release:
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
- return ___neigh_create(tbl, pkey, dev, false, want_ref);
+ return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
@@ -1205,8 +1235,6 @@ static void neigh_update_hhs(struct neighbour *neigh)
}
}
-
-
/* Generic update routine.
-- lladdr is new lladdr or NULL, if it is not supplied.
-- new is new state.
@@ -1217,7 +1245,8 @@ static void neigh_update_hhs(struct neighbour *neigh)
lladdr instead of overriding it
if it is different.
NEIGH_UPDATE_F_ADMIN means that the change is administrative.
-
+ NEIGH_UPDATE_F_USE means that the entry is user triggered.
+ NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
@@ -1225,17 +1254,15 @@ static void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
u8 new, u32 flags, u32 nlmsg_pid,
struct netlink_ext_ack *extack)
{
- bool ext_learn_change = false;
- u8 old;
- int err;
- int notify = 0;
- struct net_device *dev;
+ bool gc_update = false, managed_update = false;
int update_isrouter = 0;
+ struct net_device *dev;
+ int err, notify = 0;
+ u8 old;
trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
@@ -1254,7 +1281,13 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
- ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
+ neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
+ if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
+ new = old & ~NUD_PERMANENT;
+ neigh->nud_state = new;
+ err = 0;
+ goto out;
+ }
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1399,15 +1432,13 @@ out:
if (update_isrouter)
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
-
- if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ if (((new ^ old) & NUD_PERMANENT) || gc_update)
neigh_update_gc_list(neigh);
-
+ if (managed_update)
+ neigh_update_managed_list(neigh);
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
-
trace_neigh_update_done(neigh, err);
-
return err;
}
@@ -1533,6 +1564,20 @@ int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
}
EXPORT_SYMBOL(neigh_direct_output);
+static void neigh_managed_work(struct work_struct *work)
+{
+ struct neigh_table *tbl = container_of(work, struct neigh_table,
+ managed_work.work);
+ struct neighbour *neigh;
+
+ write_lock_bh(&tbl->lock);
+ list_for_each_entry(neigh, &tbl->managed_list, managed_list)
+ neigh_event_send(neigh, NULL);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
+ NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME));
+ write_unlock_bh(&tbl->lock);
+}
+
static void neigh_proxy_process(struct timer_list *t)
{
struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
@@ -1679,6 +1724,8 @@ void neigh_table_init(int index, struct neigh_table *tbl)
INIT_LIST_HEAD(&tbl->parms_list);
INIT_LIST_HEAD(&tbl->gc_list);
+ INIT_LIST_HEAD(&tbl->managed_list);
+
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
@@ -1710,9 +1757,13 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
tbl->parms.reachable_time);
+ INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
+ queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
+
timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
skb_queue_head_init_class(&tbl->proxy_queue,
&neigh_table_proxy_queue_class);
@@ -1783,6 +1834,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
+ [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
[NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
};
@@ -1855,7 +1907,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
struct net *net = sock_net(skb->sk);
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
@@ -1864,6 +1916,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct neighbour *neigh;
void *dst, *lladdr;
u8 protocol = 0;
+ u32 ndm_flags;
int err;
ASSERT_RTNL();
@@ -1879,6 +1932,15 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
}
ndm = nlmsg_data(nlh);
+ ndm_flags = ndm->ndm_flags;
+ if (tb[NDA_FLAGS_EXT]) {
+ u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
+
+ BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
+ (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
+ hweight32(NTF_EXT_MASK)));
+ ndm_flags |= (ext << NTF_EXT_SHIFT);
+ }
if (ndm->ndm_ifindex) {
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
@@ -1906,14 +1968,18 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
-
- if (ndm->ndm_flags & NTF_PROXY) {
+ if (ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
+ if (ndm_flags & NTF_MANAGED) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
+ goto out;
+ }
+
err = -ENOBUFS;
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
- pn->flags = ndm->ndm_flags;
+ pn->flags = ndm_flags;
if (protocol)
pn->protocol = protocol;
err = 0;
@@ -1933,16 +1999,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
- bool exempt_from_gc;
+ bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
+ bool exempt_from_gc = ndm_permanent ||
+ ndm_flags & NTF_EXT_LEARNED;
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
+ if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
+ NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
+ err = -EINVAL;
+ goto out;
+ }
- exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
- ndm->ndm_flags & NTF_EXT_LEARNED;
- neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
+ neigh = ___neigh_create(tbl, dst, dev,
+ ndm_flags &
+ (NTF_EXT_LEARNED | NTF_MANAGED),
+ exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1961,22 +2035,22 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (protocol)
neigh->protocol = protocol;
-
- if (ndm->ndm_flags & NTF_EXT_LEARNED)
+ if (ndm_flags & NTF_EXT_LEARNED)
flags |= NEIGH_UPDATE_F_EXT_LEARNED;
-
- if (ndm->ndm_flags & NTF_ROUTER)
+ if (ndm_flags & NTF_ROUTER)
flags |= NEIGH_UPDATE_F_ISROUTER;
+ if (ndm_flags & NTF_MANAGED)
+ flags |= NEIGH_UPDATE_F_MANAGED;
+ if (ndm_flags & NTF_USE)
+ flags |= NEIGH_UPDATE_F_USE;
- if (ndm->ndm_flags & NTF_USE) {
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
neigh_event_send(neigh, NULL);
err = 0;
- } else
- err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid, extack);
-
+ }
neigh_release(neigh);
-
out:
return err;
}
@@ -2427,6 +2501,7 @@ out:
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
u32 pid, u32 seq, int type, unsigned int flags)
{
+ u32 neigh_flags, neigh_flags_ext;
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -2436,11 +2511,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
+ neigh_flags = neigh->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = neigh->ops->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = neigh->flags;
+ ndm->ndm_flags = neigh_flags;
ndm->ndm_type = neigh->type;
ndm->ndm_ifindex = neigh->dev->ifindex;
@@ -2471,6 +2549,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2484,6 +2564,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 pid, u32 seq, int type, unsigned int flags,
struct neigh_table *tbl)
{
+ u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
@@ -2491,11 +2572,14 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nlh == NULL)
return -EMSGSIZE;
+ neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
+ neigh_flags = pn->flags & NTF_OLD_MASK;
+
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = pn->flags | NTF_PROXY;
+ ndm->ndm_flags = neigh_flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
@@ -2505,6 +2589,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
goto nla_put_failure;
+ if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
+ goto nla_put_failure;
nlmsg_end(skb, nlh);
return 0;
@@ -2820,6 +2906,7 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
+ nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
@@ -2848,6 +2935,7 @@ static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(4) /* NDA_FLAGS_EXT */
+ nla_total_size(1); /* NDA_PROTOCOL */
}
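
As an illustration of the flag split introduced above (not taken from the diff), the 32-bit neighbour flag word is exposed as the legacy 8-bit ndm_flags plus the new NDA_FLAGS_EXT attribute:

	u32 all_flags = neigh->flags;
	u8  ndm_flags = all_flags & NTF_OLD_MASK;	/* fits struct ndmsg */
	u32 ext_flags = all_flags >> NTF_EXT_SHIFT;	/* e.g. the managed bit */

	if (ext_flags && nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
		goto nla_put_failure;
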
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index eab5fc88a002..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
- "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Interface| Receive "
- " | Transmit\n"
- " | bytes packets errs drop fifo frame "
- "compressed multicast| bytes packets errs "
- " drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-9s %ps\n",
+ seq_printf(seq, " %-8s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
+ if (v == SEQ_START_TOKEN)
return 0;
- }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..9c01c642cf9e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_carrier; this helps returning early
+ * without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_carrier)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_carrier);
}
@@ -196,6 +204,12 @@ static ssize_t speed_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
int ret = -EINVAL;
+ /* The check is also done in __ethtool_get_link_ksettings; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->ethtool_ops->get_link_ksettings)
+ return ret;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
+ struct net_device *netdev = to_net_dev(dev);
+
+ /* The check is also done in change_proto_down; this helps returning
+ * early without hitting the trylock/restart in netdev_store.
+ */
+ if (!netdev->netdev_ops->ndo_change_proto_down)
+ return -EOPNOTSUPP;
+
return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);
@@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The check is also done in dev_get_phys_port_id; this helps returning
+ * early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_phys_port_name; this helps
+ * returning early without hitting the trylock/restart below.
+ */
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
ssize_t ret = -EINVAL;
+ /* The checks are also done in dev_get_port_parent_id; this helps
+ * returning early without hitting the trylock/restart below. This works
+ * because recurse is false when calling dev_get_port_parent_id.
+ */
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
+ !netdev->netdev_ops->ndo_get_devlink_port)
+ return -EOPNOTSUPP;
+
if (!rtnl_trylock())
return restart_syscall();
@@ -1226,6 +1275,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue,
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ /* The check is also done later; this helps returning early without
+ * hitting the trylock/restart below.
+ */
+ if (!dev->netdev_ops->ndo_set_tx_maxrate)
+ return -EOPNOTSUPP;
+
err = kstrtou32(buf, 10, &rate);
if (err < 0)
return err;
@@ -1869,7 +1924,7 @@ static struct class net_class __ro_after_init = {
.get_ownership = net_get_ownership,
};
-#ifdef CONFIG_OF_NET
+#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
for (; dev; dev = dev->parent) {
@@ -1973,9 +2028,9 @@ int netdev_register_kobject(struct net_device *ndev)
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
const struct net *net_new)
{
+ kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
+ kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
struct device *dev = &ndev->dev;
- kuid_t old_uid, new_uid;
- kgid_t old_gid, new_gid;
int error;
net_ns_get_ownership(net_old, &old_uid, &old_gid);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a448a9b5bb2d..202fa5eacd0f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -473,7 +473,9 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0) {
put_userns:
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(user_ns);
net_free(net);
dec_ucounts:
@@ -605,7 +607,9 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
dec_net_namespaces(net->ucounts);
+#ifdef CONFIG_KEYS
key_remove_domain(net->key_domain);
+#endif
put_user_ns(net->user_ns);
net_free(net);
}
diff --git a/net/core/of_net.c b/net/core/of_net.c
new file mode 100644
index 000000000000..f1a9bf7578e7
--- /dev/null
+++ b/net/core/of_net.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OF helpers for network devices.
+ *
+ * Initially copied out of arch/powerpc/kernel/prom_parse.c
+ */
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/nvmem-consumer.h>
+
+/**
+ * of_get_phy_mode - Get phy mode for given device_node
+ * @np: Pointer to the given device_node
+ * @interface: Pointer to the result
+ *
+ * The function gets the phy interface string from the 'phy-mode' or
+ * 'phy-connection-type' property. The index into the phy_modes table is
+ * stored in @interface and 0 is returned. In case of error, @interface is
+ * set to PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
+ */
+int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
+{
+ const char *pm;
+ int err, i;
+
+ *interface = PHY_INTERFACE_MODE_NA;
+
+ err = of_property_read_string(np, "phy-mode", &pm);
+ if (err < 0)
+ err = of_property_read_string(np, "phy-connection-type", &pm);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
+ if (!strcasecmp(pm, phy_modes(i))) {
+ *interface = i;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_get_phy_mode);
+
+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
+{
+ struct property *pp = of_find_property(np, name, NULL);
+
+ if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+ memcpy(addr, pp->value, ETH_ALEN);
+ return 0;
+ }
+ return -ENODEV;
+}
+
+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+{
+ struct platform_device *pdev = of_find_device_by_node(np);
+ struct nvmem_cell *cell;
+ const void *mac;
+ size_t len;
+ int ret;
+
+ /* Try lookup by device first; there might be an nvmem_cell_lookup
+ * associated with a given device.
+ */
+ if (pdev) {
+ ret = nvmem_get_mac_address(&pdev->dev, addr);
+ put_device(&pdev->dev);
+ return ret;
+ }
+
+ cell = of_nvmem_cell_get(np, "mac-address");
+ if (IS_ERR(cell))
+ return PTR_ERR(cell);
+
+ mac = nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
+ }
+
+ memcpy(addr, mac, ETH_ALEN);
+ kfree(mac);
+
+ return 0;
+}
+
+/**
+ * of_get_mac_address()
+ * @np: Caller's Device Node
+ * @addr: Pointer to a six-byte array for the result
+ *
+ * Search the device tree for the best MAC address to use. 'mac-address' is
+ * checked first, because that is supposed to contain the "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree. If none
+ * of the above is set, then try to get the MAC address from the nvmem cell
+ * named 'mac-address'.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot. For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses. Some older U-Boots only initialized 'local-mac-address'. In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ *
+ * Return: 0 on success and errno in case of error.
+ */
+int of_get_mac_address(struct device_node *np, u8 *addr)
+{
+ int ret;
+
+ if (!np)
+ return -ENODEV;
+
+ ret = of_get_mac_addr(np, "mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "local-mac-address", addr);
+ if (!ret)
+ return 0;
+
+ ret = of_get_mac_addr(np, "address", addr);
+ if (!ret)
+ return 0;
+
+ return of_get_mac_addr_nvmem(np, addr);
+}
+EXPORT_SYMBOL(of_get_mac_address);
+
+/**
+ * of_get_ethdev_address()
+ * @np: Caller's Device Node
+ * @dev: Pointer to the netdevice whose address will be updated
+ *
+ * Search the device tree for the best MAC address to use.
+ * If found, set @dev->dev_addr to that address.
+ *
+ * See documentation of of_get_mac_address() for more information on how
+ * the best address is determined.
+ *
+ * Return: 0 on success and errno in case of error.
+ */
+int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
+{
+ u8 addr[ETH_ALEN];
+ int ret;
+
+ ret = of_get_mac_address(np, addr);
+ if (!ret)
+ eth_hw_addr_set(dev, addr);
+ return ret;
+}
+EXPORT_SYMBOL(of_get_ethdev_address);
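
A hedged sketch of a typical consumer in a driver probe path; the driver name and fallback policy are illustrative and not part of this diff:

	#include <linux/etherdevice.h>
	#include <linux/of_net.h>
	#include <linux/platform_device.h>

	static int foo_eth_probe(struct platform_device *pdev)
	{
		struct net_device *ndev = devm_alloc_etherdev(&pdev->dev, 0);
		int err;

		if (!ndev)
			return -ENOMEM;

		err = of_get_ethdev_address(pdev->dev.of_node, ndev);
		if (err == -EPROBE_DEFER)
			return err;		/* nvmem provider not ready yet */
		if (err)
			eth_hw_addr_random(ndev); /* no usable DT/nvmem MAC */

		return register_netdev(ndev);
	}
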
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1a6978427d6c..9b60e4301a44 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -49,6 +49,12 @@ static int page_pool_init(struct page_pool *pool,
* which is the XDP_TX use-case.
*/
if (pool->p.flags & PP_FLAG_DMA_MAP) {
+ /* DMA-mapping is not supported on 32-bit systems with
+ * 64-bit DMA mapping.
+ */
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ return -EOPNOTSUPP;
+
if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
@@ -69,10 +75,6 @@ static int page_pool_init(struct page_pool *pool,
*/
}
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
- pool->p.flags & PP_FLAG_PAGE_FRAG)
- return -EINVAL;
-
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 972c8cb303a5..2af8aeeadadf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -301,7 +301,7 @@ int rtnl_unregister(int protocol, int msgtype)
}
link = rtnl_dereference(tab[msgindex]);
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
rtnl_unlock();
kfree_rcu(link, rcu);
@@ -337,7 +337,7 @@ void rtnl_unregister_all(int protocol)
if (!link)
continue;
- rcu_assign_pointer(tab[msgindex], NULL);
+ RCU_INIT_POINTER(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock();
@@ -3204,8 +3204,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev->mtu = mtu;
}
if (tb[IFLA_ADDRESS]) {
- memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
- nla_len(tb[IFLA_ADDRESS]));
+ __dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]),
+ nla_len(tb[IFLA_ADDRESS]));
dev->addr_assign_type = NET_ADDR_SET;
}
if (tb[IFLA_BROADCAST])
@@ -3804,9 +3804,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- size_t if_info_size;
- skb = nlmsg_new((if_info_size = if_nlmsg_size(dev, 0)), flags);
+ skb = nlmsg_new(if_nlmsg_size(dev, 0), flags);
if (skb == NULL)
goto errout;
@@ -4384,7 +4383,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (br_dev != netdev_master_upper_dev_get(dev) &&
- !(dev->priv_flags & IFF_EBRIDGE))
+ !netif_is_bridge_master(dev))
continue;
cops = ops;
}
@@ -5262,7 +5261,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 9077fa969892..acb1ee97bbd3 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -15,8 +15,8 @@
#include <net/udp.h>
struct net_packet_attrs {
- unsigned char *src;
- unsigned char *dst;
+ const unsigned char *src;
+ const unsigned char *dst;
u32 ip_src;
u32 ip_dst;
bool tcp;
@@ -173,8 +173,8 @@ static int net_test_loopback_validate(struct sk_buff *skb,
struct net_device *orig_ndev)
{
struct net_test_priv *tpriv = pt->af_packet_priv;
- unsigned char *src = tpriv->packet->src;
- unsigned char *dst = tpriv->packet->dst;
+ const unsigned char *src = tpriv->packet->src;
+ const unsigned char *dst = tpriv->packet->dst;
struct netsfhdr *shdr;
struct ethhdr *ehdr;
struct udphdr *uhdr;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2170bea2c7de..ba2f38246f07 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#include <net/mctp.h>
#include <net/page_pool.h>
#include <linux/uaccess.h>
@@ -80,6 +81,7 @@
#include <linux/indirect_call_wrapper.h>
#include "datagram.h"
+#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
@@ -134,34 +136,31 @@ struct napi_alloc_cache {
static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
-static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
-}
-
-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
-{
fragsz = SKB_DATA_ALIGN(fragsz);
- return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
}
EXPORT_SYMBOL(__napi_alloc_frag_align);
void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
- struct page_frag_cache *nc;
void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
if (in_hardirq() || irqs_disabled()) {
- nc = this_cpu_ptr(&netdev_alloc_cache);
+ struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
+
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
} else {
+ struct napi_alloc_cache *nc;
+
local_bh_disable();
- data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ nc = this_cpu_ptr(&napi_alloc_cache);
+ data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
local_bh_enable();
}
return data;
@@ -397,8 +396,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct kmem_cache *cache;
struct sk_buff *skb;
- u8 *data;
+ unsigned int osize;
bool pfmemalloc;
+ u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
? skbuff_fclone_cache : skbuff_head_cache;
@@ -430,7 +430,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- size = SKB_WITH_OVERHEAD(ksize(data));
+ osize = ksize(data);
+ size = SKB_WITH_OVERHEAD(osize);
prefetchw(data + size);
/*
@@ -439,7 +440,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, 0);
+ __build_skb_around(skb, data, osize);
skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
@@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom);
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
{
int delta = headroom - skb_headroom(skb);
+ int osize = skb_end_offset(skb);
+ struct sock *sk = skb->sk;
if (WARN_ONCE(delta <= 0,
"%s is expecting an increase in the headroom", __func__))
return skb;
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
+ delta = SKB_DATA_ALIGN(delta);
+ /* pskb_expand_head() might crash, if skb is shared. */
+ if (skb_shared(skb) || !is_skb_wmem(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
+ if (unlikely(!nskb))
+ goto fail;
+
+ if (sk)
+ skb_set_owner_w(nskb, sk);
+ consume_skb(skb);
skb = nskb;
}
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
+ if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
+ goto fail;
+
+ if (sk && is_skb_wmem(skb)) {
+ delta = skb_end_offset(skb) - osize;
+ refcount_add(delta, &sk->sk_wmem_alloc);
+ skb->truesize += delta;
}
return skb;
+
+fail:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL(skb_expand_head);
@@ -3423,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
+ const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
- skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+ skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
@@ -3439,19 +3450,7 @@ EXPORT_SYMBOL(skb_split);
*/
static int skb_prepare_for_shift(struct sk_buff *skb)
{
- int ret = 0;
-
- if (skb_cloned(skb)) {
- /* Save and restore truesize: pskb_expand_head() may reallocate
- * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
- * cannot change truesize at this point.
- */
- unsigned int save_truesize = skb->truesize;
-
- ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- skb->truesize = save_truesize;
- }
- return ret;
+ return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
}
/**
@@ -4431,6 +4430,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_MPTCP)
[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+ [SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4448,6 +4450,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_MPTCP)
skb_ext_type_len[SKB_EXT_MPTCP] +
#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+ skb_ext_type_len[SKB_EXT_MCTP] +
+#endif
0;
}
@@ -6520,6 +6525,14 @@ static void skb_ext_put_sp(struct sec_path *sp)
}
#endif
+#ifdef CONFIG_MCTP_FLOWS
+static void skb_ext_put_mctp(struct mctp_flow *flow)
+{
+ if (flow->key)
+ mctp_key_unref(flow->key);
+}
+#endif
+
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
struct skb_ext *ext = skb->extensions;
@@ -6555,6 +6568,10 @@ free_now:
if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
#endif
+#ifdef CONFIG_MCTP_FLOWS
+ if (__skb_ext_exist(ext, SKB_EXT_MCTP))
+ skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP));
+#endif
kmem_cache_free(skbuff_ext_cache, ext);
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..1ae52ac943f6 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+bool sk_msg_is_readable(struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (likely(psock))
+ empty = list_empty(&psock->ingress_msg);
+ rcu_read_unlock();
+ return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
@@ -494,6 +508,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
}
static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+ u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
struct sk_msg *msg)
@@ -507,11 +522,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
*/
if (skb_linearize(skb))
return -EAGAIN;
- num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (unlikely(num_sge < 0))
return num_sge;
- copied = skb->len;
+ copied = len;
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
@@ -522,9 +537,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
return copied;
}
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len);
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
@@ -535,7 +552,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb);
+ return sk_psock_skb_ingress_self(psock, skb, off, len);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -547,7 +564,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
kfree(msg);
return err;
@@ -557,7 +574,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* skb. In this case we do not need to check memory limits or skb_set_owner_r
* because the skb is already accounted for here.
*/
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+ u32 off, u32 len)
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -567,7 +585,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
return -EAGAIN;
sk_msg_init(msg);
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
if (err < 0)
kfree(msg);
return err;
@@ -581,7 +599,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb);
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -624,6 +642,12 @@ static void sk_psock_backlog(struct work_struct *work)
while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len;
off = 0;
+ if (skb_bpf_strparser(skb)) {
+ struct strp_msg *stm = strp_msg(skb);
+
+ off = stm->offset;
+ len = stm->full_len;
+ }
start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
@@ -863,6 +887,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
* return code, but then didn't set a redirect interface.
*/
if (unlikely(!sk_other)) {
+ skb_bpf_redirect_clear(skb);
sock_drop(from->sk, skb);
return -EIO;
}
@@ -930,6 +955,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
{
struct sock *sk_other;
int err = 0;
+ u32 len, off;
switch (verdict) {
case __SK_PASS:
@@ -937,6 +963,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ skb_bpf_redirect_clear(skb);
goto out_free;
}
@@ -949,7 +976,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
* retrying later from workqueue.
*/
if (skb_queue_empty(&psock->ingress_skb)) {
- err = sk_psock_skb_ingress_self(psock, skb);
+ len = skb->len;
+ off = 0;
+ if (skb_bpf_strparser(skb)) {
+ struct strp_msg *stm = strp_msg(skb);
+
+ off = stm->offset;
+ len = stm->full_len;
+ }
+ err = sk_psock_skb_ingress_self(psock, skb, off, len);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
@@ -1015,6 +1050,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
skb_dst_drop(skb);
skb_bpf_redirect_clear(skb);
ret = bpf_prog_run_pin_on_cpu(prog, skb);
+ if (ret == SK_PASS)
+ skb_bpf_set_strparser(skb);
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c1601f75ec4b..8f2b2f2c0e7b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -350,7 +350,7 @@ void sk_error_report(struct sock *sk)
}
EXPORT_SYMBOL(sk_error_report);
-static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
+int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -379,12 +379,11 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
*(struct __kernel_sock_timeval *)optval = tv;
return sizeof(tv);
}
+EXPORT_SYMBOL(sock_get_timeout);
-static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
- bool old_timeval)
+int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
+ sockptr_t optval, int optlen, bool old_timeval)
{
- struct __kernel_sock_timeval tv;
-
if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32;
@@ -393,8 +392,8 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
return -EFAULT;
- tv.tv_sec = tv32.tv_sec;
- tv.tv_usec = tv32.tv_usec;
+ tv->tv_sec = tv32.tv_sec;
+ tv->tv_usec = tv32.tv_usec;
} else if (old_timeval) {
struct __kernel_old_timeval old_tv;
@@ -402,14 +401,28 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return -EINVAL;
if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
return -EFAULT;
- tv.tv_sec = old_tv.tv_sec;
- tv.tv_usec = old_tv.tv_usec;
+ tv->tv_sec = old_tv.tv_sec;
+ tv->tv_usec = old_tv.tv_usec;
} else {
- if (optlen < sizeof(tv))
+ if (optlen < sizeof(*tv))
return -EINVAL;
- if (copy_from_sockptr(&tv, optval, sizeof(tv)))
+ if (copy_from_sockptr(tv, optval, sizeof(*tv)))
return -EFAULT;
}
+
+ return 0;
+}
+EXPORT_SYMBOL(sock_copy_user_timeval);
+
+static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
+ bool old_timeval)
+{
+ struct __kernel_sock_timeval tv;
+ int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+
+ if (err)
+ return err;
+
if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
return -EDOM;
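sock_get_timeout() loses its static and is exported, and the user-copy half of sock_set_timeout() is split out as sock_copy_user_timeval(), presumably so protocol option handlers outside net/core/sock.c (MPTCP is the likely consumer) can parse SO_RCVTIMEO/SO_SNDTIMEO with the same old/new/compat handling. A hypothetical out-of-file caller mirrors the refactored sock_set_timeout():

/* Hypothetical caller; only the exported helpers come from this patch. */
static int example_parse_timeout(struct __kernel_sock_timeval *tv,
				 sockptr_t optval, int optlen, bool old_timeval)
{
	int err = sock_copy_user_timeval(tv, optval, optlen, old_timeval);

	if (err)
		return err;
	if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
		return -EDOM;
	return 0;
}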
@@ -947,6 +960,53 @@ void sock_set_mark(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(sock_set_mark);
+static void sock_release_reserved_memory(struct sock *sk, int bytes)
+{
+ /* Round down bytes to multiple of pages */
+ bytes &= ~(SK_MEM_QUANTUM - 1);
+
+ WARN_ON(bytes > sk->sk_reserved_mem);
+ sk->sk_reserved_mem -= bytes;
+ sk_mem_reclaim(sk);
+}
+
+static int sock_reserve_memory(struct sock *sk, int bytes)
+{
+ long allocated;
+ bool charged;
+ int pages;
+
+ if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
+ return -EOPNOTSUPP;
+
+ if (!bytes)
+ return 0;
+
+ pages = sk_mem_pages(bytes);
+
+ /* pre-charge to memcg */
+ charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ if (!charged)
+ return -ENOMEM;
+
+ /* pre-charge to forward_alloc */
+ allocated = sk_memory_allocated_add(sk, pages);
+ /* If the system goes into memory pressure with this
+ * precharge, give up and return error.
+ */
+ if (allocated > sk_prot_mem_limits(sk, 1)) {
+ sk_memory_allocated_sub(sk, pages);
+ mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
+ return -ENOMEM;
+ }
+ sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT;
+
+ sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT;
+
+ return 0;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
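The reservation is accounted in whole pages: sk_mem_pages() rounds the request up, the pages are pre-charged to the memcg and to the protocol's global counters, and the same amount is credited to both sk_forward_alloc and sk_reserved_mem; release rounds down, so a partial page stays reserved until final reclaim. The arithmetic as a standalone illustration, assuming SK_MEM_QUANTUM == 4096 (PAGE_SIZE on most configurations):

#include <stdio.h>

#define SK_MEM_QUANTUM		4096
#define SK_MEM_QUANTUM_SHIFT	12

/* Mirrors the kernel's sk_mem_pages(): round bytes up to whole pages. */
static int sk_mem_pages(int amt)
{
	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
}

int main(void)
{
	int bytes = 10000;
	int reserved = sk_mem_pages(bytes) << SK_MEM_QUANTUM_SHIFT;
	int released = bytes & ~(SK_MEM_QUANTUM - 1);	/* round down, as in sock_release_reserved_memory() */

	printf("reserve %d -> %d bytes accounted\n", bytes, reserved);	/* 12288 */
	printf("release %d -> %d bytes returned\n", bytes, released);	/* 8192 */
	return 0;
}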
@@ -1367,6 +1427,23 @@ set_sndbuf:
~SOCK_BUF_LOCK_MASK);
break;
+ case SO_RESERVE_MEM:
+ {
+ int delta;
+
+ if (val < 0) {
+ ret = -EINVAL;
+ break;
+ }
+
+ delta = val - sk->sk_reserved_mem;
+ if (delta < 0)
+ sock_release_reserved_memory(sk, -delta);
+ else
+ ret = sock_reserve_memory(sk, delta);
+ break;
+ }
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1750,6 +1827,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
break;
+ case SO_RESERVE_MEM:
+ v.val = sk->sk_reserved_mem;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
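From userspace the new option behaves like any other integer SOL_SOCKET option: setting it reserves page-rounded forward-allocated memory (it fails with EOPNOTSUPP unless memcg socket accounting is active), and reading it back reports the amount actually reserved. A sketch, assuming SO_RESERVE_MEM == 73 as in asm-generic/socket.h at the time of this change, for toolchains whose headers do not define it yet:

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_RESERVE_MEM
#define SO_RESERVE_MEM 73	/* assumption: value from asm-generic/socket.h */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int val = 1 << 20, out = 0;
	socklen_t len = sizeof(out);

	if (fd < 0)
		return 1;
	if (setsockopt(fd, SOL_SOCKET, SO_RESERVE_MEM, &val, sizeof(val)))
		perror("setsockopt(SO_RESERVE_MEM)");
	else if (!getsockopt(fd, SOL_SOCKET, SO_RESERVE_MEM, &out, &len))
		printf("reserved: %d bytes\n", out);	/* rounded up to whole pages */
	return 0;
}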
@@ -2063,6 +2144,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+ newsk->sk_reserved_mem = 0;
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
new file mode 100644
index 000000000000..2f396e6bfba5
--- /dev/null
+++ b/net/core/sock_destructor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
+#define _NET_CORE_SOCK_DESTRUCTOR_H
+#include <net/tcp.h>
+
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+ return skb->destructor == sock_wfree ||
+ skb->destructor == __sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+#endif
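The new private header lets net/core code check whether an skb's truesize is currently charged to the sending socket's write allocation (i.e. its destructor is one of the wfree variants) before re-targeting it. A hypothetical caller, sketched only to show the intended usage:

#include "sock_destructor.h"

/* Hypothetical: move the write-memory charge to a new owner, but only
 * when the skb is actually wmem-accounted.
 */
static void example_retarget(struct sk_buff *skb, struct sock *sk)
{
	if (!is_skb_wmem(skb))
		return;
	skb_orphan(skb);		/* return the charge to the old owner */
	skb_set_owner_w(skb, sk);	/* and charge the new one */
}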
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..f39ef79ced67 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
-static bool sk_is_tcp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_STREAM &&
- sk->sk_protocol == IPPROTO_TCP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
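The file-local sk_is_tcp() copy is removed; the check is trivial and is presumably now provided by a shared inline helper (location assumed, e.g. include/net/sock.h) rather than being re-implemented per file:

/* Presumed shared form of the dropped helper (definition unchanged). */
static inline bool sk_is_tcp(const struct sock *sk)
{
	return sk->sk_type == SOCK_STREAM &&
	       sk->sk_protocol == IPPROTO_TCP;
}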
diff --git a/net/core/stream.c b/net/core/stream.c
index 4f1d4aa5fb38..06b36c730ce8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -195,14 +195,11 @@ void sk_stream_kill_queues(struct sock *sk)
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Next, the error queue. */
- __skb_queue_purge(&sk->sk_error_queue);
-
/* Next, the write queue. */
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
/* Account for returned memory. */
- sk_mem_reclaim(sk);
+ sk_mem_reclaim_final(sk);
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
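Two things change here: the error-queue purge is dropped (presumably left to the socket destructor, where it cannot race with still-referenced error skbs), and the final reclaim also releases any SO_RESERVE_MEM credit from the hunk above. The new helper is not shown in this diff; reconstructed from the reservation change, its shape would be roughly:

static inline void sk_mem_reclaim_final(struct sock *sk)
{
	sk->sk_reserved_mem = 0;	/* the reservation no longer pins pages */
	sk_mem_reclaim(sk);		/* then return everything to the pool */
}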
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..5f88526ad61c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
.extra1 = &long_one,
- .extra2 = &long_max,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{
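The writable ceiling of net.core.bpf_jit_limit drops from LONG_MAX to bpf_jit_limit_max, a value presumably derived at boot from the architecture's JIT allocation limit (the declaration and setup live in the BPF core, not in this diff). The assumed relationship, sketched as a reconstruction:

long bpf_jit_limit_max __read_mostly;	/* assumed: defined in kernel/bpf/core.c */

static int __init bpf_jit_charge_init(void)
{
	/* Cap the sysctl at what the architecture can actually allocate. */
	bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
	bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE),
			      LONG_MAX);
	return 0;
}
pure_initcall(bpf_jit_charge_init);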
diff --git a/net/core/xdp.c b/net/core/xdp.c
index cc92ccb38432..5ddc29f29bad 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -143,8 +143,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
if (xdp_rxq->reg_state == REG_STATE_UNUSED)
return;
- WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
-
xdp_rxq_info_unreg_mem_model(xdp_rxq);
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
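Dropping the "Driver BUG" warning means xdp_rxq_info_unreg() only bails out early for REG_STATE_UNUSED; a driver can funnel its error and close paths through a single teardown helper without a splat when registration never fully completed. A hypothetical driver-side pattern (struct example_ring and its fields are made up for illustration):

struct example_ring {
	struct xdp_rxq_info xdp_rxq;
	u32 qid;
};

static int example_setup_rxq(struct example_ring *ring, struct net_device *dev)
{
	int err;

	err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->qid, 0);
	if (err)
		return err;

	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
					 MEM_TYPE_PAGE_SHARED, NULL);
	if (err)
		xdp_rxq_info_unreg(&ring->xdp_rxq);
	return err;
}

static void example_free_rxq(struct example_ring *ring)
{
	/* Safe from both the error and the normal close path. */
	if (xdp_rxq_info_is_reg(&ring->xdp_rxq))
		xdp_rxq_info_unreg(&ring->xdp_rxq);
}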