From 5b22d3669f2fa6e762c5302fc4b6051a92b81617 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 18:55:39 +0300 Subject: net: dsa: track the number of switches in a tree In preparation of supporting data plane forwarding on behalf of a software bridge, some drivers might need to view bridges as virtual switches behind the CPU port in a cross-chip topology. Give them some help and let them know how many physical switches there are in the tree, so that they can count the virtual switches starting from that number on. Note that the first dsa_switch_ops method where this information is reliably available is .setup(). This is because of how DSA works: in a tree with 3 switches, each calling dsa_register_switch(), the first 2 will advance until dsa_tree_setup() -> dsa_tree_setup_routing_table() and exit with error code 0 because the topology is not complete. Since probing is parallel at this point, one switch does not know about the existence of the other. Then the third switch comes, and for it, dsa_tree_setup_routing_table() returns complete = true. This switch goes ahead and calls dsa_tree_setup_switches() for everybody else, calling their .setup() methods too. This acts as the synchronization point. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 185629f27f80..de5e93ba2a9d 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1265,6 +1265,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds, return -EEXIST; } + if (ds->dst->last_switch < ds->index) + ds->dst->last_switch = ds->index; + return 0; } -- cgit v1.2.3 From 123abc06e74f49d9b173a93cb2b797fb85f50ba3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 22 Jul 2021 18:55:40 +0300 Subject: net: dsa: add support for bridge TX forwarding offload For a DSA switch, to offload the forwarding process of a bridge device means to send the packets coming from the software bridge as data plane packets. This is contrary to everything that DSA has done so far, because the current taggers only know to send control packets (ones that target a specific destination port), whereas data plane packets are supposed to be forwarded according to the FDB lookup, much like packets ingressing on any regular ingress port. If the FDB lookup process returns multiple destination ports (flooding, multicast), then replication is also handled by the switch hardware - the bridge only sends a single packet and avoids the skb_clone(). DSA keeps for each bridge port a zero-based index (the number of the bridge). Multiple ports performing TX forwarding offload to the same bridge have the same dp->bridge_num value, and ports not offloading the TX data plane of a bridge have dp->bridge_num = -1. The tagger can check if the packet that is being transmitted on has skb->offload_fwd_mark = true or not. If it does, it can be sure that the packet belongs to the data plane of a bridge, further information about which can be obtained based on dp->bridge_dev and dp->bridge_num. It can then compose a DSA tag for injecting a data plane packet into that bridge number. For the switch driver side, we offer two new dsa_switch_ops methods, called .port_bridge_fwd_offload_{add,del}, which are modeled after .port_bridge_{join,leave}. These methods are provided in case the driver needs to configure the hardware to treat packets coming from that bridge software interface as data plane packets. The switchdev <-> bridge interaction happens during the netdev_master_upper_dev_link() call, so to switch drivers, the effect is that the .port_bridge_fwd_offload_add() method is called immediately after .port_bridge_join(). If the bridge number exceeds the number of bridges for which the switch driver can offload the TX data plane (and this includes the case where the driver can offload none), DSA falls back to simply returning tx_fwd_offload = false in the switchdev_bridge_port_offload() call. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index de5e93ba2a9d..c7fa85fb3086 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1044,6 +1044,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) dp->ds = ds; dp->index = index; + dp->bridge_num = -1; INIT_LIST_HEAD(&dp->list); list_add_tail(&dp->list, &dst->ports); -- cgit v1.2.3 From 0e8eb9a16e2569ff72f9d2f43d665e15d52bfa2e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 4 Aug 2021 16:54:29 +0300 Subject: net: dsa: rename teardown_default_cpu to teardown_cpu_ports There is nothing specific to having a default CPU port to what dsa_tree_teardown_default_cpu() does. Even with multiple CPU ports, it would do the same thing: iterate through the ports of this switch tree and reset the ->cpu_dp pointer to NULL. So rename it accordingly. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c7fa85fb3086..4f1aab6cf964 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -329,7 +329,7 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } -static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -927,7 +927,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) err = dsa_tree_setup_switches(dst); if (err) - goto teardown_default_cpu; + goto teardown_cpu_ports; err = dsa_tree_setup_master(dst); if (err) @@ -947,8 +947,8 @@ teardown_master: dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); -teardown_default_cpu: - dsa_tree_teardown_default_cpu(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); return err; } @@ -966,7 +966,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); - dsa_tree_teardown_default_cpu(dst); + dsa_tree_teardown_cpu_ports(dst); list_for_each_entry_safe(dl, next, &dst->rtable, list) { list_del(&dl->list); -- cgit v1.2.3 From 2c0b03258b8bda0ef6339229d18fbb2594317dbe Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 4 Aug 2021 16:54:30 +0300 Subject: net: dsa: give preference to local CPU ports Be there an "H" switch topology, where there are 2 switches connected as follows: eth0 eth1 | | CPU port CPU port | DSA link | sw0p0 sw0p1 sw0p2 sw0p3 sw0p4 -------- sw1p4 sw1p3 sw1p2 sw1p1 sw1p0 | | | | | | user user user user user user port port port port port port basically one where each switch has its own CPU port for termination, but there is also a DSA link in case packets need to be forwarded in hardware between one switch and another. DSA insists to see this as a daisy chain topology, basically registering all network interfaces as sw0p0@eth0, ... sw1p0@eth0 and disregarding eth1 as a valid DSA master. This is only half the story, since when asked using dsa_port_is_cpu(), DSA will respond that sw1p1 is a CPU port, however one which has no dp->cpu_dp pointing to it. So sw1p1 is enabled, but not used. Furthermore, be there a driver for switches which support only one upstream port. This driver iterates through its ports and checks using dsa_is_upstream_port() whether the current port is an upstream one. For switch 1, two ports pass the "is upstream port" checks: - sw1p4 is an upstream port because it is a routing port towards the dedicated CPU port assigned using dsa_tree_setup_default_cpu() - sw1p1 is also an upstream port because it is a CPU port, albeit one that is disabled. This is because dsa_upstream_port() returns: if (!cpu_dp) return port; which means that if @dp does not have a ->cpu_dp pointer (which is a characteristic of CPU ports themselves as well as unused ports), then @dp is its own upstream port. So the driver for switch 1 rightfully says: I have two upstream ports, but I don't support multiple upstream ports! So let me error out, I don't know which one to choose and what to do with the other one. Generally I am against enforcing any default policy in the kernel in terms of user to CPU port assignment (like round robin or such) but this case is different. To solve the conundrum, one would have to: - Disable sw1p1 in the device tree or mark it as "not a CPU port" in order to comply with DSA's view of this topology as a daisy chain, where the termination traffic from switch 1 must pass through switch 0. This is counter-productive because it wastes 1Gbps of termination throughput in switch 1. - Disable the DSA link between sw0p4 and sw1p4 and do software forwarding between switch 0 and 1, and basically treat the switches as part of disjoint switch trees. This is counter-productive because it wastes 1Gbps of autonomous forwarding throughput between switch 0 and 1. - Treat sw0p4 and sw1p4 as user ports instead of DSA links. This could work, but it makes cross-chip bridging impossible. In this setup we would need to have 2 separate bridges, br0 spanning the ports of switch 0, and br1 spanning the ports of switch 1, and the "DSA links treated as user ports" sw0p4 (part of br0) and sw1p4 (part of br1) are the gateway ports between one bridge and another. This is hard to manage from a user's perspective, who wants to have a unified view of the switching fabric and the ability to transparently add ports to the same bridge. VLANs would also need to be explicitly managed by the user on these gateway ports. So it seems that the only reasonable thing to do is to make DSA prefer CPU ports that are local to the switch. Meaning that by default, the user and DSA ports of switch 0 will get assigned to the CPU port from switch 0 (sw0p1) and the user and DSA ports of switch 1 will get assigned to the CPU port from switch 1. The way this solves the problem is that sw1p4 is no longer an upstream port as far as switch 1 is concerned (it no longer views sw0p1 as its dedicated CPU port). So here we are, the first multi-CPU port that DSA supports is also perhaps the most uneventful one: the individual switches don't support multiple CPUs, however the DSA switch tree as a whole does have multiple CPU ports. No user space assignment of user ports to CPU ports is desirable, necessary, or possible. Ports that do not have a local CPU port (say there was an extra switch hanging off of sw0p0) default to the standard implementation of getting assigned to the first CPU port of the DSA switch tree. Is that good enough? Probably not (if the downstream switch was hanging off of switch 1, we would most certainly prefer its CPU port to be sw1p1), but in order to support that use case too, we would need to traverse the dst->rtable in search of an optimum dedicated CPU port, one that has the smallest number of hops between dp->ds and dp->cpu_dp->ds. At the moment, the DSA routing table structure does not keep the number of hops between dl->dp and dl->link_dp, and while it is probably deducible, there is zero justification to write that code now. Let's hope DSA will never have to support that use case. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 4f1aab6cf964..a4c525f1cb17 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. + */ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp, *dp; @@ -321,14 +324,47 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return -EINVAL; } - /* Assign the default CPU port to all ports of the fabric */ - list_for_each_entry(dp, &dst->ports, list) + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = cpu_dp; + } return 0; } +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. + */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + /* Prefer a local CPU port */ + if (dp->ds != cpu_dp->ds) + continue; + + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; + + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + } + + return dsa_tree_setup_default_cpu(dst); +} + static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -921,7 +957,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (!complete) return 0; - err = dsa_tree_setup_default_cpu(dst); + err = dsa_tree_setup_cpu_ports(dst); if (err) return err; -- cgit v1.2.3 From 919d13a7e455c2e7676042d7a5f94c164e859d8a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 8 Aug 2021 21:57:43 +0300 Subject: devlink: Set device as early as possible All kernel devlink implementations call to devlink_alloc() during initialization routine for specific device which is used later as a parent device for devlink_register(). Such late device assignment causes to the situation which requires us to call to device_register() before setting other parameters, but that call opens devlink to the world and makes accessible for the netlink users. Any attempt to move devlink_register() to be the last call generates the following error due to access to the devlink->dev pointer. [ 8.758862] devlink_nl_param_fill+0x2e8/0xe50 [ 8.760305] devlink_param_notify+0x6d/0x180 [ 8.760435] __devlink_params_register+0x2f1/0x670 [ 8.760558] devlink_params_register+0x1e/0x20 The simple change of API to set devlink device in the devlink_alloc() instead of devlink_register() fixes all this above and ensures that prior to call to devlink_register() everything already set. Signed-off-by: Leon Romanovsky Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a4c525f1cb17..8150e16aaa55 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -746,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); + err = devlink_register(ds->devlink); if (err) goto free_devlink; -- cgit v1.2.3 From 724395f4dc9583bfb379ce41575eaaab299810b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 11 Aug 2021 16:46:06 +0300 Subject: net: dsa: tag_8021q: don't broadcast during setup/teardown Currently, on my board with multiple sja1105 switches in disjoint trees described in commit f66a6a69f97a ("net: dsa: permit cross-chip bridging between all trees in the system"), rebooting the board triggers the following benign warnings: [ 12.345566] sja1105 spi2.0: port 0 failed to notify tag_8021q VLAN 1088 deletion: -ENOENT [ 12.353804] sja1105 spi2.0: port 0 failed to notify tag_8021q VLAN 2112 deletion: -ENOENT [ 12.362019] sja1105 spi2.0: port 1 failed to notify tag_8021q VLAN 1089 deletion: -ENOENT [ 12.370246] sja1105 spi2.0: port 1 failed to notify tag_8021q VLAN 2113 deletion: -ENOENT [ 12.378466] sja1105 spi2.0: port 2 failed to notify tag_8021q VLAN 1090 deletion: -ENOENT [ 12.386683] sja1105 spi2.0: port 2 failed to notify tag_8021q VLAN 2114 deletion: -ENOENT Basically switch 1 calls dsa_tag_8021q_unregister, and switch 1's TX and RX VLANs cannot be found on switch 2's CPU port. But why would switch 2 even attempt to delete switch 1's TX and RX tag_8021q VLANs from its CPU port? Well, because we use dsa_broadcast, and it is supposed that it had added those VLANs in the first place (because in dsa_port_tag_8021q_vlan_match, all CPU ports match regardless of their tree index or switch index). The two trees probe asynchronously, and when switch 1 probed, it called dsa_broadcast which did not notify the tree of switch 2, because that didn't probe yet. But during unbind, switch 2's tree _is_ probed, so it _is_ notified of the deletion. Before jumping to introduce a synchronization mechanism between the probing across disjoint switch trees, let's take a step back and see whether we _need_ to do that in the first place. The RX and TX VLANs of switch 1 would be needed on switch 2's CPU port only if switch 1 and 2 were part of a cross-chip bridge. And dsa_tag_8021q_bridge_join takes care precisely of that (but if probing was synchronous, the bridge_join would just end up bumping the VLANs' refcount, because they are already installed by the setup path). Since by the time the ports are bridged, all DSA trees are already set up, and we don't need the tag_8021q VLANs of one switch installed on the other switches during probe time, the answer is that we don't need to fix the synchronization issue. So make the setup and teardown code paths call dsa_port_notify, which notifies only the local tree, and the bridge code paths call dsa_broadcast, which let the other trees know as well. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 8150e16aaa55..dcd67801eca4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -49,6 +49,9 @@ int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) * Can be used to notify the switching fabric of events such as cross-chip * bridging between disjoint trees (such as islands of tagger-compatible * switches bridged by an incompatible middle switch). + * + * WARNING: this function is not reliable during probe time, because probing + * between trees is asynchronous and not all DSA trees might have probed. */ int dsa_broadcast(unsigned long e, void *v) { -- cgit v1.2.3 From f5e165e72b29d908214e554ef57f67790ba95934 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:55:00 +0300 Subject: net: dsa: track unique bridge numbers across all DSA switch trees Right now, cross-tree bridging setups work somewhat by mistake. In the case of cross-tree bridging with sja1105, all switch instances need to agree upon a common VLAN ID for forwarding a packet that belongs to a certain bridging domain. With TX forwarding offload, the VLAN ID is the bridge VLAN for VLAN-aware bridging, and the tag_8021q TX forwarding offload VID (a VLAN which has non-zero VBID bits) for VLAN-unaware bridging. The VBID for VLAN-unaware bridging is derived from the dp->bridge_num value calculated by DSA independently for each switch tree. If ports from one tree join one bridge, and ports from another tree join another bridge, DSA will assign them the same bridge_num, even though the bridges are different. If cross-tree bridging is supported, this is an issue. Modify DSA to calculate the bridge_num globally across all switch trees. This has the implication for a driver that the dp->bridge_num value that DSA will assign to its ports might not be contiguous, if there are boards with multiple DSA drivers instantiated. Additionally, all bridge_num values eat up towards each switch's ds->num_fwd_offloading_bridges maximum, which is potentially unfortunate, and can be seen as a limitation introduced by this patch. However, that is the lesser evil for now. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'net/dsa/dsa2.c') diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dcd67801eca4..1b2b25d7bd02 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,9 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/* Track the bridges with forwarding offload enabled */ +static unsigned long dsa_fwd_offloading_bridges; + /** * dsa_tree_notify - Execute code for all switches in a DSA switch tree. * @dst: collection of struct dsa_switch devices to notify. @@ -126,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) } } +static int dsa_bridge_num_find(const struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not yet valid dp->bridge_num. + * However there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. + */ + list_for_each_entry(dst, &dsa_tree_list, list) + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && + dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) +{ + int bridge_num = dsa_bridge_num_find(bridge_dev); + + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= max) + return -1; + + set_bit(bridge_num, &dsa_fwd_offloading_bridges); + } + + return bridge_num; +} + +void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num) +{ + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_bridge_num_find(bridge_dev)) + clear_bit(bridge_num, &dsa_fwd_offloading_bridges); +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; -- cgit v1.2.3