From b5947239bfa666afd05ce0fc02b9c41ec8209e88 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 29 Aug 2023 14:29:50 +0100 Subject: net: stmmac: failure to probe without MAC interface specified Alexander Stein reports that commit a014c35556b9 ("net: stmmac: clarify difference between "interface" and "phy_interface"") caused breakage, because plat->mac_interface will never be negative. Fix this by using the "rc" temporary variable in stmmac_probe_config_dt(). Reported-by: Alexander Stein Signed-off-by: Russell King (Oracle) Tested-by: Alexander Stein Link: https://lore.kernel.org/r/E1qayn0-006Q8J-GE@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 35f4b1484029..0f28795e581c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -419,9 +419,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return ERR_PTR(phy_mode); plat->phy_interface = phy_mode; - plat->mac_interface = stmmac_of_get_mac_mode(np); - if (plat->mac_interface < 0) - plat->mac_interface = plat->phy_interface; + rc = stmmac_of_get_mac_mode(np); + plat->mac_interface = rc < 0 ? plat->phy_interface : rc; /* Some wrapper drivers still rely on phy_node. Let's save it while * they are not converted to phylink. */ -- cgit v1.2.3 From 8b72d2a1c6cc148320a93d029eb3a7e721f951f6 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 29 Aug 2023 10:47:17 +0200 Subject: NFC: nxp: add NXP1002 It is backwards compatible Signed-off-by: Oliver Neukum Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230829084717.961-1-oneukum@suse.com Signed-off-by: Jakub Kicinski --- drivers/nfc/nxp-nci/i2c.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index dca25a0c2f33..3ae4b41c59ac 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -336,6 +336,7 @@ MODULE_DEVICE_TABLE(of, of_nxp_nci_i2c_match); #ifdef CONFIG_ACPI static const struct acpi_device_id acpi_id[] = { { "NXP1001" }, + { "NXP1002" }, { "NXP7471" }, { } }; -- cgit v1.2.3 From ae074e2b2fd410bf54d56509a7e48fb83873af3b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 31 Aug 2023 17:58:11 +0100 Subject: sfc: check for zero length in EF10 RX prefix When EF10 RXDP firmware is operating in cut-through mode, packet length is not known at the time the RX prefix is generated, so it is left as zero and RX event merging is inhibited to ensure that the length is available in the RX event. However, it has been found that in certain circumstances the RX events for these packets still get merged, meaning the driver cannot read the length from the RX event, and tries to use the length from the prefix. The resulting zero-length SKBs cause crashes in GRO since commit 1d11fa696733 ("net-gro: remove GRO_DROP"), so add a check to the driver to detect these zero-length RX events and discard the packet. Signed-off-by: Edward Cree Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/rx.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 2375cef577e4..f77a2d3ef37e 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -359,26 +359,36 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, /* Handle a received packet. Second half: Touches packet payload. */ void __efx_rx_packet(struct efx_channel *channel) { + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); struct efx_nic *efx = channel->efx; struct efx_rx_buffer *rx_buf = - efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index); + efx_rx_buffer(rx_queue, channel->rx_pkt_index); u8 *eh = efx_rx_buf_va(rx_buf); /* Read length from the prefix if necessary. This already * excludes the length of the prefix itself. */ - if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) + if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) { rx_buf->len = le16_to_cpup((__le16 *) (eh + efx->rx_packet_len_offset)); + /* A known issue may prevent this being filled in; + * if that happens, just drop the packet. + * Must do that in the driver since passing a zero-length + * packet up to the stack may cause a crash. + */ + if (unlikely(!rx_buf->len)) { + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); + channel->n_rx_frm_trunc++; + goto out; + } + } /* If we're in loopback test, then pass the packet directly to the * loopback layer, and free the rx_buf here */ if (unlikely(efx->loopback_selftest)) { - struct efx_rx_queue *rx_queue; - efx_loopback_rx_packet(efx, eh, rx_buf->len); - rx_queue = efx_channel_get_rx_queue(channel); efx_free_rx_buffers(rx_queue, rx_buf, channel->rx_pkt_n_frags); goto out; -- cgit v1.2.3 From fa09bc40b21a33937872c4c4cf0f266ec9fa4869 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Thu, 31 Aug 2023 14:19:13 +0200 Subject: igb: disable virtualization features on 82580 Disable virtualization features on 82580 just as on i210/i211. This avoids that virt functions are acidentally called on 82850. Fixes: 55cac248caa4 ("igb: Add full support for 82580 devices") Signed-off-by: Corinna Vinschen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 1ab787ed254d..13ba9c74bd84 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3933,8 +3933,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - /* Virtualization features not supported on i210 family. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) + /* Virtualization features not supported on i210 and 82580 family. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) || + (hw->mac.type == e1000_82580)) return; /* Of the below we really only want the effect of getting -- cgit v1.2.3 From 817c7cd2043a83a3d8147f40eea1505ac7300b62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 Aug 2023 21:38:12 +0000 Subject: gve: fix frag_list chaining gve_rx_append_frags() is able to build skbs chained with frag_list, like GRO engine. Problem is that shinfo->frag_list should only be used for the head of the chain. All other links should use skb->next pointer. Otherwise, built skbs are not valid and can cause crashes. Equivalent code in GRO (skb_gro_receive()) is: if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; NAPI_GRO_CB(p)->last = skb; Fixes: 9b8dd5e5ea48 ("gve: DQO: Add RX path") Signed-off-by: Eric Dumazet Cc: Bailey Forrest Cc: Willem de Bruijn Cc: Catherine Sullivan Reviewed-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index ea0e38b4d9e9..f281e42a7ef9 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -570,7 +570,10 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; - skb_shinfo(rx->ctx.skb_tail)->frag_list = skb; + if (rx->ctx.skb_tail == rx->ctx.skb_head) + skb_shinfo(rx->ctx.skb_head)->frag_list = skb; + else + rx->ctx.skb_tail->next = skb; rx->ctx.skb_tail = skb; num_frags = 0; } -- cgit v1.2.3 From 151e887d8ff97e2e42110ffa1fb1e6a2128fb364 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Fri, 1 Sep 2023 12:09:21 +0800 Subject: veth: Fixing transmit return status for dropped packets The veth_xmit function returns NETDEV_TX_OK even when packets are dropped. This behavior leads to incorrect calculations of statistics counts, as well as things like txq->trans_start updates. Fixes: e314dbdc1c0d ("[NET]: Virtual ethernet device driver.") Signed-off-by: Liang Chen Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/veth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index d43e62ebc2fc..9c6f4f83f22b 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -344,6 +344,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct veth_rq *rq = NULL; + int ret = NETDEV_TX_OK; struct net_device *rcv; int length = skb->len; bool use_napi = false; @@ -378,11 +379,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) } else { drop: atomic64_inc(&priv->dropped); + ret = NET_XMIT_DROP; } rcu_read_unlock(); - return NETDEV_TX_OK; + return ret; } static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) -- cgit v1.2.3 From d3287e4038ca4f81e02067ab72d087af7224c68b Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 4 Sep 2023 10:56:04 +0200 Subject: Revert "net: macsec: preserve ingress frame ordering" This reverts commit ab046a5d4be4c90a3952a0eae75617b49c0cb01b. It was trying to work around an issue at the crypto layer by excluding ASYNC implementations of gcm(aes), because a bug in the AESNI version caused reordering when some requests bypassed the cryptd queue while older requests were still pending on the queue. This was fixed by commit 38b2f68b4264 ("crypto: aesni - Fix cryptd reordering problem on gcm"), which pre-dates ab046a5d4be4. Herbert Xu confirmed that all ASYNC implementations are expected to maintain the ordering of completions wrt requests, so we can use them in MACsec. On my test machine, this restores the performance of a single netperf instance, from 1.4Gbps to 4.4Gbps. Link: https://lore.kernel.org/netdev/9328d206c5d9f9239cae27e62e74de40b258471d.1692279161.git.sd@queasysnail.net/T/ Link: https://lore.kernel.org/netdev/1b0cec71-d084-8153-2ba4-72ce71abeb65@byu.edu/ Link: https://lore.kernel.org/netdev/d335ddaa-18dc-f9f0-17ee-9783d3b2ca29@mailbox.tu-dresden.de/ Fixes: ab046a5d4be4 ("net: macsec: preserve ingress frame ordering") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/11c952469d114db6fb29242e1d9545e61f52f512.1693757159.git.sd@queasysnail.net Signed-off-by: Paolo Abeni --- drivers/net/macsec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c3f30663070f..b7e151439c48 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1330,8 +1330,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len) struct crypto_aead *tfm; int ret; - /* Pick a sync gcm(aes) cipher to ensure order is preserved. */ - tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); if (IS_ERR(tfm)) return tfm; -- cgit v1.2.3 From 29fe7a1b62717d58f033009874554d99d71f7d37 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 5 Sep 2023 12:18:16 +0530 Subject: octeontx2-af: Fix truncation of smq in CN10K NIX AQ enqueue mbox handler The smq value used in the CN10K NIX AQ instruction enqueue mailbox handler was truncated to 9-bit value from 10-bit value because of typecasting the CN10K mbox request structure to the CN9K structure. Though this hasn't caused any problems when programming the NIX SQ context to the HW because the context structure is the same size. However, this causes a problem when accessing the structure parameters. This patch reads the right smq value for each platform. Fixes: 30077d210c83 ("octeontx2-af: cn10k: Update NIX/NPA context structure") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index c2f68678e947..23c2f2ed2fb8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -846,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, return 0; } +static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req, + u16 *smq, u16 *smq_mask) +{ + struct nix_cn10k_aq_enq_req *aq_req; + + if (!is_rvu_otx2(rvu)) { + aq_req = (struct nix_cn10k_aq_enq_req *)req; + *smq = aq_req->sq.smq; + *smq_mask = aq_req->sq_mask.smq; + } else { + *smq = req->sq.smq; + *smq_mask = req->sq_mask.smq; + } +} + static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_aq_enq_req *req, struct nix_aq_enq_rsp *rsp) @@ -857,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, struct rvu_block *block; struct admin_queue *aq; struct rvu_pfvf *pfvf; + u16 smq, smq_mask; void *ctx, *mask; bool ena; u64 cfg; @@ -928,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw, if (rc) return rc; + nix_get_aq_req_smq(rvu, req, &smq, &smq_mask); /* Check if SQ pointed SMQ belongs to this PF/VF or not */ if (req->ctype == NIX_AQ_CTYPE_SQ && ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) || (req->op == NIX_AQ_INSTOP_WRITE && - req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) { + req->sq_mask.ena && req->sq.ena && smq_mask))) { if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ, - pcifunc, req->sq.smq)) + pcifunc, smq)) return NIX_AF_ERR_AQ_ENQUEUE; } -- cgit v1.2.3 From 5aa48279712e1f134aac908acde4df798955a955 Mon Sep 17 00:00:00 2001 From: Olga Zaborska Date: Tue, 25 Jul 2023 10:10:56 +0200 Subject: igc: Change IGC_MIN to allow set rx/tx value between 64 and 80 Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx value between 64 and 80. All igc devices can use as low as 64 descriptors. This change will unify igc with other drivers. Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64") Fixes: 0507ef8a0372 ("igc: Add transmit and receive fastpath and interrupt handlers") Signed-off-by: Olga Zaborska Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 8ebe6999a528..f48f82d5e274 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -379,11 +379,11 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) /* TX/RX descriptor defines */ #define IGC_DEFAULT_TXD 256 #define IGC_DEFAULT_TX_WORK 128 -#define IGC_MIN_TXD 80 +#define IGC_MIN_TXD 64 #define IGC_MAX_TXD 4096 #define IGC_DEFAULT_RXD 256 -#define IGC_MIN_RXD 80 +#define IGC_MIN_RXD 64 #define IGC_MAX_RXD 4096 /* Supported Rx Buffer Sizes */ -- cgit v1.2.3 From 8360717524a24a421c36ef8eb512406dbd42160a Mon Sep 17 00:00:00 2001 From: Olga Zaborska Date: Tue, 25 Jul 2023 10:10:57 +0200 Subject: igbvf: Change IGBVF_MIN to allow set rx/tx value between 64 and 80 Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx value between 64 and 80. All igbvf devices can use as low as 64 descriptors. This change will unify igbvf with other drivers. Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64") Fixes: d4e0fe01a38a ("igbvf: add new driver to support 82576 virtual functions") Signed-off-by: Olga Zaborska Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/igbvf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h index 57d39ee00b58..7b83678ba83a 100644 --- a/drivers/net/ethernet/intel/igbvf/igbvf.h +++ b/drivers/net/ethernet/intel/igbvf/igbvf.h @@ -39,11 +39,11 @@ enum latency_range { /* Tx/Rx descriptor defines */ #define IGBVF_DEFAULT_TXD 256 #define IGBVF_MAX_TXD 4096 -#define IGBVF_MIN_TXD 80 +#define IGBVF_MIN_TXD 64 #define IGBVF_DEFAULT_RXD 256 #define IGBVF_MAX_RXD 4096 -#define IGBVF_MIN_RXD 80 +#define IGBVF_MIN_RXD 64 #define IGBVF_MIN_ITR_USECS 10 /* 100000 irq/sec */ #define IGBVF_MAX_ITR_USECS 10000 /* 100 irq/sec */ -- cgit v1.2.3 From 6319685bdc8ad5310890add907b7c42f89302886 Mon Sep 17 00:00:00 2001 From: Olga Zaborska Date: Tue, 25 Jul 2023 10:10:58 +0200 Subject: igb: Change IGB_MIN to allow set rx/tx value between 64 and 80 Change the minimum value of RX/TX descriptors to 64 to enable setting the rx/tx value between 64 and 80. All igb devices can use as low as 64 descriptors. This change will unify igb with other drivers. Based on commit 7b1be1987c1e ("e1000e: lower ring minimum size to 64") Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Olga Zaborska Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 015b78144114..a2b759531cb7 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -34,11 +34,11 @@ struct igb_adapter; /* TX/RX descriptor defines */ #define IGB_DEFAULT_TXD 256 #define IGB_DEFAULT_TX_WORK 128 -#define IGB_MIN_TXD 80 +#define IGB_MIN_TXD 64 #define IGB_MAX_TXD 4096 #define IGB_DEFAULT_RXD 256 -#define IGB_MIN_RXD 80 +#define IGB_MIN_RXD 64 #define IGB_MAX_RXD 4096 #define IGB_DEFAULT_ITR 3 /* dynamic */ -- cgit v1.2.3 From 39285e124edbc752331e98ace37cc141a6a3747a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 5 Sep 2023 08:46:10 +0000 Subject: net: team: do not use dynamic lockdep key team interface has used a dynamic lockdep key to avoid false-positive lockdep deadlock detection. Virtual interfaces such as team usually have their own lock for protecting private data. These interfaces can be nested. team0 | team1 Each interface's lock is actually different(team0->lock and team1->lock). So, mutex_lock(&team0->lock); mutex_lock(&team1->lock); mutex_unlock(&team1->lock); mutex_unlock(&team0->lock); The above case is absolutely safe. But lockdep warns about deadlock. Because the lockdep understands these two locks are same. This is a false-positive lockdep warning. So, in order to avoid this problem, the team interfaces started to use dynamic lockdep key. The false-positive problem was fixed, but it introduced a new problem. When the new team virtual interface is created, it registers a dynamic lockdep key(creates dynamic lockdep key) and uses it. But there is the limitation of the number of lockdep keys. So, If so many team interfaces are created, it consumes all lockdep keys. Then, the lockdep stops to work and warns about it. In order to fix this problem, team interfaces use the subclass instead of the dynamic key. So, when a new team interface is created, it doesn't register(create) a new lockdep, but uses existed subclass key instead. It is already used by the bonding interface for a similar case. As the bonding interface does, the subclass variable is the same as the 'dev->nested_level'. This variable indicates the depth in the stacked interface graph. The 'dev->nested_level' is protected by RTNL and RCU. So, 'mutex_lock_nested()' for 'team->lock' requires RTNL or RCU. In the current code, 'team->lock' is usually acquired under RTNL, there is no problem with using 'dev->nested_level'. The 'team_nl_team_get()' and The 'lb_stats_refresh()' functions acquire 'team->lock' without RTNL. But these don't iterate their own ports nested so they don't need nested lock. Reproducer: for i in {0..1000} do ip link add team$i type team ip link add dummy$i master team$i type dummy ip link set dummy$i up ip link set team$i up done Splat looks like: BUG: MAX_LOCKDEP_ENTRIES too low! turning off the locking correctness validator. Please attach the output of /proc/lock_stat to the bug report CPU: 0 PID: 4104 Comm: ip Not tainted 6.5.0-rc7+ #45 Call Trace: dump_stack_lvl+0x64/0xb0 add_lock_to_list+0x30d/0x5e0 check_prev_add+0x73a/0x23a0 ... sock_def_readable+0xfe/0x4f0 netlink_broadcast+0x76b/0xac0 nlmsg_notify+0x69/0x1d0 dev_open+0xed/0x130 ... Reported-by: syzbot+9bbbacfbf1e04d5221f7@syzkaller.appspotmail.com Fixes: 369f61bee0f5 ("team: fix nested locking lockdep warning") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/team/team.c | 111 +++++++++++++++---------------- drivers/net/team/team_mode_loadbalance.c | 4 +- 2 files changed, 56 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index e8b94580194e..ad29122a5468 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1135,8 +1135,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev, struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; - struct team_port *port; char *portname = port_dev->name; + struct team_port *port; int err; if (port_dev->flags & IFF_LOOPBACK) { @@ -1203,18 +1203,31 @@ static int team_port_add(struct team *team, struct net_device *port_dev, memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len); - err = team_port_enter(team, port); + err = dev_open(port_dev, extack); if (err) { - netdev_err(dev, "Device %s failed to enter team mode\n", + netdev_dbg(dev, "Device %s opening failed\n", portname); - goto err_port_enter; + goto err_dev_open; } - err = dev_open(port_dev, extack); + err = team_upper_dev_link(team, port, extack); if (err) { - netdev_dbg(dev, "Device %s opening failed\n", + netdev_err(dev, "Device %s failed to set upper link\n", portname); - goto err_dev_open; + goto err_set_upper_link; + } + + /* lockdep subclass variable(dev->nested_level) was updated by + * team_upper_dev_link(). + */ + team_unlock(team); + team_lock(team); + + err = team_port_enter(team, port); + if (err) { + netdev_err(dev, "Device %s failed to enter team mode\n", + portname); + goto err_port_enter; } err = vlan_vids_add_by_dev(port_dev, dev); @@ -1242,13 +1255,6 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_handler_register; } - err = team_upper_dev_link(team, port, extack); - if (err) { - netdev_err(dev, "Device %s failed to set upper link\n", - portname); - goto err_set_upper_link; - } - err = __team_option_inst_add_port(team, port); if (err) { netdev_err(dev, "Device %s failed to add per-port options\n", @@ -1295,9 +1301,6 @@ err_set_slave_promisc: __team_option_inst_del_port(team, port); err_option_port_add: - team_upper_dev_unlink(team, port); - -err_set_upper_link: netdev_rx_handler_unregister(port_dev); err_handler_register: @@ -1307,13 +1310,16 @@ err_enable_netpoll: vlan_vids_del_by_dev(port_dev, dev); err_vids_add: + team_port_leave(team, port); + +err_port_enter: + team_upper_dev_unlink(team, port); + +err_set_upper_link: dev_close(port_dev); err_dev_open: - team_port_leave(team, port); team_port_set_orig_dev_addr(port); - -err_port_enter: dev_set_mtu(port_dev, port->orig.mtu); err_set_mtu: @@ -1616,6 +1622,7 @@ static int team_init(struct net_device *dev) int err; team->dev = dev; + mutex_init(&team->lock); team_set_no_mode(team); team->notifier_ctx = false; @@ -1643,8 +1650,6 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); - lockdep_register_key(&team->team_lock_key); - __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); netdev_lockdep_set_classes(dev); return 0; @@ -1665,7 +1670,7 @@ static void team_uninit(struct net_device *dev) struct team_port *port; struct team_port *tmp; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry_safe(port, tmp, &team->port_list, list) team_port_del(team, port->dev); @@ -1674,9 +1679,8 @@ static void team_uninit(struct net_device *dev) team_mcast_rejoin_fini(team); team_notify_peers_fini(team); team_queue_override_fini(team); - mutex_unlock(&team->lock); + team_unlock(team); netdev_change_features(dev); - lockdep_unregister_key(&team->team_lock_key); } static void team_destructor(struct net_device *dev) @@ -1790,18 +1794,18 @@ static void team_set_rx_mode(struct net_device *dev) static int team_set_mac_address(struct net_device *dev, void *p) { - struct sockaddr *addr = p; struct team *team = netdev_priv(dev); + struct sockaddr *addr = p; struct team_port *port; if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; dev_addr_set(dev, addr->sa_data); - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - mutex_unlock(&team->lock); + team_unlock(team); return 0; } @@ -1815,7 +1819,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - mutex_lock(&team->lock); + team_lock(team); team->port_mtu_change_allowed = true; list_for_each_entry(port, &team->port_list, list) { err = dev_set_mtu(port->dev, new_mtu); @@ -1826,7 +1830,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) } } team->port_mtu_change_allowed = false; - mutex_unlock(&team->lock); + team_unlock(team); dev->mtu = new_mtu; @@ -1836,7 +1840,7 @@ unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) dev_set_mtu(port->dev, dev->mtu); team->port_mtu_change_allowed = false; - mutex_unlock(&team->lock); + team_unlock(team); return err; } @@ -1890,20 +1894,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) { err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } - mutex_unlock(&team->lock); + team_unlock(team); return 0; unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - mutex_unlock(&team->lock); + team_unlock(team); return err; } @@ -1913,10 +1917,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - mutex_unlock(&team->lock); + team_unlock(team); return 0; } @@ -1938,9 +1942,9 @@ static void team_netpoll_cleanup(struct net_device *dev) { struct team *team = netdev_priv(dev); - mutex_lock(&team->lock); + team_lock(team); __team_netpoll_cleanup(team); - mutex_unlock(&team->lock); + team_unlock(team); } static int team_netpoll_setup(struct net_device *dev, @@ -1950,7 +1954,7 @@ static int team_netpoll_setup(struct net_device *dev, struct team_port *port; int err = 0; - mutex_lock(&team->lock); + team_lock(team); list_for_each_entry(port, &team->port_list, list) { err = __team_port_enable_netpoll(port); if (err) { @@ -1958,7 +1962,7 @@ static int team_netpoll_setup(struct net_device *dev, break; } } - mutex_unlock(&team->lock); + team_unlock(team); return err; } #endif @@ -1969,9 +1973,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, struct team *team = netdev_priv(dev); int err; - mutex_lock(&team->lock); + team_lock(team); err = team_port_add(team, port_dev, extack); - mutex_unlock(&team->lock); + team_unlock(team); if (!err) netdev_change_features(dev); @@ -1984,19 +1988,12 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) struct team *team = netdev_priv(dev); int err; - mutex_lock(&team->lock); + team_lock(team); err = team_port_del(team, port_dev); - mutex_unlock(&team->lock); - - if (err) - return err; + team_unlock(team); - if (netif_is_team_master(port_dev)) { - lockdep_unregister_key(&team->team_lock_key); - lockdep_register_key(&team->team_lock_key); - lockdep_set_class(&team->lock, &team->team_lock_key); - } - netdev_change_features(dev); + if (!err) + netdev_change_features(dev); return err; } @@ -2316,13 +2313,13 @@ static struct team *team_nl_team_get(struct genl_info *info) } team = netdev_priv(dev); - mutex_lock(&team->lock); + __team_lock(team); return team; } static void team_nl_team_put(struct team *team) { - mutex_unlock(&team->lock); + team_unlock(team); dev_put(team->dev); } @@ -2984,9 +2981,9 @@ static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; - mutex_lock(&team->lock); + team_lock(team); __team_port_change_check(port, linkup); - mutex_unlock(&team->lock); + team_unlock(team); } diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 00f8989c29c0..7bcc9d37447a 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -478,7 +478,7 @@ static void lb_stats_refresh(struct work_struct *work) team = lb_priv_ex->team; lb_priv = get_lb_priv(team); - if (!mutex_trylock(&team->lock)) { + if (!team_trylock(team)) { schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); return; } @@ -515,7 +515,7 @@ static void lb_stats_refresh(struct work_struct *work) schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, (lb_priv_ex->stats.refresh_interval * HZ) / 10); - mutex_unlock(&team->lock); + team_unlock(team); } static void lb_stats_refresh_interval_get(struct team *team, -- cgit v1.2.3 From b7558a77529fef60e7992f40fb5353fed8be0cf8 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 5 Sep 2023 10:48:45 -0700 Subject: net/mlx5e: Clear mirred devices array if the rule is split In the cited commit, the mirred devices are recorded and checked while parsing the actions. In order to avoid system crash, the duplicate action in a single rule is not allowed. But the rule is actually break down into several FTEs in different tables, for either mirroring, or the specified types of actions which use post action infrastructure. It will reject certain action list by mistake, for example: actions:enp8s0f0_1,set(ipv4(ttl=63)),enp8s0f0_0,enp8s0f0_1. Here the rule is split to two FTEs because of pedit action. To fix this issue, when parsing the rule actions, reset if_count to clear the mirred devices array if the rule is split to multiple FTEs, and then the duplicate checking is restarted. Fixes: 554fe75c1b3f ("net/mlx5e: Avoid duplicating rule destinations") Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 7 files changed, 13 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index 92d3952dfa8b..feeb41693c17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - if (mlx5e_is_eswitch_flow(parse_state->flow)) + if (mlx5e_is_eswitch_flow(parse_state->flow)) { attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } attr->flags |= MLX5_ATTR_FLAG_CT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 291193f7120d..f63402c48028 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; + parse_state->if_count = 0; esw_attr->out_count++; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 3b272bbf4c53..368a95fa77d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c index ad09a8a5f36e..2d1d4a04501b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; + parse_state->if_count = 0; esw_attr->out_count++; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index c8a3eaf189f6..a13c5e707b83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, return err; esw_attr->split_count = esw_attr->out_count; + parse_state->if_count = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c index 310b99230760..f17575b09788 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, if (err) return err; - if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { attr->esw_attr->split_count = attr->esw_attr->out_count; + parse_state->if_count = 0; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 318083690fcd..c24828b688ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3936,6 +3936,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, } i_split = i + 1; + parse_state->if_count = 0; list_add(&attr->list, &flow->attrs); } -- cgit v1.2.3 From 344134609a564f28b3cc81ca6650319ccd5d8961 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 5 Sep 2023 10:48:46 -0700 Subject: mlx5/core: E-Switch, Create ACL FT for eswitch manager in switchdev mode ACL flow table is required in switchdev mode when metadata is enabled, driver creates such table when loading each vport. However, not every vport is loaded in switchdev mode. Such as ECPF if it's the eswitch manager. In this case, ACL flow table is still needed. To make it modularized, create ACL flow table for eswitch manager as default and skip such operations when loading manager vport. Also, there is no need to load the eswitch manager vport in switchdev mode. This means there is no need to load it on regular connect-x HCAs where the PF is the eswitch manager. This will avoid creating duplicate ACL flow table for host PF vport. Fixes: 29bcb6e4fe70 ("net/mlx5e: E-Switch, Use metadata for vport matching in send-to-vport rules") Fixes: eb8e9fae0a22 ("mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager") Fixes: 5019833d661f ("net/mlx5: E-switch, Introduce helper function to enable/disable vports") Signed-off-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 21 +++++++--- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 49 +++++++++++++++------- 2 files changed, 51 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6cd7d6497e10..d4cde6555063 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1276,12 +1276,19 @@ int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { + bool pf_needed; int ret; + pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY; + /* Enable PF vport */ - ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events); - if (ret) - return ret; + if (pf_needed) { + ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, + enabled_events); + if (ret) + return ret; + } /* Enable external host PF HCA */ ret = host_pf_enable_hca(esw->dev); @@ -1317,7 +1324,8 @@ ec_vf_err: ecpf_err: host_pf_disable_hca(esw->dev); pf_hca_err: - mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); + if (pf_needed) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1335,7 +1343,10 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) } host_pf_disable_hca(esw->dev); - mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 752fb0dfb111..b296ac52a439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3216,26 +3216,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, esw_acl_ingress_ofld_cleanup(esw, vport); } -static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) { - struct mlx5_vport *vport; + struct mlx5_vport *uplink, *manager; + int ret; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - if (IS_ERR(vport)) - return PTR_ERR(vport); + uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(uplink)) + return PTR_ERR(uplink); + + ret = esw_vport_create_offloads_acl_tables(esw, uplink); + if (ret) + return ret; + + manager = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(manager)) { + ret = PTR_ERR(manager); + goto err_manager; + } - return esw_vport_create_offloads_acl_tables(esw, vport); + ret = esw_vport_create_offloads_acl_tables(esw, manager); + if (ret) + goto err_manager; + + return 0; + +err_manager: + esw_vport_destroy_offloads_acl_tables(esw, uplink); + return ret; } -static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - if (IS_ERR(vport)) - return; + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); - esw_vport_destroy_offloads_acl_tables(esw, vport); + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (!IS_ERR(vport)) + esw_vport_destroy_offloads_acl_tables(esw, vport); } int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) @@ -3280,7 +3301,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.indir = indir; - err = esw_create_uplink_offloads_acl_tables(esw); + err = esw_create_offloads_acl_tables(esw); if (err) goto create_acl_err; @@ -3321,7 +3342,7 @@ create_fdb_err: create_restore_err: esw_destroy_offloads_table(esw); create_offloads_err: - esw_destroy_uplink_offloads_acl_tables(esw); + esw_destroy_offloads_acl_tables(esw); create_acl_err: mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); create_indir_err: @@ -3337,7 +3358,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_offloads_fdb_tables(esw); esw_destroy_restore_table(esw); esw_destroy_offloads_table(esw); - esw_destroy_uplink_offloads_acl_tables(esw); + esw_destroy_offloads_acl_tables(esw); mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); mutex_destroy(&esw->fdb_table.offloads.vports.lock); } -- cgit v1.2.3 From 954ad9bf13c4f95a4958b5f8433301f2ab99e1f5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Sep 2023 00:53:36 +0300 Subject: net: dsa: sja1105: fix bandwidth discrepancy between tc-cbs software and offload More careful measurement of the tc-cbs bandwidth shows that the stream bandwidth (effectively idleslope) increases, there is a larger and larger discrepancy between the rate limit obtained by the software Qdisc, and the rate limit obtained by its offloaded counterpart. The discrepancy becomes so large, that e.g. at an idleslope of 40000 (40Mbps), the offloaded cbs does not actually rate limit anything, and traffic will pass at line rate through a 100 Mbps port. The reason for the discrepancy is that the hardware documentation I've been following is incorrect. UM11040.pdf (for SJA1105P/Q/R/S) states about IDLE_SLOPE that it is "the rate (in unit of bytes/sec) at which the credit counter is increased". Cross-checking with UM10944.pdf (for SJA1105E/T) and UM11107.pdf (for SJA1110), the wording is different: "This field specifies the value, in bytes per second times link speed, by which the credit counter is increased". So there's an extra scaling for link speed that the driver is currently not accounting for, and apparently (empirically), that link speed is expressed in Kbps. I've pondered whether to pollute the sja1105_mac_link_up() implementation with CBS shaper reprogramming, but I don't think it is worth it. IMO, the UAPI exposed by tc-cbs requires user space to recalculate the sendslope anyway, since the formula for that depends on port_transmit_rate (see man tc-cbs), which is not an invariant from tc's perspective. So we use the offload->sendslope and offload->idleslope to deduce the original port_transmit_rate from the CBS formula, and use that value to scale the offload->sendslope and offload->idleslope to values that the hardware understands. Some numerical data points: 40Mbps stream, max interfering frame size 1500, port speed 100M --------------------------------------------------------------- tc-cbs parameters: idleslope 40000 sendslope -60000 locredit -900 hicredit 600 which result in hardware values: Before (doesn't work) After (works) credit_hi 600 600 credit_lo 900 900 send_slope 7500000 75 idle_slope 5000000 50 40Mbps stream, max interfering frame size 1500, port speed 1G ------------------------------------------------------------- tc-cbs parameters: idleslope 40000 sendslope -960000 locredit -1440 hicredit 60 which result in hardware values: Before (doesn't work) After (works) credit_hi 60 60 credit_lo 1440 1440 send_slope 120000000 120 idle_slope 5000000 5 5.12Mbps stream, max interfering frame size 1522, port speed 100M ----------------------------------------------------------------- tc-cbs parameters: idleslope 5120 sendslope -94880 locredit -1444 hicredit 77 which result in hardware values: Before (doesn't work) After (works) credit_hi 77 77 credit_lo 1444 1444 send_slope 11860000 118 idle_slope 640000 6 Tested on SJA1105T, SJA1105S and SJA1110A, at 1Gbps and 100Mbps. Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc") Reported-by: Yanan Yang Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 331bb1c6676a..3f17c17ff636 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2150,6 +2150,7 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; struct sja1105_cbs_entry *cbs; + s64 port_transmit_rate_kbps; int index; if (!offload->enable) @@ -2167,9 +2168,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port, */ cbs->credit_hi = offload->hicredit; cbs->credit_lo = abs(offload->locredit); - /* User space is in kbits/sec, hardware in bytes/sec */ - cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT; - cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT); + /* User space is in kbits/sec, while the hardware in bytes/sec times + * link speed. Since the given offload->sendslope is good only for the + * current link speed anyway, and user space is likely to reprogram it + * when that changes, don't even bother to track the port's link speed, + * but deduce the port transmit rate from idleslope - sendslope. + */ + port_transmit_rate_kbps = offload->idleslope - offload->sendslope; + cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT, + port_transmit_rate_kbps); + cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT), + port_transmit_rate_kbps); /* Convert the negative values from 64-bit 2's complement * to 32-bit 2's complement (for the case of 0x80000000 whose * negative is still negative). -- cgit v1.2.3 From 894cafc5c62ccced758077bd4e970dc714c42637 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Sep 2023 00:53:37 +0300 Subject: net: dsa: sja1105: fix -ENOSPC when replacing the same tc-cbs too many times After running command [2] too many times in a row: [1] $ tc qdisc add dev sw2p0 root handle 1: mqprio num_tc 8 \ map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0 [2] $ tc qdisc replace dev sw2p0 parent 1:1 cbs offload 1 \ idleslope 120000 sendslope -880000 locredit -1320 hicredit 180 (aka more than priv->info->num_cbs_shapers times) we start seeing the following error message: Error: Specified device failed to setup cbs hardware offload. This comes from the fact that ndo_setup_tc(TC_SETUP_QDISC_CBS) presents the same API for the qdisc create and replace cases, and the sja1105 driver fails to distinguish between the 2. Thus, it always thinks that it must allocate the same shaper for a {port, queue} pair, when it may instead have to replace an existing one. Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 3f17c17ff636..d7f57f223031 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2116,6 +2116,18 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port, #define BYTES_PER_KBIT (1000LL / 8) +static int sja1105_find_cbs_shaper(struct sja1105_private *priv, + int port, int prio) +{ + int i; + + for (i = 0; i < priv->info->num_cbs_shapers; i++) + if (priv->cbs[i].port == port && priv->cbs[i].prio == prio) + return i; + + return -1; +} + static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv) { int i; @@ -2156,9 +2168,14 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port, if (!offload->enable) return sja1105_delete_cbs_shaper(priv, port, offload->queue); - index = sja1105_find_unused_cbs_shaper(priv); - if (index < 0) - return -ENOSPC; + /* The user may be replacing an existing shaper */ + index = sja1105_find_cbs_shaper(priv, port, offload->queue); + if (index < 0) { + /* That isn't the case - see if we can allocate a new one */ + index = sja1105_find_unused_cbs_shaper(priv); + if (index < 0) + return -ENOSPC; + } cbs = &priv->cbs[index]; cbs->port = port; -- cgit v1.2.3 From 180a7419fe4adc8d9c8e0ef0fd17bcdd0cf78acd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Sep 2023 00:53:38 +0300 Subject: net: dsa: sja1105: complete tc-cbs offload support on SJA1110 The blamed commit left this delta behind: struct sja1105_cbs_entry { - u64 port; - u64 prio; + u64 port; /* Not used for SJA1110 */ + u64 prio; /* Not used for SJA1110 */ u64 credit_hi; u64 credit_lo; u64 send_slope; u64 idle_slope; }; but did not actually implement tc-cbs offload fully for the new switch. The offload is accepted, but it doesn't work. The difference compared to earlier switch generations is that now, the table of CBS shapers is sparse, because there are many more shapers, so the mapping between a {port, prio} and a table index is static, rather than requiring us to store the port and prio into the sja1105_cbs_entry. So, the problem is that the code programs the CBS shaper parameters at a dynamic table index which is incorrect. All that needs to be done for SJA1110 CBS shapers to work is to bypass the logic which allocates shapers in a dense manner, as for SJA1105, and use the fixed mapping instead. Fixes: 3e77e59bf8cf ("net: dsa: sja1105: add support for the SJA1110 switch family") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105.h | 2 ++ drivers/net/dsa/sja1105/sja1105_main.c | 13 +++++++++++++ drivers/net/dsa/sja1105/sja1105_spi.c | 4 ++++ 3 files changed, 19 insertions(+) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index dee35ba924ad..0617d5ccd3ff 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -132,6 +132,8 @@ struct sja1105_info { int max_frame_mem; int num_ports; bool multiple_cascade_ports; + /* Every {port, TXQ} has its own CBS shaper */ + bool fixed_cbs_mapping; enum dsa_tag_protocol tag_proto; const struct sja1105_dynamic_table_ops *dyn_ops; const struct sja1105_table_ops *static_ops; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index d7f57f223031..a23d980d28f5 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2115,12 +2115,22 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port, } #define BYTES_PER_KBIT (1000LL / 8) +/* Port 0 (the uC port) does not have CBS shapers */ +#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio)) static int sja1105_find_cbs_shaper(struct sja1105_private *priv, int port, int prio) { int i; + if (priv->info->fixed_cbs_mapping) { + i = SJA1110_FIXED_CBS(port, prio); + if (i >= 0 && i < priv->info->num_cbs_shapers) + return i; + + return -1; + } + for (i = 0; i < priv->info->num_cbs_shapers; i++) if (priv->cbs[i].port == port && priv->cbs[i].prio == prio) return i; @@ -2132,6 +2142,9 @@ static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv) { int i; + if (priv->info->fixed_cbs_mapping) + return -1; + for (i = 0; i < priv->info->num_cbs_shapers; i++) if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope) return i; diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 5ce29c8057a4..834b5c1b4db0 100644 --- a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = { .tag_proto = DSA_TAG_PROTO_SJA1110, .can_limit_mcast_flood = true, .multiple_cascade_ports = true, + .fixed_cbs_mapping = true, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, .max_frame_mem = SJA1110_MAX_FRAME_MEMORY, @@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = { .tag_proto = DSA_TAG_PROTO_SJA1110, .can_limit_mcast_flood = true, .multiple_cascade_ports = true, + .fixed_cbs_mapping = true, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, .max_frame_mem = SJA1110_MAX_FRAME_MEMORY, @@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = { .tag_proto = DSA_TAG_PROTO_SJA1110, .can_limit_mcast_flood = true, .multiple_cascade_ports = true, + .fixed_cbs_mapping = true, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, .max_frame_mem = SJA1110_MAX_FRAME_MEMORY, @@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = { .tag_proto = DSA_TAG_PROTO_SJA1110, .can_limit_mcast_flood = true, .multiple_cascade_ports = true, + .fixed_cbs_mapping = true, .ptp_ts_bits = 32, .ptpegr_ts_bytes = 8, .max_frame_mem = SJA1110_MAX_FRAME_MEMORY, -- cgit v1.2.3 From 08c6d8bae48c2c28f7017d7b61b5d5a1518ceb39 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Tue, 5 Sep 2023 11:33:15 +0200 Subject: net: phy: Provide Module 4 KSZ9477 errata (DS80000754C) The KSZ9477 errata points out (in 'Module 4') the link up/down problems when EEE (Energy Efficient Ethernet) is enabled in the device to which the KSZ9477 tries to auto negotiate. The suggested workaround is to clear advertisement of EEE for PHYs in this chip driver. To avoid regressions with other switch ICs the new MICREL_NO_EEE flag has been introduced. Moreover, the in-register disablement of MMD_DEVICE_ID_EEE_ADV.MMD_EEE_ADV MMD register is removed, as this code is both; now executed too late (after previous rework of the PHY and DSA for KSZ switches) and not required as setting all members of eee_broken_modes bit field prevents the KSZ9477 from advertising EEE. Fixes: 69d3b36ca045 ("net: dsa: microchip: enable EEE support") # for KSZ9477 Signed-off-by: Lukasz Majewski Tested-by: Oleksij Rempel # Confirmed disabled EEE with oscilloscope. Reviewed-by: Oleksij Rempel Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230905093315.784052-1-lukma@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 16 +++++++++++++++- drivers/net/phy/micrel.c | 9 ++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 6673122266b7..42db7679c360 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2335,13 +2335,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; - if (dev->chip_id == KSZ8830_CHIP_ID) { + switch (dev->chip_id) { + case KSZ8830_CHIP_ID: /* Silicon Errata Sheet (DS80000830A): * Port 1 does not work with LinkMD Cable-Testing. * Port 1 does not respond to received PAUSE control frames. */ if (!port) return MICREL_KSZ8_P1_ERRATA; + break; + case KSZ9477_CHIP_ID: + /* KSZ9477 Errata DS80000754C + * + * Module 4: Energy Efficient Ethernet (EEE) feature select must + * be manually disabled + * The EEE feature is enabled by default, but it is not fully + * operational. It must be manually disabled through register + * controls. If not disabled, the PHY ports can auto-negotiate + * to enable EEE, and this feature can cause link drops when + * linked to another device supporting EEE. + */ + return MICREL_NO_EEE; } return 0; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index b6d7981b2d1e..927d3d54658e 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = { /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */ {0x1c, 0x04, 0x00d0}, - /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */ - {0x07, 0x3c, 0x0000}, - /* Register settings are required to meet data sheet supply current specifications */ {0x1c, 0x13, 0x6eff}, {0x1c, 0x14, 0xe6ff}, @@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } + /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes + * in this switch shall be regarded as broken. + */ + if (phydev->dev_flags & MICREL_NO_EEE) + phydev->eee_broken_modes = -1; + err = genphy_restart_aneg(phydev); if (err) return err; -- cgit v1.2.3 From 61a1deacc3d4fd3d57d7fda4d935f7f7503e8440 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 6 Sep 2023 15:20:12 +0800 Subject: net: hns3: fix tx timeout issue Currently, the driver knocks the ring doorbell before updating the ring->last_to_use in tx flow. if the hardware transmiting packet and napi poll scheduling are fast enough, it may get the old ring->last_to_use in drivers' napi poll. In this case, the driver will think the tx is not completed, and return directly without clear the flag __QUEUE_STATE_STACK_XOFF, which may cause tx timeout. Fixes: 20d06ca2679c ("net: hns3: optimize the tx clean process") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index eac2d0573241..81947c4e5100 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2103,8 +2103,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, */ if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num && !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) { + /* This smp_store_release() pairs with smp_load_aquire() in + * hns3_nic_reclaim_desc(). Ensure that the BD valid bit + * is updated. + */ + smp_store_release(&ring->last_to_use, ring->next_to_use); hns3_tx_push_bd(ring, num); - WRITE_ONCE(ring->last_to_use, ring->next_to_use); return; } @@ -2115,6 +2119,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, return; } + /* This smp_store_release() pairs with smp_load_aquire() in + * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated. + */ + smp_store_release(&ring->last_to_use, ring->next_to_use); + if (ring->tqp->mem_base) hns3_tx_mem_doorbell(ring); else @@ -2122,7 +2131,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num, ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG); ring->pending_buf = 0; - WRITE_ONCE(ring->last_to_use, ring->next_to_use); } static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb, @@ -3563,9 +3571,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, int *bytes, int *pkts, int budget) { - /* pair with ring->last_to_use update in hns3_tx_doorbell(), - * smp_store_release() is not used in hns3_tx_doorbell() because - * the doorbell operation already have the needed barrier operation. + /* This smp_load_acquire() pairs with smp_store_release() in + * hns3_tx_doorbell(). */ int ltu = smp_load_acquire(&ring->last_to_use); int ntc = ring->next_to_clean; -- cgit v1.2.3 From dd2bbc2ef69a920d93801321b0b01ac6c4e5cacd Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Sep 2023 15:20:13 +0800 Subject: net: hns3: Support query tx timeout threshold by debugfs support query tx timeout threshold by debugfs Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f276b5ecb431..8086722a56c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1045,6 +1045,7 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs; struct hnae3_knic_private_info *kinfo = &h->kinfo; + struct net_device *dev = kinfo->netdev; *pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n"); *pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n", @@ -1087,6 +1088,9 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos) dev_specs->mc_mac_size); *pos += scnprintf(buf + *pos, len - *pos, "MAC statistics number: %u\n", dev_specs->mac_stats_num); + *pos += scnprintf(buf + *pos, len - *pos, + "TX timeout threshold: %d seconds\n", + dev->watchdog_timeo / HZ); } static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len) -- cgit v1.2.3 From efccf655e99b6907ca07a466924e91805892e7d3 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 6 Sep 2023 15:20:14 +0800 Subject: net: hns3: fix byte order conversion issue in hclge_dbg_fd_tcam_read() req1->tcam_data is defined as "u8 tcam_data[8]", and we convert it as (u32 *) without considerring byte order conversion, it may result in printing wrong data for tcam_data. Convert tcam_data to (__le32 *) first to fix it. Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs") Signed-off-by: Hao Chen Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index f01a7a9ee02c..ff3f8f424ad9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1519,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x, struct hclge_desc desc[3]; int pos = 0; int ret, i; - u32 *req; + __le32 *req; hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true); desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); @@ -1544,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x, tcam_msg.loc); /* tcam_data0 ~ tcam_data1 */ - req = (u32 *)req1->tcam_data; + req = (__le32 *)req1->tcam_data; for (i = 0; i < 2; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); /* tcam_data2 ~ tcam_data7 */ - req = (u32 *)req2->tcam_data; + req = (__le32 *)req2->tcam_data; for (i = 0; i < 6; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); /* tcam_data8 ~ tcam_data12 */ - req = (u32 *)req3->tcam_data; + req = (__le32 *)req3->tcam_data; for (i = 0; i < 5; i++) pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos, - "%08x\n", *req++); + "%08x\n", le32_to_cpu(*req++)); return ret; } -- cgit v1.2.3 From c295160b1d95e885f1af4586a221cb221d232d10 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 6 Sep 2023 15:20:15 +0800 Subject: net: hns3: fix debugfs concurrency issue between kfree buffer and read Now in hns3_dbg_uninit(), there may be concurrency between kfree buffer and read, it may result in memory error. Moving debugfs_remove_recursive() in front of kfree buffer to ensure they don't happen at the same time. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Hao Chen Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 8086722a56c0..b8508533878b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1415,9 +1415,9 @@ int hns3_dbg_init(struct hnae3_handle *handle) return 0; out: - mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; + mutex_destroy(&handle->dbgfs_lock); return ret; } @@ -1425,6 +1425,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) { u32 i; + debugfs_remove_recursive(handle->hnae3_dbgfs); + handle->hnae3_dbgfs = NULL; + for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) if (handle->dbgfs_buf[i]) { kvfree(handle->dbgfs_buf[i]); @@ -1432,8 +1435,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) } mutex_destroy(&handle->dbgfs_lock); - debugfs_remove_recursive(handle->hnae3_dbgfs); - handle->hnae3_dbgfs = NULL; } void hns3_dbg_register_debugfs(const char *debugfs_dir_name) -- cgit v1.2.3 From fa5564945f7d15ae2390b00c08b6abaef0165cda Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Wed, 6 Sep 2023 15:20:16 +0800 Subject: net: hns3: fix invalid mutex between tc qdisc and dcb ets command issue We hope that tc qdisc and dcb ets commands can not be used crosswise. If we want to use any of the commands to configure tc, We must use the other command to clear the existing configuration. However, when we configure a single tc with tc qdisc, we can still configure it with dcb ets. Because we use mqprio_active as the tag of tc qdisc configuration, but with dcb ets, we do not check mqprio_active. This patch fix this issue by check mqprio_active before executing the dcb ets command. and add dcb_ets_active to replace HCLGE_FLAG_DCB_ENABLE and HCLGE_FLAG_MQPRIO_ENABLE at the hclge layer, Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature") Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 20 +++++--------------- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++-- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 -- 4 files changed, 9 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index a4b43bcd2f0c..aaf1f42624a7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -814,6 +814,7 @@ struct hnae3_tc_info { u8 max_tc; /* Total number of TCs */ u8 num_tc; /* Total number of enabled TCs */ bool mqprio_active; + bool dcb_ets_active; }; #define HNAE3_MAX_DSCP 64 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index fad5a5ff3cda..b98301e205f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -259,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) int ret; if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || - hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + h->kinfo.tc_info.mqprio_active) return -EINVAL; ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); @@ -275,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) } hclge_tm_schd_info_update(hdev, num_tc); - if (num_tc > 1) - hdev->flag |= HCLGE_FLAG_DCB_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + h->kinfo.tc_info.dcb_ets_active = num_tc > 1; ret = hclge_ieee_ets_to_tm_info(hdev, ets); if (ret) @@ -487,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h) struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; - if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + if (h->kinfo.tc_info.mqprio_active) return 0; return hdev->dcbx_cap; @@ -611,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return -EBUSY; - if (hdev->flag & HCLGE_FLAG_DCB_ENABLE) + kinfo = &vport->nic.kinfo; + if (kinfo->tc_info.dcb_ets_active) return -EINVAL; ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt); @@ -625,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (ret) return ret; - kinfo = &vport->nic.kinfo; memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info)); hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt); kinfo->tc_info.mqprio_active = tc > 0; @@ -634,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h, if (ret) goto err_out; - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; - - if (tc > 1) - hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE; - return hclge_notify_init_up(hdev); err_out: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 0f50dba6cc47..8ca368424436 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11026,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, static void hclge_info_show(struct hclge_dev *hdev) { + struct hnae3_handle *handle = &hdev->vport->nic; struct device *dev = &hdev->pdev->dev; dev_info(dev, "PF info begin:\n"); @@ -11042,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "This is %s PF\n", hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main"); dev_info(dev, "DCB %s\n", - hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable"); + handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable"); dev_info(dev, "MQPRIO %s\n", - hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable"); + handle->kinfo.tc_info.mqprio_active ? "enable" : "disable"); dev_info(dev, "Default tx spare buffer size: %u\n", hdev->tx_spare_buf_size); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ec233ec57222..7bc2049b723d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -919,8 +919,6 @@ struct hclge_dev { #define HCLGE_FLAG_MAIN BIT(0) #define HCLGE_FLAG_DCB_CAPABLE BIT(1) -#define HCLGE_FLAG_DCB_ENABLE BIT(2) -#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3) u32 flag; u32 pkt_buf_size; /* Total pf buf size for tx/rx */ -- cgit v1.2.3 From 674d9591a32d01df75d6b5fffed4ef942a294376 Mon Sep 17 00:00:00 2001 From: Yisen Zhuang Date: Wed, 6 Sep 2023 15:20:17 +0800 Subject: net: hns3: fix the port information display when sfp is absent When sfp is absent or unidentified, the port type should be displayed as PORT_OTHERS, rather than PORT_FIBRE. Fixes: 88d10bd6f730 ("net: hns3: add support for multiple media type") Signed-off-by: Yisen Zhuang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 36858a72d771..682239f33082 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -773,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev, hns3_get_ksettings(h, cmd); break; case HNAE3_MEDIA_TYPE_FIBER: - if (module_type == HNAE3_MODULE_TYPE_CR) + if (module_type == HNAE3_MODULE_TYPE_UNKNOWN) + cmd->base.port = PORT_OTHER; + else if (module_type == HNAE3_MODULE_TYPE_CR) cmd->base.port = PORT_DA; else cmd->base.port = PORT_FIBRE; -- cgit v1.2.3 From 60326634f6c54528778de18bfef1e8a7a93b3771 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 6 Sep 2023 15:20:18 +0800 Subject: net: hns3: remove GSO partial feature bit HNS3 NIC does not support GSO partial packets segmentation. Actually tunnel packets for example NvGRE packets segment offload and checksum offload is already supported. There is no need to keep gso partial feature bit. So this patch removes it. Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Jie Wang Signed-off-by: Jijie Shao Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 81947c4e5100..b4895c7b3efd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3316,8 +3316,6 @@ static void hns3_set_default_feature(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | -- cgit v1.2.3 From 6afcf0fb92701487421aa73c692855aa70fbc796 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 7 Sep 2023 11:01:04 -0700 Subject: Revert "net: team: do not use dynamic lockdep key" This reverts commit 39285e124edbc752331e98ace37cc141a6a3747a. Looks like the change has unintended consequences in exposing objects before they are initialized. Let's drop this patch and try again in net-next. Reported-by: syzbot+44ae022028805f4600fc@syzkaller.appspotmail.com Fixes: 39285e124edb ("net: team: do not use dynamic lockdep key") Link: https://lore.kernel.org/all/20230907103124.6adb7256@kernel.org/ Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 111 ++++++++++++++++--------------- drivers/net/team/team_mode_loadbalance.c | 4 +- 2 files changed, 59 insertions(+), 56 deletions(-) (limited to 'drivers') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ad29122a5468..e8b94580194e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1135,8 +1135,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev, struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; - char *portname = port_dev->name; struct team_port *port; + char *portname = port_dev->name; int err; if (port_dev->flags & IFF_LOOPBACK) { @@ -1203,31 +1203,18 @@ static int team_port_add(struct team *team, struct net_device *port_dev, memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len); - err = dev_open(port_dev, extack); - if (err) { - netdev_dbg(dev, "Device %s opening failed\n", - portname); - goto err_dev_open; - } - - err = team_upper_dev_link(team, port, extack); + err = team_port_enter(team, port); if (err) { - netdev_err(dev, "Device %s failed to set upper link\n", + netdev_err(dev, "Device %s failed to enter team mode\n", portname); - goto err_set_upper_link; + goto err_port_enter; } - /* lockdep subclass variable(dev->nested_level) was updated by - * team_upper_dev_link(). - */ - team_unlock(team); - team_lock(team); - - err = team_port_enter(team, port); + err = dev_open(port_dev, extack); if (err) { - netdev_err(dev, "Device %s failed to enter team mode\n", + netdev_dbg(dev, "Device %s opening failed\n", portname); - goto err_port_enter; + goto err_dev_open; } err = vlan_vids_add_by_dev(port_dev, dev); @@ -1255,6 +1242,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, goto err_handler_register; } + err = team_upper_dev_link(team, port, extack); + if (err) { + netdev_err(dev, "Device %s failed to set upper link\n", + portname); + goto err_set_upper_link; + } + err = __team_option_inst_add_port(team, port); if (err) { netdev_err(dev, "Device %s failed to add per-port options\n", @@ -1301,6 +1295,9 @@ err_set_slave_promisc: __team_option_inst_del_port(team, port); err_option_port_add: + team_upper_dev_unlink(team, port); + +err_set_upper_link: netdev_rx_handler_unregister(port_dev); err_handler_register: @@ -1310,16 +1307,13 @@ err_enable_netpoll: vlan_vids_del_by_dev(port_dev, dev); err_vids_add: - team_port_leave(team, port); - -err_port_enter: - team_upper_dev_unlink(team, port); - -err_set_upper_link: dev_close(port_dev); err_dev_open: + team_port_leave(team, port); team_port_set_orig_dev_addr(port); + +err_port_enter: dev_set_mtu(port_dev, port->orig.mtu); err_set_mtu: @@ -1622,7 +1616,6 @@ static int team_init(struct net_device *dev) int err; team->dev = dev; - mutex_init(&team->lock); team_set_no_mode(team); team->notifier_ctx = false; @@ -1650,6 +1643,8 @@ static int team_init(struct net_device *dev) goto err_options_register; netif_carrier_off(dev); + lockdep_register_key(&team->team_lock_key); + __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key); netdev_lockdep_set_classes(dev); return 0; @@ -1670,7 +1665,7 @@ static void team_uninit(struct net_device *dev) struct team_port *port; struct team_port *tmp; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry_safe(port, tmp, &team->port_list, list) team_port_del(team, port->dev); @@ -1679,8 +1674,9 @@ static void team_uninit(struct net_device *dev) team_mcast_rejoin_fini(team); team_notify_peers_fini(team); team_queue_override_fini(team); - team_unlock(team); + mutex_unlock(&team->lock); netdev_change_features(dev); + lockdep_unregister_key(&team->team_lock_key); } static void team_destructor(struct net_device *dev) @@ -1794,18 +1790,18 @@ static void team_set_rx_mode(struct net_device *dev) static int team_set_mac_address(struct net_device *dev, void *p) { - struct team *team = netdev_priv(dev); struct sockaddr *addr = p; + struct team *team = netdev_priv(dev); struct team_port *port; if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; dev_addr_set(dev, addr->sa_data); - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - team_unlock(team); + mutex_unlock(&team->lock); return 0; } @@ -1819,7 +1815,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - team_lock(team); + mutex_lock(&team->lock); team->port_mtu_change_allowed = true; list_for_each_entry(port, &team->port_list, list) { err = dev_set_mtu(port->dev, new_mtu); @@ -1830,7 +1826,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) } } team->port_mtu_change_allowed = false; - team_unlock(team); + mutex_unlock(&team->lock); dev->mtu = new_mtu; @@ -1840,7 +1836,7 @@ unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) dev_set_mtu(port->dev, dev->mtu); team->port_mtu_change_allowed = false; - team_unlock(team); + mutex_unlock(&team->lock); return err; } @@ -1894,20 +1890,20 @@ static int team_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) * Alhough this is reader, it's guarded by team lock. It's not possible * to traverse list in reverse under rcu_read_lock */ - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { err = vlan_vid_add(port->dev, proto, vid); if (err) goto unwind; } - team_unlock(team); + mutex_unlock(&team->lock); return 0; unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - team_unlock(team); + mutex_unlock(&team->lock); return err; } @@ -1917,10 +1913,10 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) struct team *team = netdev_priv(dev); struct team_port *port; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) vlan_vid_del(port->dev, proto, vid); - team_unlock(team); + mutex_unlock(&team->lock); return 0; } @@ -1942,9 +1938,9 @@ static void team_netpoll_cleanup(struct net_device *dev) { struct team *team = netdev_priv(dev); - team_lock(team); + mutex_lock(&team->lock); __team_netpoll_cleanup(team); - team_unlock(team); + mutex_unlock(&team->lock); } static int team_netpoll_setup(struct net_device *dev, @@ -1954,7 +1950,7 @@ static int team_netpoll_setup(struct net_device *dev, struct team_port *port; int err = 0; - team_lock(team); + mutex_lock(&team->lock); list_for_each_entry(port, &team->port_list, list) { err = __team_port_enable_netpoll(port); if (err) { @@ -1962,7 +1958,7 @@ static int team_netpoll_setup(struct net_device *dev, break; } } - team_unlock(team); + mutex_unlock(&team->lock); return err; } #endif @@ -1973,9 +1969,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, struct team *team = netdev_priv(dev); int err; - team_lock(team); + mutex_lock(&team->lock); err = team_port_add(team, port_dev, extack); - team_unlock(team); + mutex_unlock(&team->lock); if (!err) netdev_change_features(dev); @@ -1988,12 +1984,19 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev) struct team *team = netdev_priv(dev); int err; - team_lock(team); + mutex_lock(&team->lock); err = team_port_del(team, port_dev); - team_unlock(team); + mutex_unlock(&team->lock); - if (!err) - netdev_change_features(dev); + if (err) + return err; + + if (netif_is_team_master(port_dev)) { + lockdep_unregister_key(&team->team_lock_key); + lockdep_register_key(&team->team_lock_key); + lockdep_set_class(&team->lock, &team->team_lock_key); + } + netdev_change_features(dev); return err; } @@ -2313,13 +2316,13 @@ static struct team *team_nl_team_get(struct genl_info *info) } team = netdev_priv(dev); - __team_lock(team); + mutex_lock(&team->lock); return team; } static void team_nl_team_put(struct team *team) { - team_unlock(team); + mutex_unlock(&team->lock); dev_put(team->dev); } @@ -2981,9 +2984,9 @@ static void team_port_change_check(struct team_port *port, bool linkup) { struct team *team = port->team; - team_lock(team); + mutex_lock(&team->lock); __team_port_change_check(port, linkup); - team_unlock(team); + mutex_unlock(&team->lock); } diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 7bcc9d37447a..00f8989c29c0 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -478,7 +478,7 @@ static void lb_stats_refresh(struct work_struct *work) team = lb_priv_ex->team; lb_priv = get_lb_priv(team); - if (!team_trylock(team)) { + if (!mutex_trylock(&team->lock)) { schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0); return; } @@ -515,7 +515,7 @@ static void lb_stats_refresh(struct work_struct *work) schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, (lb_priv_ex->stats.refresh_interval * HZ) / 10); - team_unlock(team); + mutex_unlock(&team->lock); } static void lb_stats_refresh_interval_get(struct team *team, -- cgit v1.2.3 From 1b36955cc048c8ff6ba448dbf4be0e52f59f2963 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 6 Sep 2023 17:16:09 +0300 Subject: net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs() enetc_psi_create() returns an ERR_PTR() or a valid station interface pointer, but checking for the non-NULL quality of the return code blurs that difference away. So if enetc_psi_create() fails, we call enetc_psi_destroy() when we shouldn't. This will likely result in crashes, since enetc_psi_create() cleans up everything after itself when it returns an ERR_PTR(). Fixes: f0168042a212 ("net: enetc: reimplement RFS/RSS memory clearing as PCI quirk") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/netdev/582183ef-e03b-402b-8e2d-6d9bb3c83bd9@moroto.mountain/ Suggested-by: Dan Carpenter Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230906141609.247579-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index e0a4cb7e3f50..c153dc083aff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -1402,7 +1402,7 @@ static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev) return; si = enetc_psi_create(pdev); - if (si) + if (!IS_ERR(si)) enetc_psi_destroy(pdev); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF, -- cgit v1.2.3