diff options
55 files changed, 1034 insertions, 542 deletions
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index caf04284711a..bb49f6e40a19 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -155,6 +155,7 @@ static int cops_irqlist[] = { }; static struct timer_list cops_timer; +static struct net_device *cops_timer_dev; /* use 0 for production, 1 for verification, 2 for debug, 3 for verbose debug */ #ifndef COPS_DEBUG @@ -187,7 +188,7 @@ static void cops_load (struct net_device *dev); static int cops_nodeid (struct net_device *dev, int nodeid); static irqreturn_t cops_interrupt (int irq, void *dev_id); -static void cops_poll (unsigned long ltdev); +static void cops_poll(struct timer_list *t); static void cops_timeout(struct net_device *dev); static void cops_rx (struct net_device *dev); static netdev_tx_t cops_send_packet (struct sk_buff *skb, @@ -424,7 +425,8 @@ static int cops_open(struct net_device *dev) */ if(lp->board==TANGENT) /* Poll 20 times per second */ { - setup_timer(&cops_timer, cops_poll, (unsigned long)dev); + cops_timer_dev = dev; + timer_setup(&cops_timer, cops_poll, 0); cops_timer.expires = jiffies + HZ/20; add_timer(&cops_timer); } @@ -671,12 +673,11 @@ static int cops_nodeid (struct net_device *dev, int nodeid) * Poll the Tangent type cards to see if we have work. */ -static void cops_poll(unsigned long ltdev) +static void cops_poll(struct timer_list *unused) { int ioaddr, status; int boguscount = 0; - - struct net_device *dev = (struct net_device *)ltdev; + struct net_device *dev = cops_timer_dev; del_timer(&cops_timer); diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 3da1fc539ef9..7bddb8efb6d5 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void axnet_tx_timeout(struct net_device *dev); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); static int mdio_read(unsigned int addr, int phy_id, int loc); @@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ax_open(dev); @@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ax_interrupt(irq, dev_id); } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)(arg); - struct axnet_dev *info = PRIV(dev); + struct axnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index bd0a2a14b649..eae9827035dc 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev); static int pcnet_close(struct net_device *dev); static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void pcnet_reset_8390(struct net_device *dev); static int set_config(struct net_device *dev, struct ifmap *map); static int setup_shmem_window(struct pcmcia_device *link, int start_pg, @@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ei_open(dev); @@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ret; } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct pcnet_dev *info = PRIV(dev); + struct pcnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index 998d30e050a6..212fe72a190b 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -123,6 +123,7 @@ struct lance_private { int burst_sizes; /* ledma SBus burst sizes */ #endif struct timer_list multicast_timer; + struct net_device *dev; }; #define LANCE_ADDR(x) ((int)(x) & ~0xff000000) @@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } +static void lance_set_multicast_retry(struct timer_list *t) +{ + struct lance_private *lp = from_timer(lp, t, multicast_timer); + + lance_set_multicast(lp->dev); +} + static int a2065_init_one(struct zorro_dev *z, const struct zorro_device_id *ent); static void a2065_remove_one(struct zorro_dev *z); @@ -728,14 +736,13 @@ static int a2065_init_one(struct zorro_dev *z, priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS; priv->rx_ring_mod_mask = RX_RING_MOD_MASK; priv->tx_ring_mod_mask = TX_RING_MOD_MASK; + priv->dev = dev; dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; dev->dma = 0; - setup_timer(&priv->multicast_timer, - (void(*)(unsigned long))lance_set_multicast, - (unsigned long)dev); + timer_setup(&priv->multicast_timer, lance_set_multicast_retry, 0); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index 0612dbee00d2..01d132c02ff9 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev) write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT); } -static void am79c961_timer(unsigned long data) +static void am79c961_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; unsigned int lnkstat, carrier; unsigned long flags; @@ -728,7 +728,8 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(&priv->chip_lock); - setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev); + priv->dev = dev; + timer_setup(&priv->timer, am79c961_timer, 0); if (am79c961_hw_init(dev)) goto release; diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h index 9f384b79507b..fc5088c70731 100644 --- a/drivers/net/ethernet/amd/am79c961a.h +++ b/drivers/net/ethernet/amd/am79c961a.h @@ -140,6 +140,7 @@ struct dev_priv { unsigned long txhdr; spinlock_t chip_lock; struct timer_list timer; + struct net_device *dev; }; #endif diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 9bdf81c2cd00..116997a8b593 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -260,6 +260,7 @@ struct lance_private { unsigned short busmaster_regval; struct timer_list multicast_timer; + struct net_device *dev; /* Pointers to the ring buffers as seen from the CPU */ char *rx_buf_ptr_cpu[RX_RING_SIZE]; @@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1246,8 +1248,8 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - setup_timer(&lp->multicast_timer, lance_set_multicast_retry, - (unsigned long)dev); + lp->dev = dev; + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); ret = register_netdev(dev); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index e46153654016..a561705f232c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -321,7 +321,7 @@ static struct net_device_stats *pcnet32_get_stats(struct net_device *); static void pcnet32_load_multicast(struct net_device *dev); static void pcnet32_set_multicast_list(struct net_device *); static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); -static void pcnet32_watchdog(struct net_device *); +static void pcnet32_watchdog(struct timer_list *); static int mdio_read(struct net_device *dev, int phy_id, int reg_num); static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); @@ -1970,8 +1970,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options |= PCNET32_PORT_MII; } - setup_timer(&lp->watchdog_timer, (void *)&pcnet32_watchdog, - (unsigned long)dev); + timer_setup(&lp->watchdog_timer, pcnet32_watchdog, 0); /* The PCNET32-specific entries in the device structure. */ dev->netdev_ops = &pcnet32_netdev_ops; @@ -2901,9 +2900,10 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) * Could possibly be changed to use mii_check_media instead. */ -static void pcnet32_watchdog(struct net_device *dev) +static void pcnet32_watchdog(struct timer_list *t) { - struct pcnet32_private *lp = netdev_priv(dev); + struct pcnet32_private *lp = from_timer(lp, t, watchdog_timer); + struct net_device *dev = lp->dev; unsigned long flags; /* Print the link status if it has changed */ diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 0183ffb9d3ba..cdd7a611479b 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1248,9 +1248,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1459,8 +1460,7 @@ no_link_test: * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - setup_timer(&lp->multicast_timer, lance_set_multicast_retry, - (unsigned long)dev); + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); if (register_netdev(dev)) { printk(KERN_ERR "SunLance: Cannot register device.\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 608693d11bd7..3d53153ce751 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -642,9 +642,9 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data) return IRQ_HANDLED; } -static void xgbe_tx_timer(unsigned long data) +static void xgbe_tx_timer(struct timer_list *t) { - struct xgbe_channel *channel = (struct xgbe_channel *)data; + struct xgbe_channel *channel = from_timer(channel, t, tx_timer); struct xgbe_prv_data *pdata = channel->pdata; struct napi_struct *napi; @@ -680,9 +680,9 @@ static void xgbe_service(struct work_struct *work) pdata->phy_if.phy_status(pdata); } -static void xgbe_service_timer(unsigned long data) +static void xgbe_service_timer(struct timer_list *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); queue_work(pdata->dev_workqueue, &pdata->service_work); @@ -694,16 +694,14 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata) struct xgbe_channel *channel; unsigned int i; - setup_timer(&pdata->service_timer, xgbe_service_timer, - (unsigned long)pdata); + timer_setup(&pdata->service_timer, xgbe_service_timer, 0); for (i = 0; i < pdata->channel_count; i++) { channel = pdata->channel[i]; if (!channel->tx_ring) break; - setup_timer(&channel->tx_timer, xgbe_tx_timer, - (unsigned long)channel); + timer_setup(&channel->tx_timer, xgbe_tx_timer, 0); } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 798d13964274..d5031f436f83 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -904,6 +904,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, struct bnxt_tc_l2_key *l2_info, struct net_device *real_dst_dev) { +#ifdef CONFIG_INET struct flowi4 flow = { {0} }; struct net_device *dst_dev; struct neighbour *nbr; @@ -925,6 +926,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, */ dst_dev = rt->dst.dev; if (is_vlan_dev(dst_dev)) { +#if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev); if (vlan->real_dev != real_dst_dev) { @@ -938,6 +940,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, l2_info->inner_vlan_tci = htons(vlan->vlan_id); l2_info->inner_vlan_tpid = vlan->vlan_proto; l2_info->num_vlans = 1; +#endif } else if (dst_dev != real_dst_dev) { netdev_info(bp->dev, "dst_dev(%s) for %pI4b is not PF-if(%s)", @@ -966,6 +969,9 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, put_rt: ip_rt_put(rt); return rc; +#else + return -EOPNOTSUPP; +#endif } static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 00c19306ecee..fd70a4844e2d 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -1288,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; struct napi_struct *napi, *n; + /* tell Octeon to stop forwarding packets to host */ + send_rx_ctrl_cmd(lio, 0); + if (oct->props[lio->ifidx].napi_enabled) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); @@ -1305,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev) netif_carrier_off(netdev); lio->link_changes++; - /* tell Octeon to stop forwarding packets to host */ - send_rx_ctrl_cmd(lio, 0); - ifstate_reset(lio, LIO_IFSTATE_RUNNING); txqs_stop(netdev); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e2d342647b19..e988caa797cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -455,6 +455,11 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, q->pg_chunk.offset = 0; mapping = pci_map_page(adapter->pdev, q->pg_chunk.page, 0, q->alloc_size, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) { + __free_pages(q->pg_chunk.page, order); + q->pg_chunk.page = NULL; + return -EIO; + } q->pg_chunk.mapping = mapping; } sd->pg_chunk = q->pg_chunk; @@ -949,40 +954,78 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb) return flits_to_desc(flits); } +/* map_skb - map a packet main body and its page fragments + * @pdev: the PCI device + * @skb: the packet + * @addr: placeholder to save the mapped addresses + * + * map the main body of an sk_buff and its page fragments, if any. + */ +static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb, + dma_addr_t *addr) +{ + const skb_frag_t *fp, *end; + const struct skb_shared_info *si; + + if (skb_headlen(skb)) { + *addr = pci_map_single(pdev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto out_err; + addr++; + } + + si = skb_shinfo(skb); + end = &si->frags[si->nr_frags]; + + for (fp = si->frags; fp < end; fp++) { + *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp), + DMA_TO_DEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto unwind; + addr++; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp), + DMA_TO_DEVICE); + + pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE); +out_err: + return -ENOMEM; +} + /** - * make_sgl - populate a scatter/gather list for a packet + * write_sgl - populate a scatter/gather list for a packet * @skb: the packet * @sgp: the SGL to populate * @start: start address of skb main body data to include in the SGL * @len: length of skb main body data to include in the SGL - * @pdev: the PCI device + * @addr: the list of the mapped addresses * - * Generates a scatter/gather list for the buffers that make up a packet + * Copies the scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. */ -static inline unsigned int make_sgl(const struct sk_buff *skb, - struct sg_ent *sgp, unsigned char *start, - unsigned int len, struct pci_dev *pdev) +static inline unsigned int write_sgl(const struct sk_buff *skb, + struct sg_ent *sgp, unsigned char *start, + unsigned int len, const dma_addr_t *addr) { - dma_addr_t mapping; - unsigned int i, j = 0, nfrags; + unsigned int i, j = 0, k = 0, nfrags; if (len) { - mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE); sgp->len[0] = cpu_to_be32(len); - sgp->addr[0] = cpu_to_be64(mapping); - j = 1; + sgp->addr[j++] = cpu_to_be64(addr[k++]); } nfrags = skb_shinfo(skb)->nr_frags; for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), - DMA_TO_DEVICE); sgp->len[j] = cpu_to_be32(skb_frag_size(frag)); - sgp->addr[j] = cpu_to_be64(mapping); + sgp->addr[j] = cpu_to_be64(addr[k++]); j ^= 1; if (j == 0) ++sgp; @@ -1138,7 +1181,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, const struct port_info *pi, unsigned int pidx, unsigned int gen, struct sge_txq *q, unsigned int ndesc, - unsigned int compl) + unsigned int compl, const dma_addr_t *addr) { unsigned int flits, sgl_flits, cntrl, tso_info; struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1]; @@ -1196,7 +1239,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, } sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr); write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen, htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl), @@ -1227,6 +1270,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; struct sge_qset *qs; struct sge_txq *q; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* * The chip min packet length is 9 octets but play safe and reject @@ -1255,6 +1299,14 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } + /* Check if ethernet packet can't be sent as immediate data */ + if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) { + if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + } + q->in_use += ndesc; if (unlikely(credits - ndesc < q->stop_thres)) { t3_stop_tx_queue(txq, qs, q); @@ -1312,7 +1364,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(!skb_shared(skb))) skb_orphan(skb); - write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl); + write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr); check_ring_tx_db(adap, q); return NETDEV_TX_OK; } @@ -1577,7 +1629,8 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, */ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, struct sge_txq *q, unsigned int pidx, - unsigned int gen, unsigned int ndesc) + unsigned int gen, unsigned int ndesc, + const dma_addr_t *addr) { unsigned int sgl_flits, flits; struct work_request_hdr *from; @@ -1598,10 +1651,9 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb_tail_pointer(skb) - - skb_transport_header(skb), - adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb), + skb_tail_pointer(skb) - skb_transport_header(skb), + addr); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); skb->destructor = deferred_unmap_destructor; @@ -1659,6 +1711,12 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); goto again; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) { + spin_unlock(&q->lock); + return NET_XMIT_SUCCESS; + } + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; @@ -1669,7 +1727,7 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); } spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head); check_ring_tx_db(adap, q); return NET_XMIT_SUCCESS; } @@ -1687,6 +1745,7 @@ static void restart_offloadq(unsigned long data) struct sge_txq *q = &qs->txq[TXQ_OFLD]; const struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; + unsigned int written = 0; spin_lock(&q->lock); again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); @@ -1706,10 +1765,15 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); break; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) + break; + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; + written += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; @@ -1717,7 +1781,8 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); __skb_unlink(skb, &q->sendq); spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, + (dma_addr_t *)skb->head); spin_lock(&q->lock); } spin_unlock(&q->lock); @@ -1727,8 +1792,9 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif wmb(); - t3_write_reg(adap, A_SG_KDOORBELL, - F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); + if (likely(written)) + t3_write_reg(adap, A_SG_KDOORBELL, + F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** @@ -2853,9 +2919,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter) * bother cleaning them up here. * */ -static void sge_timer_tx(unsigned long data) +static void sge_timer_tx(struct timer_list *t) { - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; @@ -2893,10 +2959,10 @@ static void sge_timer_tx(unsigned long data) * starved. * */ -static void sge_timer_rx(unsigned long data) +static void sge_timer_rx(struct timer_list *t) { spinlock_t *lock; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; u32 status; @@ -2976,8 +3042,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, struct sge_qset *q = &adapter->sge.qs[id]; init_qset_cntxt(q, id); - setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q); - setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q); + timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0); + timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0); q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, sizeof(struct rx_desc), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 9b6aabe4f963..614db014ef18 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -741,9 +741,9 @@ err: return ret; } -static void ch_flower_stats_cb(unsigned long data) +static void ch_flower_stats_cb(struct timer_list *t) { - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, flower_stats_timer); struct ch_tc_flower_entry *flower_entry; struct ch_tc_flower_stats *ofld_stats; unsigned int i; @@ -815,8 +815,7 @@ err: void cxgb4_init_tc_flower(struct adapter *adap) { hash_init(adap->flower_anymatch_tbl); - setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb, - (unsigned long)adap); + timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0); mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 4ef68f69b58c..486b01fe23bd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2583,11 +2583,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap) return t4_intr_intx; } -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.rx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -2620,11 +2620,11 @@ done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i, budget; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -3458,8 +3458,8 @@ int t4_sge_init(struct adapter *adap) /* Set up timers used for recuring callbacks to process RX and TX * administrative tasks. */ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); spin_lock_init(&s->intrq_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 05498e7f2840..14d7e673c656 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2058,9 +2058,9 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter) * when out of memory a queue can become empty. We schedule NAPI to do * the actual refill. */ -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.rx_timer); struct sge *s = &adapter->sge; unsigned int i; @@ -2117,9 +2117,9 @@ static void sge_rx_timer_cb(unsigned long data) * when no new packets are being submitted. This is essential for pktgen, * at least. */ -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.tx_timer); struct sge *s = &adapter->sge; unsigned int i, budget; @@ -2676,8 +2676,8 @@ int t4vf_sge_init(struct adapter *adapter) /* * Set up tasklet timers. */ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); /* * Initialize Forwarded Interrupt Queue lock. diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 6ca9e981ad57..1a27176381fb 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -913,7 +913,7 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. */ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = jiffies + 3*HZ; add_timer(&np->timer); @@ -951,10 +951,10 @@ static void check_duplex(struct net_device *dev) } } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii_if.dev; void __iomem *ioaddr = np->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c8982313d850..23053919ebf5 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int netdev_open(struct net_device *dev); static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); -static void netdev_timer(unsigned long data); -static void reset_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); +static void reset_timer(struct timer_list *t); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -909,13 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. */ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = RUN_AT(3 * HZ); /* timer handler */ add_timer(&np->timer); - setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev); + timer_setup(&np->reset_timer, reset_timer, 0); np->reset_timer_armed = 0; return rc; } @@ -1078,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev) } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii.dev; void __iomem *ioaddr = np->mem; int old_crvalue = np->crvalue; unsigned int old_linkok = np->linkok; @@ -1167,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev) } -static void reset_timer(unsigned long data) +static void reset_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, reset_timer); + struct net_device *dev = np->mii.dev; unsigned long flags; printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 7cecd9dbc111..ae195f8adff5 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -653,10 +653,10 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) &lp->eth_regs->ethmac2); } -static void korina_poll_media(unsigned long data) +static void korina_poll_media(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct korina_private *lp = netdev_priv(dev); + struct korina_private *lp = from_timer(lp, t, media_check_timer); + struct net_device *dev = lp->dev; korina_check_media(dev, 0); mod_timer(&lp->media_check_timer, jiffies + HZ); @@ -1103,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev) ": cannot register net device: %d\n", rc); goto probe_err_register; } - setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + timer_setup(&lp->media_check_timer, korina_poll_media, 0); INIT_WORK(&lp->restart_task, korina_restart_task); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a89a68ce53ad..185dcac0abe7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&health->wq_lock, flags); } -static void poll_health(unsigned long data) +static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; u32 count; @@ -320,7 +320,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - setup_timer(&health->timer, poll_health, (unsigned long)dev); + timer_setup(&health->timer, poll_health, 0); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index dedeacd0bbca..b9a1a9f999ea 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev); static void do_cable_magic(struct net_device *dev); static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); static void ns_tx_timeout(struct net_device *dev); static int alloc_ring(struct net_device *dev); @@ -1571,7 +1571,7 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - setup_timer(&np->timer, netdev_timer, (unsigned long)dev); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); add_timer(&np->timer); @@ -1787,10 +1787,10 @@ static void init_registers(struct net_device *dev) * this check via dspcfg_workaround sysfs option. * 3) check of death of the RX path due to OOM */ -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->dev; void __iomem * ioaddr = ns_ioaddr(dev); int next_tick = NATSEMI_TIMER_FREQ; const int irq = np->pci_dev->irq; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 99d3c7884a4a..958fced4dacf 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev) spin_unlock_irqrestore(&dev->tx_lock, flags); } -static void ns83820_tx_watch(unsigned long data) +static void ns83820_tx_watch(struct timer_list *t) { - struct net_device *ndev = (void *)data; - struct ns83820 *dev = PRIV(ndev); + struct ns83820 *dev = from_timer(dev, t, tx_watchdog); + struct net_device *ndev = dev->ndev; #if defined(DEBUG) printk("ns83820_tx_watch: %u %u %d\n", @@ -1652,7 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev); + timer_setup(&dev->tx_watchdog, ns83820_tx_watch, 0); mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev); /* FIXME: wait for phy to come up */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c index e6d2e06b050c..8b1b962cf1d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) writew(get_unaligned_be16(mac + 4), app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); - return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + if (!err) + nfp_info(app->pf->cpp, + "MAC %pM set on VF %d, reload the VF driver to make this change effective.\n", + mac, vf); + + return err; } int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 77bc7cca8980..c9529c29a0a7 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -413,13 +413,13 @@ that case. /* The rest of these values should never change. */ -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); enum capability_flags {CanHaveMII=1, }; static const struct chip_info { u16 vendor_id, device_id, device_id_mask, pad; const char *name; - void (*media_timer)(unsigned long data); + void (*media_timer)(struct timer_list *t); int flags; } chip_tbl[] = { {0x1318, 0x0911, 0xffff, 0, "Hamachi GNIC-II", hamachi_timer, 0}, @@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, @@ -979,7 +979,7 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. */ - setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev); + timer_setup(&hmp->timer, hamachi_timer, 0); hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ add_timer(&hmp->timer); @@ -1017,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev) return 0; } -static void hamachi_timer(unsigned long data) +static void hamachi_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct hamachi_private *hmp = netdev_priv(dev); + struct hamachi_private *hmp = from_timer(hmp, t, timer); + struct net_device *dev = hmp->mii_if.dev; void __iomem *ioaddr = hmp->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index 33c241f52a71..54224d1822e3 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int location); static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); -static void yellowfin_timer(unsigned long data); +static void yellowfin_timer(struct timer_list *t); static void yellowfin_tx_timeout(struct net_device *dev); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, @@ -632,7 +632,7 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. */ - setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev); + timer_setup(&yp->timer, yellowfin_timer, 0); yp->timer.expires = jiffies + 3*HZ; add_timer(&yp->timer); out: @@ -643,10 +643,10 @@ err_free_irq: goto out; } -static void yellowfin_timer(unsigned long data) +static void yellowfin_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct yellowfin_private *yp = netdev_priv(dev); + struct yellowfin_private *yp = from_timer(yp, t, timer); + struct net_device *dev = pci_get_drvdata(yp->pci_dev); void __iomem *ioaddr = yp->base; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7dc4b6de31e6..fd218fd9ef3c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -399,6 +399,12 @@ enum rtl_registers { RxMaxSize = 0xda, CPlusCmd = 0xe0, IntrMitigate = 0xe2, + +#define RTL_COALESCE_MASK 0x0f +#define RTL_COALESCE_SHIFT 4 +#define RTL_COALESCE_T_MAX (RTL_COALESCE_MASK) +#define RTL_COALESCE_FRAME_MAX (RTL_COALESCE_MASK << 2) + RxDescAddrLow = 0xe4, RxDescAddrHigh = 0xe8, EarlyTxThres = 0xec, /* 8169. Unit of 32 bytes. */ @@ -795,6 +801,7 @@ struct rtl8169_private { u16 cp_cmd; u16 event_slow; + const struct rtl_coalesce_info *coalesce_info; struct mdio_ops { void (*write)(struct rtl8169_private *, int, int); @@ -2363,10 +2370,229 @@ static int rtl8169_nway_reset(struct net_device *dev) return mii_nway_restart(&tp->mii); } +/* + * Interrupt coalescing + * + * > 1 - the availability of the IntrMitigate (0xe2) register through the + * > 8169, 8168 and 810x line of chipsets + * + * 8169, 8168, and 8136(810x) serial chipsets support it. + * + * > 2 - the Tx timer unit at gigabit speed + * + * The unit of the timer depends on both the speed and the setting of CPlusCmd + * (0xe0) bit 1 and bit 0. + * + * For 8169 + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 320ns 2.56us 40.96us + * 0 1 2.56us 20.48us 327.7us + * 1 0 5.12us 40.96us 655.4us + * 1 1 10.24us 81.92us 1.31ms + * + * For the other + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 5us 2.56us 40.96us + * 0 1 40us 20.48us 327.7us + * 1 0 80us 40.96us 655.4us + * 1 1 160us 81.92us 1.31ms + */ + +/* rx/tx scale factors for one particular CPlusCmd[0:1] value */ +struct rtl_coalesce_scale { + /* Rx / Tx */ + u32 nsecs[2]; +}; + +/* rx/tx scale factors for all CPlusCmd[0:1] cases */ +struct rtl_coalesce_info { + u32 speed; + struct rtl_coalesce_scale scalev[4]; /* each CPlusCmd[0:1] case */ +}; + +/* produce (r,t) pairs with each being in series of *1, *8, *8*2, *8*2*2 */ +#define rxtx_x1822(r, t) { \ + {{(r), (t)}}, \ + {{(r)*8, (t)*8}}, \ + {{(r)*8*2, (t)*8*2}}, \ + {{(r)*8*2*2, (t)*8*2*2}}, \ +} +static const struct rtl_coalesce_info rtl_coalesce_info_8169[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 320, 320) }, + { 0 }, +}; + +static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 5000, 5000) }, + { 0 }, +}; +#undef rxtx_x1822 + +/* get rx/tx scale vector corresponding to current speed */ +static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev) +{ + struct rtl8169_private *tp = netdev_priv(dev); + struct ethtool_link_ksettings ecmd; + const struct rtl_coalesce_info *ci; + int rc; + + rc = rtl8169_get_link_ksettings(dev, &ecmd); + if (rc < 0) + return ERR_PTR(rc); + + for (ci = tp->coalesce_info; ci->speed != 0; ci++) { + if (ecmd.base.speed == ci->speed) { + return ci; + } + } + + return ERR_PTR(-ELNRNG); +} + +static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_info *ci; + const struct rtl_coalesce_scale *scale; + struct { + u32 *max_frames; + u32 *usecs; + } coal_settings [] = { + { &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs }, + { &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs } + }, *p = coal_settings; + int i; + u16 w; + + memset(ec, 0, sizeof(*ec)); + + /* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */ + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return PTR_ERR(ci); + + scale = &ci->scalev[RTL_R16(CPlusCmd) & 3]; + + /* read IntrMitigate and adjust according to scale */ + for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) { + *p->max_frames = (w & RTL_COALESCE_MASK) << 2; + w >>= RTL_COALESCE_SHIFT; + *p->usecs = w & RTL_COALESCE_MASK; + } + + for (i = 0; i < 2; i++) { + p = coal_settings + i; + *p->usecs = (*p->usecs * scale->nsecs[i]) / 1000; + + /* + * ethtool_coalesce says it is illegal to set both usecs and + * max_frames to 0. + */ + if (!*p->usecs && !*p->max_frames) + *p->max_frames = 1; + } + + return 0; +} + +/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */ +static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale( + struct net_device *dev, u32 nsec, u16 *cp01) +{ + const struct rtl_coalesce_info *ci; + u16 i; + + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return ERR_CAST(ci); + + for (i = 0; i < 4; i++) { + u32 rxtx_maxscale = max(ci->scalev[i].nsecs[0], + ci->scalev[i].nsecs[1]); + if (nsec <= rxtx_maxscale * RTL_COALESCE_T_MAX) { + *cp01 = i; + return &ci->scalev[i]; + } + } + + return ERR_PTR(-EINVAL); +} + +static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_scale *scale; + struct { + u32 frames; + u32 usecs; + } coal_settings [] = { + { ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs }, + { ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs } + }, *p = coal_settings; + u16 w = 0, cp01; + int i; + + scale = rtl_coalesce_choose_scale(dev, + max(p[0].usecs, p[1].usecs) * 1000, &cp01); + if (IS_ERR(scale)) + return PTR_ERR(scale); + + for (i = 0; i < 2; i++, p++) { + u32 units; + + /* + * accept max_frames=1 we returned in rtl_get_coalesce. + * accept it not only when usecs=0 because of e.g. the following scenario: + * + * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX) + * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1 + * - then user does `ethtool -C eth0 rx-usecs 100` + * + * since ethtool sends to kernel whole ethtool_coalesce + * settings, if we do not handle rx_usecs=!0, rx_frames=1 + * we'll reject it below in `frames % 4 != 0`. + */ + if (p->frames == 1) { + p->frames = 0; + } + + units = p->usecs * 1000 / scale->nsecs[i]; + if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4) + return -EINVAL; + + w <<= RTL_COALESCE_SHIFT; + w |= units; + w <<= RTL_COALESCE_SHIFT; + w |= p->frames >> 2; + } + + rtl_lock_work(tp); + + RTL_W16(IntrMitigate, swab16(w)); + + tp->cp_cmd = (tp->cp_cmd & ~3) | cp01; + RTL_W16(CPlusCmd, tp->cp_cmd); + RTL_R16(CPlusCmd); + + rtl_unlock_work(tp); + + return 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .get_drvinfo = rtl8169_get_drvinfo, .get_regs_len = rtl8169_get_regs_len, .get_link = ethtool_op_get_link, + .get_coalesce = rtl_get_coalesce, + .set_coalesce = rtl_set_coalesce, .set_settings = rtl8169_set_settings, .get_msglevel = rtl8169_get_msglevel, .set_msglevel = rtl8169_set_msglevel, @@ -8061,6 +8287,7 @@ static const struct rtl_cfg_info { unsigned int align; u16 event_slow; unsigned features; + const struct rtl_coalesce_info *coalesce_info; u8 default_ver; } rtl_cfg_infos [] = { [RTL_CFG_0] = { @@ -8069,6 +8296,7 @@ static const struct rtl_cfg_info { .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, .features = RTL_FEATURE_GMII, + .coalesce_info = rtl_coalesce_info_8169, .default_ver = RTL_GIGA_MAC_VER_01, }, [RTL_CFG_1] = { @@ -8077,6 +8305,7 @@ static const struct rtl_cfg_info { .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_11, }, [RTL_CFG_2] = { @@ -8086,6 +8315,7 @@ static const struct rtl_cfg_info { .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver | PCSTimeout, .features = RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_13, } }; @@ -8449,6 +8679,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->hw_start = cfg->hw_start; tp->event_slow = cfg->event_slow; + tp->coalesce_info = cfg->coalesce_info; tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? ~(RxBOVF | RxFOVF) : ~0; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 2a9724898fcf..949aaef390b6 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); -static void epic_timer(unsigned long data); +static void epic_timer(struct timer_list *t); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, @@ -739,7 +739,7 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - setup_timer(&ep->timer, epic_timer, (unsigned long)dev); + timer_setup(&ep->timer, epic_timer, 0); ep->timer.expires = jiffies + 3*HZ; add_timer(&ep->timer); @@ -843,10 +843,10 @@ static void check_media(struct net_device *dev) } } -static void epic_timer(unsigned long data) +static void epic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct epic_private *ep = netdev_priv(dev); + struct epic_private *ep = from_timer(ep, t, timer); + struct net_device *dev = ep->mii.dev; void __iomem *ioaddr = ep->ioaddr; int next_tick = 5*HZ; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 92c927aec66d..a55f430f6a7b 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev); static int s9k_config(struct net_device *dev, struct ifmap *map); static void smc_set_xcvr(struct net_device *dev, int if_port); static void smc_reset(struct net_device *dev); -static void media_check(u_long arg); +static void media_check(struct timer_list *t); static void mdio_sync(unsigned int addr); static int mdio_read(struct net_device *dev, int phy_id, int loc); static void mdio_write(struct net_device *dev, int phy_id, int loc, int value); @@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); - setup_timer(&smc->media, media_check, (u_long)dev); + timer_setup(&smc->media, media_check, 0); mod_timer(&smc->media, jiffies + HZ); return 0; @@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev) ======================================================================*/ -static void media_check(u_long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct smc_private *smc = netdev_priv(dev); + struct smc_private *smc = from_timer(smc, t, media); + struct net_device *dev = smc->mii_if.dev; unsigned int ioaddr = dev->base_addr; u_short i, media, saved_bank; u_short link; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c7a894ead274..ff4fb5eae1af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3749,6 +3749,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -3976,7 +3990,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f4622e28db3a..141ba82b5efb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -142,6 +142,18 @@ struct netns_ipv4 { int sysctl_tcp_app_win; int sysctl_tcp_adv_win_scale; int sysctl_tcp_frto; + int sysctl_tcp_nometrics_save; + int sysctl_tcp_moderate_rcvbuf; + int sysctl_tcp_tso_win_divisor; + int sysctl_tcp_workaround_signed_windows; + int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_challenge_ack_limit; + int sysctl_tcp_min_tso_segs; + int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_autocorking; + int sysctl_tcp_invalid_ratelimit; + int sysctl_tcp_pacing_ss_ratio; + int sysctl_tcp_pacing_ca_ratio; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/tcp.h b/include/net/tcp.h index 18f047501f53..aa1cc90fdc02 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -45,9 +45,6 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> - -#include <linux/bpf.h> -#include <linux/filter.h> #include <linux/bpf-cgroup.h> extern struct inet_hashinfo tcp_hashinfo; @@ -247,22 +244,9 @@ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; -extern int sysctl_tcp_nometrics_save; -extern int sysctl_tcp_moderate_rcvbuf; -extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_workaround_signed_windows; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ -extern int sysctl_tcp_limit_output_bytes; -extern int sysctl_tcp_challenge_ack_limit; -extern int sysctl_tcp_min_tso_segs; -extern int sysctl_tcp_min_rtt_wlen; -extern int sysctl_tcp_autocorking; -extern int sysctl_tcp_invalid_ratelimit; -extern int sysctl_tcp_pacing_ss_ratio; -extern int sysctl_tcp_pacing_ca_ratio; - extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern unsigned long tcp_memory_pressure; @@ -1305,7 +1289,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) } /* Determine a window scaling and initial window to offer. */ -void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, +void tcp_select_initial_window(const struct sock *sk, int __space, + __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fb61b6c79235..e732403669c6 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -506,8 +506,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, - int cmd, struct bridge_vlan_info *vinfo) + int cmd, struct bridge_vlan_info *vinfo, bool *changed) { + bool curr_change; int err = 0; switch (cmd) { @@ -516,22 +517,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, /* if the MASTER flag is set this will act on the global * per-VLAN entry as well */ - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); - if (err) - break; + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags, + &curr_change); } else { vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; - err = br_vlan_add(br, vinfo->vid, vinfo->flags); + err = br_vlan_add(br, vinfo->vid, vinfo->flags, + &curr_change); } + if (curr_change) + *changed = true; break; case RTM_DELLINK: if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else { - br_vlan_delete(br, vinfo->vid); + if (!nbp_vlan_delete(p, vinfo->vid)) + *changed = true; + + if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) && + !br_vlan_delete(p->br, vinfo->vid)) + *changed = true; + } else if (!br_vlan_delete(br, vinfo->vid)) { + *changed = true; } break; } @@ -542,7 +548,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, static int br_process_vlan_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct bridge_vlan_info *vinfo_curr, - struct bridge_vlan_info **vinfo_last) + struct bridge_vlan_info **vinfo_last, + bool *changed) { if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) return -EINVAL; @@ -572,7 +579,7 @@ static int br_process_vlan_info(struct net_bridge *br, sizeof(struct bridge_vlan_info)); for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { tmp_vinfo.vid = v; - err = br_vlan_info(br, p, cmd, &tmp_vinfo); + err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed); if (err) break; } @@ -581,13 +588,13 @@ static int br_process_vlan_info(struct net_bridge *br, return err; } - return br_vlan_info(br, p, cmd, vinfo_curr); + return br_vlan_info(br, p, cmd, vinfo_curr, changed); } static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, - int cmd) + int cmd, bool *changed) { struct bridge_vlan_info *vinfo_curr = NULL; struct bridge_vlan_info *vinfo_last = NULL; @@ -607,7 +614,8 @@ static int br_afspec(struct net_bridge *br, return err; err = br_process_vlan_tunnel_info(br, p, cmd, &tinfo_curr, - &tinfo_last); + &tinfo_last, + changed); if (err) return err; break; @@ -616,7 +624,7 @@ static int br_afspec(struct net_bridge *br, return -EINVAL; vinfo_curr = nla_data(attr); err = br_process_vlan_info(br, p, cmd, vinfo_curr, - &vinfo_last); + &vinfo_last, changed); if (err) return err; break; @@ -804,6 +812,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + bool changed = false; int err = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); @@ -839,14 +848,15 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) } if (err) goto out; + changed = true; } if (afspec) { err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_SETLINK); + afspec, RTM_SETLINK, &changed); } - if (err == 0) + if (changed) br_ifinfo_notify(RTM_NEWLINK, p); out: return err; @@ -857,6 +867,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; + bool changed = false; int err = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -869,8 +880,8 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) return -EINVAL; err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_DELLINK); - if (err == 0) + afspec, RTM_DELLINK, &changed); + if (changed) /* Send RTM_NEWLINK because userspace * expects RTM_NEWLINK for vlan dels */ diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 3712c7f0e00c..da8cb99fd259 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + }; static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, - u16 vid, u32 tun_id) + u16 vid, u32 tun_id, bool *changed) { int err = 0; @@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, switch (cmd) { case RTM_SETLINK: err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + if (!err) + *changed = true; break; case RTM_DELLINK: - nbp_vlan_tunnel_info_delete(p, vid); + if (!nbp_vlan_tunnel_info_delete(p, vid)) + *changed = true; break; } @@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last) + struct vtunnel_info *tinfo_last, + bool *changed) { int err; @@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, return -EINVAL; t = tinfo_last->tunid; for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { - err = br_vlan_tunnel_info(p, cmd, v, t); + err = br_vlan_tunnel_info(p, cmd, v, t, changed); if (err) return err; t++; @@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, if (tinfo_last->flags) return -EINVAL; err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, - tinfo_curr->tunid); + tinfo_curr->tunid, changed); if (err) return err; memset(tinfo_last, 0, sizeof(struct vtunnel_info)); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fa0039f44818..860e4afaf71a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -803,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); @@ -816,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port); @@ -903,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } @@ -926,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br) return 0; } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 4a447a378ab3..a259471bfd78 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last); + struct vtunnel_info *tinfo_last, + bool *changed); int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); int br_fill_vlan_tunnel_info(struct sk_buff *skb, struct net_bridge_vlan_group *vg); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 233a30040c91..51935270c651 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); } -static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid == vid) - return; + return false; smp_wmb(); vg->pvid = vid; + + return true; } -static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid != vid) - return; + return false; smp_wmb(); vg->pvid = 0; + + return true; } -static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) +/* return true if anything changed, false otherwise */ +static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) { struct net_bridge_vlan_group *vg; + u16 old_flags = v->flags; + bool ret; if (br_vlan_is_master(v)) vg = br_vlan_group(v->br); @@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) vg = nbp_vlan_group(v->port); if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(vg, v->vid); + ret = __vlan_add_pvid(vg, v->vid); else - __vlan_delete_pvid(vg, v->vid); + ret = __vlan_delete_pvid(vg, v->vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; else v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; + + return ret || !!(old_flags ^ v->flags); } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, @@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid vg = br_vlan_group(br); masterv = br_vlan_find(vg, vid); if (!masterv) { + bool changed; + /* missing global ctx, create it now */ - if (br_vlan_add(br, vid, 0)) + if (br_vlan_add(br, vid, 0, &changed)) return NULL; masterv = br_vlan_find(vg, vid); if (WARN_ON(!masterv)) @@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) /* need to work on the master vlan too */ if (flags & BRIDGE_VLAN_INFO_MASTER) { - err = br_vlan_add(br, v->vid, flags | - BRIDGE_VLAN_INFO_BRENTRY); + bool changed; + + err = br_vlan_add(br, v->vid, + flags | BRIDGE_VLAN_INFO_BRENTRY, + &changed); if (err) goto out_filt; } @@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated */ -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; @@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vg = br_vlan_group(br); vlan = br_vlan_find(vg, vid); if (vlan) { @@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) refcount_inc(&vlan->refcnt); vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; + *changed = true; } - __vlan_add_flags(vlan, flags); + if (__vlan_add_flags(vlan, flags)) + *changed = true; + return 0; } @@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (ret) { free_percpu(vlan->stats); kfree(vlan); + } else { + *changed = true; } return ret; @@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) const struct net_bridge_vlan *pvent; struct net_bridge_vlan_group *vg; struct net_bridge_port *p; + unsigned long *changed; + bool vlchange; u16 old_pvid; int err = 0; - unsigned long *changed; if (!pvid) { br_vlan_disable_default_pvid(br); @@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = br_vlan_add(br, pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); if (err) goto out; br_vlan_delete(br, old_pvid); @@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = nbp_vlan_add(p, pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); if (err) goto err_port; nbp_vlan_delete(p, old_pvid); @@ -890,7 +914,8 @@ err_port: if (old_pvid) nbp_vlan_add(p, old_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); nbp_vlan_delete(p, pvid); } @@ -899,7 +924,8 @@ err_port: br_vlan_add(br, old_pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); br_vlan_delete(br, pvid); } goto out; @@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br) { struct net_bridge_vlan_group *vg; int ret = -ENOMEM; + bool changed; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) @@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br) rcu_assign_pointer(br->vlgrp, vg); ret = br_vlan_add(br, 1, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, &changed); if (ret) goto err_vlan_add; @@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p) INIT_LIST_HEAD(&vg->vlan_list); rcu_assign_pointer(p->vlgrp, vg); if (p->br->default_pvid) { + bool changed; + ret = nbp_vlan_add(p, p->br->default_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &changed); if (ret) goto err_vlan_add; } @@ -1016,8 +1046,10 @@ err_vlan_enabled: /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { struct switchdev_obj_port_vlan v = { .obj.orig_dev = port->dev, @@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vlan = br_vlan_find(nbp_vlan_group(port), vid); if (vlan) { /* Pass the flags to the hardware bridge */ ret = switchdev_port_obj_add(port->dev, &v.obj); if (ret && ret != -EOPNOTSUPP) return ret; - __vlan_add_flags(vlan, flags); + *changed = __vlan_add_flags(vlan, flags); + return 0; } @@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ret = __vlan_add(vlan, flags); if (ret) kfree(vlan); + else + *changed = true; return ret; } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index fe0081730305..b8f2d9f7c3ed 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -68,37 +68,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { [DSA_TAG_PROTO_NONE] = &none_ops, }; -int dsa_cpu_dsa_setup(struct dsa_port *port) -{ - struct device_node *port_dn = port->dn; - struct dsa_switch *ds = port->ds; - struct phy_device *phydev; - int ret, mode; - - if (of_phy_is_fixed_link(port_dn)) { - ret = of_phy_register_fixed_link(port_dn); - if (ret) { - dev_err(ds->dev, "failed to register fixed PHY\n"); - return ret; - } - phydev = of_phy_find_device(port_dn); - - mode = of_get_phy_mode(port_dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; - - genphy_config_init(phydev); - genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port->index, phydev); - - put_device(&phydev->mdio.dev); - } - - return 0; -} - const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) { const struct dsa_device_ops *ops; @@ -113,14 +82,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -void dsa_cpu_dsa_destroy(struct dsa_port *port) -{ - struct device_node *port_dn = port->dn; - - if (of_phy_is_fixed_link(port_dn)) - of_phy_deregister_fixed_link(port_dn); -} - static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ec58654a71cd..de91c48b6806 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -219,7 +219,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(port); + err = dsa_port_fixed_link_register_of(port); if (err) { dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", port->index, err); @@ -235,7 +235,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port) static void dsa_dsa_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); + dsa_port_fixed_link_unregister_of(port); } static int dsa_cpu_port_apply(struct dsa_port *port) @@ -243,7 +243,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port) struct dsa_switch *ds = port->ds; int err; - err = dsa_cpu_dsa_setup(port); + err = dsa_port_fixed_link_register_of(port); if (err) { dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", port->index, err); @@ -259,7 +259,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port) static void dsa_cpu_port_unapply(struct dsa_port *port) { devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); + dsa_port_fixed_link_unregister_of(port); } static int dsa_user_port_apply(struct dsa_port *port) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 1e9914062d0b..1e65afd6989e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -93,8 +93,6 @@ struct dsa_slave_priv { }; /* dsa.c */ -int dsa_cpu_dsa_setup(struct dsa_port *port); -void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); bool dsa_schedule_work(struct work_struct *work); @@ -159,6 +157,9 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); +int dsa_port_fixed_link_register_of(struct dsa_port *dp); +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 93e1b116ef83..ed7aae342fca 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - ret = dsa_cpu_dsa_setup(&ds->ports[port]); + ret = dsa_port_fixed_link_register_of(&ds->ports[port]); if (ret) return ret; } @@ -274,7 +274,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_cpu_dsa_destroy(&ds->ports[port]); + dsa_port_fixed_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) diff --git a/net/dsa/port.c b/net/dsa/port.c index 72c8dbd3d3f2..bb30b1a7de3a 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -12,6 +12,8 @@ #include <linux/if_bridge.h> #include <linux/notifier.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> #include "dsa_priv.h" @@ -264,3 +266,48 @@ int dsa_port_vlan_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } + +int dsa_port_fixed_link_register_of(struct dsa_port *dp) +{ + struct device_node *dn = dp->dn; + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int mode; + int err; + + if (of_phy_is_fixed_link(dn)) { + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } + + phydev = of_phy_find_device(dn); + + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; + + genphy_config_init(phydev); + genphy_read_status(phydev); + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + put_device(&phydev->mdio.dev); + } + + return 0; +} + +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +{ + struct device_node *dn = dp->dn; + + if (of_phy_is_fixed_link(dn)) + of_phy_deregister_fixed_link(dn); +} diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 77cf32a80952..fda37f2862c9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f1bcb9b7e082..4602af6d5358 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -438,13 +438,6 @@ static struct ctl_table ipv4_table[] = { .extra1 = &one, }, { - .procname = "tcp_min_rtt_wlen", - .data = &sysctl_tcp_min_rtt_wlen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -452,53 +445,11 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_no_metrics_save", - .data = &sysctl_tcp_nometrics_save, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_moderate_rcvbuf", - .data = &sysctl_tcp_moderate_rcvbuf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_tso_win_divisor", - .data = &sysctl_tcp_tso_win_divisor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_workaround_signed_windows", - .data = &sysctl_tcp_workaround_signed_windows, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_limit_output_bytes", - .data = &sysctl_tcp_limit_output_bytes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_challenge_ack_limit", - .data = &sysctl_tcp_challenge_ack_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -542,49 +493,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_allowed_congestion_control, }, { - .procname = "tcp_min_tso_segs", - .data = &sysctl_tcp_min_tso_segs, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &gso_max_segs, - }, - { - .procname = "tcp_pacing_ss_ratio", - .data = &sysctl_tcp_pacing_ss_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, - { - .procname = "tcp_pacing_ca_ratio", - .data = &sysctl_tcp_pacing_ca_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, - { - .procname = "tcp_autocorking", - .data = &sysctl_tcp_autocorking, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, - }, - { - .procname = "tcp_invalid_ratelimit", - .data = &sysctl_tcp_invalid_ratelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, .mode = 0444, @@ -1145,6 +1053,98 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_no_metrics_save", + .data = &init_net.ipv4.sysctl_tcp_nometrics_save, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_moderate_rcvbuf", + .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_tso_win_divisor", + .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_workaround_signed_windows", + .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_limit_output_bytes", + .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_challenge_ack_limit", + .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_min_tso_segs", + .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &gso_max_segs, + }, + { + .procname = "tcp_min_rtt_wlen", + .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_autocorking", + .data = &init_net.ipv4.sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "tcp_invalid_ratelimit", + .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "tcp_pacing_ss_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, + { + .procname = "tcp_pacing_ca_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c7c983f0f817..a7a0f316eb86 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -285,10 +285,6 @@ #include <trace/events/tcp.h> -int sysctl_tcp_min_tso_segs __read_mostly = 2; - -int sysctl_tcp_autocorking __read_mostly = 1; - struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -699,7 +695,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sysctl_tcp_autocorking && + sock_net(sk)->ipv4.sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 90d76f1c8f96..b62a7d1707ae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -76,16 +76,9 @@ #include <asm/unaligned.h> #include <linux/errqueue.h> #include <trace/events/tcp.h> -#include <linux/unaligned/access_ok.h> #include <linux/static_key.h> -/* rfc5961 challenge ack rate limiting */ -int sysctl_tcp_challenge_ack_limit = 1000; - int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_min_rtt_wlen __read_mostly = 300; -int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -411,7 +404,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency * Allow enough cushion so that sender is not limited by our window */ - if (sysctl_tcp_moderate_rcvbuf) + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) rcvmem <<= 2; if (sk->sk_rcvbuf < rcvmem) @@ -602,7 +595,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * <prev RTT . ><current RTT .. ><next RTT .... > */ - if (sysctl_tcp_moderate_rcvbuf && + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvwin, rcvmem, rcvbuf; @@ -773,15 +766,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->srtt_us = max(1U, srtt); } -/* Set the sk_pacing_rate to allow proper sizing of TSO packets. - * Note: TCP stack does not yet implement pacing. - * FQ packet scheduler can be used to implement cheap but effective - * TCP pacing, to smooth the burst on large writes when packets - * in flight is significantly lower than cwnd (or rwin) - */ -int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; -int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; - static void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -799,9 +783,9 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tp->snd_cwnd < tp->snd_ssthresh / 2) - rate *= sysctl_tcp_pacing_ss_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else - rate *= sysctl_tcp_pacing_ca_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; rate *= max(tp->snd_cwnd, tp->packets_out); @@ -2919,8 +2903,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { + u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; struct tcp_sock *tp = tcp_sk(sk); - u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? : jiffies_to_usecs(1)); @@ -3408,7 +3392,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3444,10 +3428,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) static u32 challenge_timestamp; static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); u32 count, now; /* First check our per-socket dupack rate limit. */ - if (__tcp_oow_rate_limited(sock_net(sk), + if (__tcp_oow_rate_limited(net, LINUX_MIB_TCPACKSKIPPEDCHALLENGE, &tp->last_oow_ack_time)) return; @@ -3455,16 +3440,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; + u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; - WRITE_ONCE(challenge_count, half + - prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); if (count > 0) { WRITE_ONCE(challenge_count, count - 1); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 49757c758211..7c1dae6493c3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2493,6 +2493,22 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_app_win = 31; net->ipv4.sysctl_tcp_adv_win_scale = 1; net->ipv4.sysctl_tcp_frto = 2; + net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; + /* This limits the percentage of the congestion window which we + * will allow a single TSO frame to consume. Building TSO frames + * which are too large can cause TCP streams to be bursty. + */ + net->ipv4.sysctl_tcp_tso_win_divisor = 3; + /* Default TSQ limit of four TSO segments */ + net->ipv4.sysctl_tcp_limit_output_bytes = 262144; + /* rfc5961 challenge ack rate limiting */ + net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + net->ipv4.sysctl_tcp_min_tso_segs = 2; + net->ipv4.sysctl_tcp_min_rtt_wlen = 300; + net->ipv4.sysctl_tcp_autocorking = 1; + net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; + net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; + net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0ab78abc811b..0507b56b6d4b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -20,8 +20,6 @@ #include <net/tcp.h> #include <net/genetlink.h> -int sysctl_tcp_nometrics_save __read_mostly; - static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, const struct inetpeer_addr *daddr, struct net *net, unsigned int hash); @@ -330,7 +328,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (sysctl_tcp_nometrics_save || !dst) + if (net->ipv4.sysctl_tcp_nometrics_save || !dst) return; rcu_read_lock(); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3270ab8416ce..3c65c1a3f944 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -369,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, full_space = rcv_wnd * mss; /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(full_space, + tcp_select_initial_window(sk_listener, full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rsk_rcv_wnd, &req->rsk_window_clamp, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 55a0aa4b96df..aab6e7145013 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -45,20 +45,6 @@ #include <trace/events/tcp.h> -/* People can turn this on to work with those rare, broken TCPs that - * interpret the window field as a signed quantity. - */ -int sysctl_tcp_workaround_signed_windows __read_mostly = 0; - -/* Default TSQ limit of four TSO segments */ -int sysctl_tcp_limit_output_bytes __read_mostly = 262144; - -/* This limits the percentage of the congestion window which we - * will allow a single TSO frame to consume. Building TSO frames - * which are too large can cause TCP streams to be bursty. - */ -int sysctl_tcp_tso_win_divisor __read_mostly = 3; - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); @@ -202,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss) * be a multiple of mss if possible. We assume here that mss >= 1. * This MUST be enforced by all callers. */ -void tcp_select_initial_window(int __space, __u32 mss, +void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) @@ -226,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ - if (sysctl_tcp_workaround_signed_windows) + if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = space; @@ -286,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk) /* Make sure we do not exceed the maximum possible * scaled window. */ - if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) + if (!tp->rx_opt.rcv_wscale && + sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -1771,7 +1758,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; return tso_segs ? : - tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ @@ -1988,7 +1976,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); @@ -2225,7 +2213,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, unsigned int limit; limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit = min_t(u32, limit, + sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; if (refcount_read(&sk->sk_wmem_alloc) > limit) { @@ -3355,7 +3344,7 @@ static void tcp_connect_init(struct sock *sk) if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); - tcp_select_initial_window(tcp_full_space(sk), + tcp_select_initial_window(sk, tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 7835dea930b4..9f918a770f87 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -187,7 +187,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - struct in6_addr tmp_addr; int ret; if (opt->dsthao) { @@ -229,9 +228,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; - tmp_addr = ipv6h->saddr; - ipv6h->saddr = hao->addr; - hao->addr = tmp_addr; + swap(ipv6h->saddr, hao->addr); if (skb->tstamp == 0) __net_timestamp(skb); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4e7817abc0b9..e7a3a6b6cf56 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index af22aa8ae35b..a1d56e143fcd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -322,8 +322,8 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); -static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, - struct l2tp_session *session) +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel) { struct l2tp_session *session_walk; struct hlist_head *g_head; @@ -371,6 +371,10 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, hlist_add_head(&session->hlist, head); write_unlock_bh(&tunnel->hlist_lock); + /* Ignore management session in session count value */ + if (session->session_id != 0) + atomic_inc(&l2tp_session_count); + return 0; err_tlock_pnlock: @@ -380,6 +384,7 @@ err_tlock: return err; } +EXPORT_SYMBOL_GPL(l2tp_session_register); /* Lookup a tunnel by id */ @@ -1788,7 +1793,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; - int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1846,17 +1850,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn refcount_set(&session->ref_count, 1); - err = l2tp_session_add_to_tunnel(tunnel, session); - if (err) { - kfree(session); - - return ERR_PTR(err); - } - - /* Ignore management session in session count value */ - if (session->session_id != 0) - atomic_inc(&l2tp_session_count); - return session; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 67c79d9b5c6c..77caa5966736 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,9 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel); + void __l2tp_session_unhash(struct l2tp_session *session); int l2tp_session_delete(struct l2tp_session *session); void l2tp_session_free(struct l2tp_session *session); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 014a7bc2a872..d29bfee291cb 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -54,7 +54,7 @@ struct l2tp_eth { /* via l2tp_session_priv() */ struct l2tp_eth_sess { - struct net_device *dev; + struct net_device __rcu *dev; }; @@ -72,7 +72,14 @@ static int l2tp_eth_dev_init(struct net_device *dev) static void l2tp_eth_dev_uninit(struct net_device *dev) { - dev_put(dev); + struct l2tp_eth *priv = netdev_priv(dev); + struct l2tp_eth_sess *spriv; + + spriv = l2tp_session_priv(priv->session); + RCU_INIT_POINTER(spriv->dev, NULL); + /* No need for synchronize_net() here. We're called by + * unregister_netdev*(), which does the synchronisation for us. + */ } static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) @@ -130,8 +137,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev) static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) { struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; - struct l2tp_eth *priv = netdev_priv(dev); + struct net_device *dev; + struct l2tp_eth *priv; if (session->debug & L2TP_MSG_DATA) { unsigned int length; @@ -155,16 +162,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb_dst_drop(skb); nf_reset(skb); + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) + goto error_rcu; + + priv = netdev_priv(dev); if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { atomic_long_inc(&priv->rx_packets); atomic_long_add(data_len, &priv->rx_bytes); } else { atomic_long_inc(&priv->rx_errors); } + rcu_read_unlock(); + return; +error_rcu: + rcu_read_unlock(); error: - atomic_long_inc(&priv->rx_errors); kfree_skb(skb); } @@ -175,11 +191,15 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (session) { spriv = l2tp_session_priv(session); - dev = spriv->dev; + + rtnl_lock(); + dev = rtnl_dereference(spriv->dev); if (dev) { - unregister_netdev(dev); - spriv->dev = NULL; + unregister_netdevice(dev); + rtnl_unlock(); module_put(THIS_MODULE); + } else { + rtnl_unlock(); } } } @@ -189,9 +209,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) { + rcu_read_unlock(); + return; + } + dev_hold(dev); + rcu_read_unlock(); seq_printf(m, " interface %s\n", dev->name); + + dev_put(dev); } #endif @@ -268,14 +299,14 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, peer_session_id, cfg); if (IS_ERR(session)) { rc = PTR_ERR(session); - goto out; + goto err; } dev = alloc_netdev(sizeof(*priv), name, name_assign_type, l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; - goto out_del_session; + goto err_sess; } dev_net_set(dev, net); @@ -295,26 +326,48 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, #endif spriv = l2tp_session_priv(session); - spriv->dev = dev; - rc = register_netdev(dev); - if (rc < 0) - goto out_del_dev; + l2tp_session_inc_refcount(session); + + rtnl_lock(); + + /* Register both device and session while holding the rtnl lock. This + * ensures that l2tp_eth_delete() will see that there's a device to + * unregister, even if it happened to run before we assign spriv->dev. + */ + rc = l2tp_session_register(session, tunnel); + if (rc < 0) { + rtnl_unlock(); + goto err_sess_dev; + } + + rc = register_netdevice(dev); + if (rc < 0) { + rtnl_unlock(); + l2tp_session_delete(session); + l2tp_session_dec_refcount(session); + free_netdev(dev); + + return rc; + } - __module_get(THIS_MODULE); - /* Must be done after register_netdev() */ strlcpy(session->ifname, dev->name, IFNAMSIZ); + rcu_assign_pointer(spriv->dev, dev); - dev_hold(dev); + rtnl_unlock(); + + l2tp_session_dec_refcount(session); + + __module_get(THIS_MODULE); return 0; -out_del_dev: +err_sess_dev: + l2tp_session_dec_refcount(session); free_netdev(dev); - spriv->dev = NULL; -out_del_session: - l2tp_session_delete(session); -out: +err_sess: + kfree(session); +err: return rc; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f50452b919d5..845aba543dce 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -122,8 +122,11 @@ struct pppol2tp_session { int owner; /* pid that opened the socket */ - struct sock *sock; /* Pointer to the session + struct mutex sk_lock; /* Protects .sk */ + struct sock __rcu *sk; /* Pointer to the session * PPPoX socket */ + struct sock *__sk; /* Copy of .sk, for cleanup */ + struct rcu_head rcu; /* For asynchronous release */ struct sock *tunnel_sock; /* Pointer to the tunnel UDP * socket */ int flags; /* accessed by PPPIOCGFLAGS. @@ -138,6 +141,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = { static const struct proto_ops pppol2tp_ops; +/* Retrieves the pppol2tp socket associated to a session. + * A reference is held on the returned socket, so this function must be paired + * with sock_put(). + */ +static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; + + rcu_read_lock(); + sk = rcu_dereference(ps->sk); + if (sk) + sock_hold(sk); + rcu_read_unlock(); + + return sk; +} + /* Helpers to obtain tunnel/session contexts from sockets. */ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) @@ -224,7 +245,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int /* If the socket is bound, send it in to PPP's input queue. Otherwise * queue it on the session socket. */ - sk = ps->sock; + rcu_read_lock(); + sk = rcu_dereference(ps->sk); if (sk == NULL) goto no_sock; @@ -247,30 +269,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int kfree_skb(skb); } } + rcu_read_unlock(); return; no_sock: + rcu_read_unlock(); l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name); kfree_skb(skb); } -static void pppol2tp_session_sock_hold(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_hold(ps->sock); -} - -static void pppol2tp_session_sock_put(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_put(ps->sock); -} - /************************************************************************ * Transmit handling ***********************************************************************/ @@ -431,17 +439,16 @@ abort: */ static void pppol2tp_session_close(struct l2tp_session *session) { - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk = ps->sock; - struct socket *sock = sk->sk_socket; + struct sock *sk; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (sock) - inet_shutdown(sock, SEND_SHUTDOWN); - - /* Don't let the session go away before our socket does */ - l2tp_session_inc_refcount(session); + sk = pppol2tp_session_get_sock(session); + if (sk) { + if (sk->sk_socket) + inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); + sock_put(sk); + } } /* Really kill the session socket. (Called from sock_put() if @@ -461,6 +468,14 @@ static void pppol2tp_session_destruct(struct sock *sk) } } +static void pppol2tp_put_sk(struct rcu_head *head) +{ + struct pppol2tp_session *ps; + + ps = container_of(head, typeof(*ps), rcu); + sock_put(ps->__sk); +} + /* Called when the PPPoX socket (session) is closed. */ static int pppol2tp_release(struct socket *sock) @@ -486,11 +501,23 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); - /* Purge any queued data */ if (session != NULL) { - __l2tp_session_unhash(session); - l2tp_session_queue_purge(session); - sock_put(sk); + struct pppol2tp_session *ps; + + l2tp_session_delete(session); + + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + mutex_unlock(&ps->sk_lock); + call_rcu(&ps->rcu, pppol2tp_put_sk); + + /* Rely on the sock_put() call at the end of the function for + * dropping the reference held by pppol2tp_sock_to_session(). + * The last reference will be dropped by pppol2tp_put_sk(). + */ } release_sock(sk); @@ -557,16 +584,47 @@ out: static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; - struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; + + sk = pppol2tp_session_get_sock(session); + if (sk) { + struct pppox_sock *po = pppox_sk(sk); - if (ps) { - struct pppox_sock *po = pppox_sk(ps->sock); - if (po) - seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); } } #endif +static void pppol2tp_session_init(struct l2tp_session *session) +{ + struct pppol2tp_session *ps; + struct dst_entry *dst; + + session->recv_skb = pppol2tp_recv; + session->session_close = pppol2tp_session_close; +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) + session->show = pppol2tp_show; +#endif + + ps = l2tp_session_priv(session); + mutex_init(&ps->sk_lock); + ps->tunnel_sock = session->tunnel->sock; + ps->owner = current->pid; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(session->tunnel->sock); + if (dst) { + u32 pmtu = dst_mtu(dst); + + if (pmtu) { + session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD; + session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; + } + dst_release(dst); + } +} + /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket */ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, @@ -578,7 +636,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, struct l2tp_session *session = NULL; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; - struct dst_entry *dst; struct l2tp_session_cfg cfg = { 0, }; int error = 0; u32 tunnel_id, peer_tunnel_id; @@ -693,13 +750,17 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Using a pre-existing session is fine as long as it hasn't * been connected yet. */ - if (ps->sock) { + mutex_lock(&ps->sk_lock); + if (rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock))) { + mutex_unlock(&ps->sk_lock); error = -EEXIST; goto end; } /* consistency checks */ if (ps->tunnel_sock != tunnel->sock) { + mutex_unlock(&ps->sk_lock); error = -EEXIST; goto end; } @@ -715,35 +776,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = PTR_ERR(session); goto end; } - } - - /* Associate session with its PPPoL2TP socket */ - ps = l2tp_session_priv(session); - ps->owner = current->pid; - ps->sock = sk; - ps->tunnel_sock = tunnel->sock; - - session->recv_skb = pppol2tp_recv; - session->session_close = pppol2tp_session_close; -#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) - session->show = pppol2tp_show; -#endif - - /* We need to know each time a skb is dropped from the reorder - * queue. - */ - session->ref = pppol2tp_session_sock_hold; - session->deref = pppol2tp_session_sock_put; - /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(tunnel->sock); - if (dst != NULL) { - u32 pmtu = dst_mtu(dst); + pppol2tp_session_init(session); + ps = l2tp_session_priv(session); + l2tp_session_inc_refcount(session); - if (pmtu != 0) - session->mtu = session->mru = pmtu - - PPPOL2TP_HEADER_OVERHEAD; - dst_release(dst); + mutex_lock(&ps->sk_lock); + error = l2tp_session_register(session, tunnel); + if (error < 0) { + mutex_unlock(&ps->sk_lock); + kfree(session); + goto end; + } + drop_refcnt = true; } /* Special case: if source & dest session_id == 0x0000, this @@ -768,12 +813,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.mtu = session->mtu; error = ppp_register_net_channel(sock_net(sk), &po->chan); - if (error) + if (error) { + mutex_unlock(&ps->sk_lock); goto end; + } out_no_ppp: /* This is how we get the session context from the socket. */ sk->sk_user_data = session; + rcu_assign_pointer(ps->sk, sk); + mutex_unlock(&ps->sk_lock); + + /* Keep the reference we've grabbed on the session: sk doesn't expect + * the session to disappear. pppol2tp_session_destruct() is responsible + * for dropping it. + */ + drop_refcnt = false; + sk->sk_state = PPPOX_CONNECTED; l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", session->name); @@ -795,12 +851,11 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, { int error; struct l2tp_session *session; - struct pppol2tp_session *ps; /* Error if tunnel socket is not prepped */ if (!tunnel->sock) { error = -ENOENT; - goto out; + goto err; } /* Default MTU values. */ @@ -815,18 +870,20 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, peer_session_id, cfg); if (IS_ERR(session)) { error = PTR_ERR(session); - goto out; + goto err; } - ps = l2tp_session_priv(session); - ps->tunnel_sock = tunnel->sock; + pppol2tp_session_init(session); - l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", - session->name); + error = l2tp_session_register(session, tunnel); + if (error < 0) + goto err_sess; - error = 0; + return 0; -out: +err_sess: + kfree(session); +err: return error; } @@ -987,12 +1044,10 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session, "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", session->name, cmd, arg); - sk = ps->sock; + sk = pppol2tp_session_get_sock(session); if (!sk) return -EBADR; - sock_hold(sk); - switch (cmd) { case SIOCGIFMTU: err = -ENXIO; @@ -1268,7 +1323,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk, int optname, int val) { int err = 0; - struct pppol2tp_session *ps = l2tp_session_priv(session); switch (optname) { case PPPOL2TP_SO_RECVSEQ: @@ -1289,8 +1343,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk, } session->send_seq = !!val; { - struct sock *ssk = ps->sock; - struct pppox_sock *po = pppox_sk(ssk); + struct pppox_sock *po = pppox_sk(sk); + po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } @@ -1629,8 +1683,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) { struct l2tp_session *session = v; struct l2tp_tunnel *tunnel = session->tunnel; - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct pppox_sock *po = pppox_sk(ps->sock); + unsigned char state; + char user_data_ok; + struct sock *sk; u32 ip = 0; u16 port = 0; @@ -1640,6 +1695,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) port = ntohs(inet->inet_sport); } + sk = pppol2tp_session_get_sock(session); + if (sk) { + state = sk->sk_state; + user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N'; + } else { + state = 0; + user_data_ok = 'N'; + } + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " "%04X/%04X %d %c\n", session->name, ip, port, @@ -1647,9 +1711,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->session_id, tunnel->peer_tunnel_id, session->peer_session_id, - ps->sock->sk_state, - (session == ps->sock->sk_user_data) ? - 'Y' : 'N'); + state, user_data_ok); seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", session->mtu, session->mru, session->recv_seq ? 'R' : '-', @@ -1666,8 +1728,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) atomic_long_read(&session->stats.rx_bytes), atomic_long_read(&session->stats.rx_errors)); - if (po) + if (sk) { + struct pppox_sock *po = pppox_sk(sk); + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); + } } static int pppol2tp_seq_show(struct seq_file *m, void *v) |