diff options
Diffstat (limited to 'net')
46 files changed, 1956 insertions, 1552 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b41375d4d295..15787e8c0629 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <net/route.h> +#include <net/compat.h> #include <linux/atalk.h> #include <linux/highmem.h> @@ -867,6 +868,24 @@ static int atif_ioctl(int cmd, void __user *arg) return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0; } +static int atrtr_ioctl_addrt(struct rtentry *rt) +{ + struct net_device *dev = NULL; + + if (rt->rt_dev) { + char name[IFNAMSIZ]; + + if (copy_from_user(name, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; + + dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; + } + return atrtr_create(rt, dev); +} + /* Routing ioctl() calls */ static int atrtr_ioctl(unsigned int cmd, void __user *arg) { @@ -882,19 +901,8 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) return atrtr_delete(&((struct sockaddr_at *) &rt.rt_dst)->sat_addr); - case SIOCADDRT: { - struct net_device *dev = NULL; - if (rt.rt_dev) { - char name[IFNAMSIZ]; - if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) - return -EFAULT; - name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(&init_net, name); - if (!dev) - return -ENODEV; - } - return atrtr_create(&rt, dev); - } + case SIOCADDRT: + return atrtr_ioctl_addrt(&rt); } return -EINVAL; } @@ -1832,20 +1840,58 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT +static int atalk_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + switch (cmd) { + case SIOCDELRT: + if (rt.rt_dst.sa_family != AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *) + &rt.rt_dst)->sat_addr); + + case SIOCADDRT: + rt.rt_dev = compat_ptr(rtdev); + return atrtr_ioctl_addrt(&rt); + default: + return -EINVAL; + } +} static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return atalk_compat_routing_ioctl(sk, cmd, argp); /* * SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we * cannot handle it in common code. The data we access if ifreq * here is compatible, so we can simply call the native * handler. */ - if (cmd == SIOCATALKDIFADDR) - return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); - - return -ENOIOCTLCMD; + case SIOCATALKDIFADDR: + return atalk_ioctl(sock, cmd, (unsigned long)argp); + default: + return -ENOIOCTLCMD; + } } -#endif +#endif /* CONFIG_COMPAT */ static const struct net_proto_family atalk_family_ops = { diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index d955b683aa7c..838ebf0cabbf 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, int error; struct list_head *pos; void __user *argp = (void __user *)arg; + void __user *buf; + int __user *len; vcc = ATM_SD(sock); switch (cmd) { @@ -162,7 +164,49 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, if (error != -ENOIOCTLCMD) goto done; - error = atm_dev_ioctl(cmd, argp, compat); + if (cmd == ATM_GETNAMES) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atm_iobuf __user *ciobuf = argp; + compat_uptr_t cbuf; + len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); +#endif + } else { + struct atm_iobuf __user *iobuf = argp; + len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) + return -EFAULT; + } + error = atm_getnames(buf, len); + } else { + int number; + + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atmif_sioc __user *csioc = argp; + compat_uptr_t carg; + + len = &csioc->length; + if (get_user(carg, &csioc->arg)) + return -EFAULT; + buf = compat_ptr(carg); + if (get_user(number, &csioc->number)) + return -EFAULT; +#endif + } else { + struct atmif_sioc __user *sioc = argp; + + len = &sioc->length; + if (get_user(buf, &sioc->arg)) + return -EFAULT; + if (get_user(number, &sioc->number)) + return -EFAULT; + } + error = atm_dev_ioctl(cmd, buf, len, number, compat); + } done: return error; @@ -230,61 +274,25 @@ static struct { static int do_atm_iobuf(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atm_iobuf __user *iobuf; - struct compat_atm_iobuf __user *iobuf32; + struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg); u32 data; - void __user *datap; - int len, err; - - iobuf = compat_alloc_user_space(sizeof(*iobuf)); - iobuf32 = compat_ptr(arg); - if (get_user(len, &iobuf32->length) || - get_user(data, &iobuf32->buffer)) + if (get_user(data, &iobuf32->buffer)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(len, &iobuf->length) || - put_user(datap, &iobuf->buffer)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0); - if (!err) { - if (copy_in_user(&iobuf32->length, &iobuf->length, - sizeof(int))) - err = -EFAULT; - } - - return err; + return atm_getnames(&iobuf32->length, compat_ptr(data)); } static int do_atmif_sioc(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atmif_sioc __user *sioc; - struct compat_atmif_sioc __user *sioc32; + struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg); + int number; u32 data; - void __user *datap; - int err; - - sioc = compat_alloc_user_space(sizeof(*sioc)); - sioc32 = compat_ptr(arg); - if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) || - get_user(data, &sioc32->arg)) + if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &sioc->arg)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0); - - if (!err) { - if (copy_in_user(&sioc32->length, &sioc->length, - sizeof(int))) - err = -EFAULT; - } - return err; + return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0); } static int do_atm_ioctl(struct socket *sock, unsigned int cmd32, diff --git a/net/atm/resources.c b/net/atm/resources.c index 889349c6d90d..94bdc6527ee8 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -193,88 +193,48 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, return error ? -EFAULT : 0; } -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) +int atm_getnames(void __user *buf, int __user *iobuf_len) { - void __user *buf; - int error, len, number, size = 0; + int error, len, size = 0; struct atm_dev *dev; struct list_head *p; int *tmp_buf, *tmp_p; - int __user *sioc_len; - int __user *iobuf_len; - switch (cmd) { - case ATM_GETNAMES: - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atm_iobuf __user *ciobuf = arg; - compat_uptr_t cbuf; - iobuf_len = &ciobuf->length; - if (get_user(cbuf, &ciobuf->buffer)) - return -EFAULT; - buf = compat_ptr(cbuf); -#endif - } else { - struct atm_iobuf __user *iobuf = arg; - iobuf_len = &iobuf->length; - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - } - if (get_user(len, iobuf_len)) - return -EFAULT; - mutex_lock(&atm_dev_mutex); - list_for_each(p, &atm_devs) - size += sizeof(int); - if (size > len) { - mutex_unlock(&atm_dev_mutex); - return -E2BIG; - } - tmp_buf = kmalloc(size, GFP_ATOMIC); - if (!tmp_buf) { - mutex_unlock(&atm_dev_mutex); - return -ENOMEM; - } - tmp_p = tmp_buf; - list_for_each(p, &atm_devs) { - dev = list_entry(p, struct atm_dev, dev_list); - *tmp_p++ = dev->number; - } + if (get_user(len, iobuf_len)) + return -EFAULT; + mutex_lock(&atm_dev_mutex); + list_for_each(p, &atm_devs) + size += sizeof(int); + if (size > len) { mutex_unlock(&atm_dev_mutex); - error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, iobuf_len)) - ? -EFAULT : 0; - kfree(tmp_buf); - return error; - default: - break; + return -E2BIG; } - - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atmif_sioc __user *csioc = arg; - compat_uptr_t carg; - - sioc_len = &csioc->length; - if (get_user(carg, &csioc->arg)) - return -EFAULT; - buf = compat_ptr(carg); - - if (get_user(len, &csioc->length)) - return -EFAULT; - if (get_user(number, &csioc->number)) - return -EFAULT; -#endif - } else { - struct atmif_sioc __user *sioc = arg; - - sioc_len = &sioc->length; - if (get_user(buf, &sioc->arg)) - return -EFAULT; - if (get_user(len, &sioc->length)) - return -EFAULT; - if (get_user(number, &sioc->number)) - return -EFAULT; + tmp_buf = kmalloc(size, GFP_ATOMIC); + if (!tmp_buf) { + mutex_unlock(&atm_dev_mutex); + return -ENOMEM; + } + tmp_p = tmp_buf; + list_for_each(p, &atm_devs) { + dev = list_entry(p, struct atm_dev, dev_list); + *tmp_p++ = dev->number; } + mutex_unlock(&atm_dev_mutex); + error = ((copy_to_user(buf, tmp_buf, size)) || + put_user(size, iobuf_len)) + ? -EFAULT : 0; + kfree(tmp_buf); + return error; +} + +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat) +{ + int error, len, size = 0; + struct atm_dev *dev; + + if (get_user(len, sioc_len)) + return -EFAULT; dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", number); diff --git a/net/atm/resources.h b/net/atm/resources.h index 048232e4d4c6..4a0839e92ff3 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -14,8 +14,9 @@ extern struct list_head atm_devs; extern struct mutex atm_dev_mutex; -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat); - +int atm_getnames(void __user *buf, int __user *iobuf_len); +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat); #ifdef CONFIG_PROC_FS diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ccb535c77e5d..8bdabc03b0b2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; - if (!access_ok(buf, count)) - return -EFAULT; - error = wait_event_interruptible(socket_client->queue_wait, socket_client->queue_len); diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index d7bc09de4c13..528d767eb026 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -37,6 +37,26 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) return res; } +static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) +{ + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + struct net_bridge_port *p; + + p = rtnl_dereference(mrp->p_port); + if (p && p->dev->ifindex == ifindex) + return false; + + p = rtnl_dereference(mrp->s_port); + if (p && p->dev->ifindex == ifindex) + return false; + } + + return true; +} + static struct br_mrp *br_mrp_find_port(struct net_bridge *br, struct net_bridge_port *p) { @@ -203,6 +223,7 @@ out: static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) { struct net_bridge_port *p; + u8 state; /* Stop sending MRP_Test frames */ cancel_delayed_work_sync(&mrp->test_work); @@ -214,20 +235,24 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) p = rtnl_dereference(mrp->p_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->p_port, NULL); } p = rtnl_dereference(mrp->s_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->s_port, NULL); } @@ -255,6 +280,11 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) !br_mrp_get_port(br, instance->s_ifindex)) return -EINVAL; + /* It is not possible to have the same port part of multiple rings */ + if (!br_mrp_unique_ifindex(br, instance->p_ifindex) || + !br_mrp_unique_ifindex(br, instance->s_ifindex)) + return -EINVAL; + mrp = kzalloc(sizeof(*mrp), GFP_KERNEL); if (!mrp) return -ENOMEM; diff --git a/net/compat.c b/net/compat.c index 69fc6d1e4e6e..afd7b444e0bf 100644 --- a/net/compat.c +++ b/net/compat.c @@ -448,200 +448,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, return __compat_sys_getsockopt(fd, level, optname, optval, optlen); } -struct compat_group_req { - __u32 gr_interface; - struct __kernel_sockaddr_storage gr_group - __aligned(4); -} __packed; - -struct compat_group_source_req { - __u32 gsr_interface; - struct __kernel_sockaddr_storage gsr_group - __aligned(4); - struct __kernel_sockaddr_storage gsr_source - __aligned(4); -} __packed; - -struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); -} __packed; - -#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ - sizeof(struct __kernel_sockaddr_storage)) - - -int compat_mc_setsockopt(struct sock *sock, int level, int optname, - char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) -{ - char __user *koptval = optval; - int koptlen = optlen; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req __user *kgr = - compat_alloc_user_space(sizeof(struct group_req)); - u32 interface; - - if (!access_ok(gr32, sizeof(*gr32)) || - !access_ok(kgr, sizeof(struct group_req)) || - __get_user(interface, &gr32->gr_interface) || - __put_user(interface, &kgr->gr_interface) || - copy_in_user(&kgr->gr_group, &gr32->gr_group, - sizeof(kgr->gr_group))) - return -EFAULT; - koptval = (char __user *)kgr; - koptlen = sizeof(struct group_req); - break; - } - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req __user *kgsr = compat_alloc_user_space( - sizeof(struct group_source_req)); - u32 interface; - - if (!access_ok(gsr32, sizeof(*gsr32)) || - !access_ok(kgsr, - sizeof(struct group_source_req)) || - __get_user(interface, &gsr32->gsr_interface) || - __put_user(interface, &kgsr->gsr_interface) || - copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group, - sizeof(kgsr->gsr_group)) || - copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source, - sizeof(kgsr->gsr_source))) - return -EFAULT; - koptval = (char __user *)kgsr; - koptlen = sizeof(struct group_source_req); - break; - } - case MCAST_MSFILTER: - { - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - u32 interface, fmode, numsrc; - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, &gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - koptlen = optlen + sizeof(struct group_filter) - - sizeof(struct compat_group_filter); - if (koptlen < GROUP_FILTER_SIZE(numsrc)) - return -EINVAL; - kgf = compat_alloc_user_space(koptlen); - if (!access_ok(kgf, koptlen) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, - sizeof(kgf->gf_group)) || - (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist, - numsrc * sizeof(kgf->gf_slist[0])))) - return -EFAULT; - koptval = (char __user *)kgf; - break; - } - - default: - break; - } - return setsockopt(sock, level, optname, koptval, koptlen); -} -EXPORT_SYMBOL(compat_mc_setsockopt); - -int compat_mc_getsockopt(struct sock *sock, int level, int optname, - char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) -{ - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - int __user *koptlen; - u32 interface, fmode, numsrc; - int klen, ulen, err; - - if (optname != MCAST_MSFILTER) - return getsockopt(sock, level, optname, optval, optlen); - - koptlen = compat_alloc_user_space(sizeof(*koptlen)); - if (!access_ok(optlen, sizeof(*optlen)) || - __get_user(ulen, optlen)) - return -EFAULT; - - /* adjust len for pad */ - klen = ulen + sizeof(*kgf) - sizeof(*gf32); - - if (klen < GROUP_FILTER_SIZE(0)) - return -EINVAL; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __put_user(klen, koptlen)) - return -EFAULT; - - /* have to allow space for previous compat_alloc_user_space, too */ - kgf = compat_alloc_user_space(klen+sizeof(*optlen)); - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, &gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) - return -EFAULT; - - err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); - if (err) - return err; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __get_user(klen, koptlen)) - return -EFAULT; - - ulen = klen - (sizeof(*kgf)-sizeof(*gf32)); - - if (!access_ok(optlen, sizeof(*optlen)) || - __put_user(ulen, optlen)) - return -EFAULT; - - if (!access_ok(kgf, klen) || - !access_ok(gf32, ulen) || - __get_user(interface, &kgf->gf_interface) || - __get_user(fmode, &kgf->gf_fmode) || - __get_user(numsrc, &kgf->gf_numsrc) || - __put_user(interface, &gf32->gf_interface) || - __put_user(fmode, &gf32->gf_fmode) || - __put_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - if (numsrc) { - int copylen; - - klen -= GROUP_FILTER_SIZE(0); - copylen = numsrc * sizeof(gf32->gf_slist[0]); - if (copylen > klen) - copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) - return -EFAULT; - } - return err; -} -EXPORT_SYMBOL(compat_mc_getsockopt); - - /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index e951b743bed3..e64941c526b1 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -8,6 +8,7 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) { struct flow_rule *rule; + int i; rule = kzalloc(struct_size(rule, action.entries, num_actions), GFP_KERNEL); @@ -15,6 +16,11 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) return NULL; rule->action.num_entries = num_actions; + /* Pre-fill each action hw_stats with DONT_CARE. + * Caller can override this if it wants stats for a given action. + */ + for (i = 0; i < num_actions; i++) + rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; return rule; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b607ea602774..37e4dba62460 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family) } const struct nla_policy nda_policy[NDA_MAX+1] = { + [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID }, [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, @@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_IFINDEX] = { .type = NLA_U32 }, [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, + [NDA_NH_ID] = { .type = NLA_U32 }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1bf0c3d278e7..b8afefe6f6b6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3727,7 +3727,6 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive_list); /** * skb_segment - Perform protocol segmentation on skb. @@ -4191,7 +4190,6 @@ done: NAPI_GRO_CB(skb)->same_flow = 1; return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive); #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 @@ -6087,13 +6085,15 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) /** * __skb_ext_alloc - allocate a new skb extensions storage * + * @flags: See kmalloc(). + * * Returns the newly allocated pointer. The pointer can later attached to a * skb via __skb_ext_set(). * Note: caller must handle the skb_ext as an opaque data. */ -struct skb_ext *__skb_ext_alloc(void) +struct skb_ext *__skb_ext_alloc(gfp_t flags) { - struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); + struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); if (new) { memset(new->offset, 0, sizeof(new->offset)); @@ -6188,7 +6188,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); - new = __skb_ext_alloc(); + new = __skb_ext_alloc(GFP_ATOMIC); if (!new) return NULL; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0ddb13a6282b..b109cc8a6dd8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ #include <net/pkt_sched.h> static int two __maybe_unused = 2; +static int three = 3; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -39,6 +40,7 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); * IPv6: reset all settings to default * 1 - Both inherit all current settings from init_net * 2 - Both reset all settings to default + * 3 - Both inherit all settings from current netns */ int sysctl_devconf_inherit_init_net __read_mostly; EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); @@ -553,7 +555,7 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "high_order_alloc_disable", diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1e5e08cc0bfc..650187d68851 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1082,6 +1082,7 @@ static const struct proto_ops inet6_dccp_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 658a8580b464..9ef54cdcf662 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -129,13 +129,13 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1]; unsigned int from_channel, old_total, i; + bool mod = false, mod_combined = false; struct ethtool_channels channels = {}; struct ethnl_req_info req_info = {}; const struct nlattr *err_attr; const struct ethtool_ops *ops; struct net_device *dev; u32 max_rx_in_use = 0; - bool mod = false; int ret; ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, @@ -170,7 +170,8 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) ethnl_update_u32(&channels.other_count, tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod); ethnl_update_u32(&channels.combined_count, - tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod); + tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod_combined); + mod |= mod_combined; ret = 0; if (!mod) goto out_ops; @@ -193,6 +194,21 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) goto out_ops; } + /* ensure there is at least one RX and one TX channel */ + if (!channels.combined_count && !channels.rx_count) + err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + else if (!channels.combined_count && !channels.tx_count) + err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + else + err_attr = NULL; + if (err_attr) { + if (mod_combined) + err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured"); + goto out_ops; + } + /* ensure the new Rx count fits within the configured Rx flow * indirection table settings */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 74892623bacd..31e0b4e88a9d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1669,6 +1669,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, dev->ethtool_ops->get_channels(dev, &curr); + if (channels.rx_count == curr.rx_count && + channels.tx_count == curr.tx_count && + channels.combined_count == curr.combined_count && + channels.other_count == curr.other_count) + return 0; + /* ensure new counts are within the maximums */ if (channels.rx_count > curr.max_rx || channels.tx_count > curr.max_tx || @@ -1676,6 +1682,11 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, channels.other_count > curr.max_other) return -EINVAL; + /* ensure there is at least one RX and one TX channel */ + if (!channels.combined_count && + (!channels.rx_count || !channels.tx_count)) + return -EINVAL; + /* ensure the new Rx count fits within the configured Rx flow * indirection table settings */ if (netif_is_rxfh_configured(dev) && diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 2740cde0a182..7f47ba89054e 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -2,6 +2,7 @@ #include "netlink.h" #include "common.h" +#include <linux/phy.h> struct linkstate_req_info { struct ethnl_req_info base; @@ -10,6 +11,8 @@ struct linkstate_req_info { struct linkstate_reply_data { struct ethnl_reply_data base; int link; + int sqi; + int sqi_max; }; #define LINKSTATE_REPDATA(__reply_base) \ @@ -20,8 +23,46 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { [ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED }, [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT }, }; +static int linkstate_get_sqi(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + +static int linkstate_get_sqi_max(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi_max) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi_max(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, struct genl_info *info) @@ -34,6 +75,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; data->link = __ethtool_get_link(dev); + + ret = linkstate_get_sqi(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi = ret; + + ret = linkstate_get_sqi_max(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi_max = ret; + ethnl_ops_complete(dev); return 0; @@ -42,8 +96,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, static int linkstate_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { - return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); + int len; + + len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; + + if (data->sqi != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + if (data->sqi_max != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + return len; } static int linkstate_fill_reply(struct sk_buff *skb, @@ -56,6 +121,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; + if (data->sqi != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; + + if (data->sqi_max != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) + return -EMSGSIZE; + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8f5c8c9409d3..02aa5cb3a4fd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -116,6 +116,7 @@ #include <linux/mroute.h> #endif #include <net/l3mdev.h> +#include <net/compat.h> #include <trace/events/sock.h> @@ -974,17 +975,42 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT +static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + rt.rt_dev = compat_ptr(rtdev); + return ip_rt_ioctl(sock_net(sk), cmd, &rt); +} + static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; - int err = -ENOIOCTLCMD; - - if (sk->sk_prot->compat_ioctl) - err = sk->sk_prot->compat_ioctl(sk, cmd, arg); - return err; + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet_compat_routing_ioctl(sk, cmd, argp); + default: + if (!sk->sk_prot->compat_ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->compat_ioctl(sk, cmd, arg); + } } -#endif +#endif /* CONFIG_COMPAT */ const struct proto_ops inet_stream_ops = { .family = PF_INET, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fc94f82f82c7..f048d0a188b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2666,11 +2666,24 @@ static __net_init int devinet_init_net(struct net *net) tbl[0].extra2 = net; #endif - if ((!IS_ENABLED(CONFIG_SYSCTL) || - sysctl_devconf_inherit_init_net != 2) && - !net_eq(net, &init_net)) { - memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); - memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); + if (!net_eq(net, &init_net)) { + if (IS_ENABLED(CONFIG_SYSCTL) && + sysctl_devconf_inherit_init_net == 3) { + /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); + } else if (!IS_ENABLED(CONFIG_SYSCTL) || + sysctl_devconf_inherit_init_net != 2) { + /* inherit == 0 or 1: copy from init_net */ + memcpy(all, init_net.ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, init_net.ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); + } + /* else inherit == 2: use compiled values */ } #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 47f0502b2101..7b272bbed2b4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2565,9 +2565,9 @@ done: } int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage __user *p) { - int err, i, count, copycount; + int i, count, copycount; struct sockaddr_in *psin; __be32 addr; struct ip_mc_socklist *pmc; @@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - err = -EADDRNOTAVAIL; - for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; } if (!pmc) /* must have a prior join */ - goto done; + return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; memset(&ss, 0, sizeof(ss)); psin->sin_family = AF_INET; psin->sin_addr.s_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; -done: - return err; } /* diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0ce9b91ff55c..4e31f23e4117 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) } } -static int ipgre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, + int cmd) { - struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || - ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || + ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p.i_flags = gre_flags_to_tnl_flags(p.i_flags); - p.o_flags = gre_flags_to_tnl_flags(p.o_flags); + p->i_flags = gre_flags_to_tnl_flags(p->i_flags); + p->o_flags = gre_flags_to_tnl_flags(p->o_flags); - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - + p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); return 0; } @@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; #define GRE_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8206047d70b6..a2469bc57cfe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -587,6 +587,86 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int set_mcast_msfilter(struct sock *sk, int ifindex, + int numsrc, int fmode, + struct sockaddr_storage *group, + struct sockaddr_storage *list) +{ + int msize = IP_MSFILTER_SIZE(numsrc); + struct ip_msfilter *msf; + struct sockaddr_in *psin; + int err, i; + + msf = kmalloc(msize, GFP_KERNEL); + if (!msf) + return -ENOBUFS; + + psin = (struct sockaddr_in *)group; + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_multiaddr = psin->sin_addr.s_addr; + msf->imsf_interface = 0; + msf->imsf_fmode = fmode; + msf->imsf_numsrc = numsrc; + for (i = 0; i < numsrc; ++i) { + psin = (struct sockaddr_in *)&list[i]; + + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_slist[i] = psin->sin_addr.s_addr; + } + err = ip_mc_msfilter(sk, msf, ifindex); + kfree(msf); + return err; + +Eaddrnotavail: + kfree(msf); + return -EADDRNOTAVAIL; +} + +static int do_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + struct ip_mreq_source mreqs; + struct sockaddr_in *psin; + int omode, add, err; + + if (greqs->gsr_group.ss_family != AF_INET || + greqs->gsr_source.ss_family != AF_INET) + return -EADDRNOTAVAIL; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreqs.imr_multiaddr = psin->sin_addr.s_addr; + psin = (struct sockaddr_in *)&greqs->gsr_source; + mreqs.imr_sourceaddr = psin->sin_addr.s_addr; + mreqs.imr_interface = 0; /* use index for mc_source */ + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct ip_mreqn mreq; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_address.s_addr = 0; + mreq.imr_ifindex = greqs->gsr_interface; + err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); + if (err && err != -EADDRINUSE) + return err; + greqs->gsr_interface = mreq.imr_ifindex; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); +} + static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1029,9 +1109,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - struct ip_mreq_source mreqs; - struct sockaddr_in *psin; - int omode, add; if (optlen != sizeof(struct group_source_req)) goto e_inval; @@ -1039,50 +1116,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET || - greqs.gsr_source.ss_family != AF_INET) { - err = -EADDRNOTAVAIL; - break; - } - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreqs.imr_multiaddr = psin->sin_addr.s_addr; - psin = (struct sockaddr_in *)&greqs.gsr_source; - mreqs.imr_sourceaddr = psin->sin_addr.s_addr; - mreqs.imr_interface = 0; /* use index for mc_source */ - - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct ip_mreqn mreq; - - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_address.s_addr = 0; - mreq.imr_ifindex = greqs.gsr_interface; - err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); - if (err && err != -EADDRINUSE) - break; - greqs.gsr_interface = mreq.imr_ifindex; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - err = ip_mc_source(add, omode, sk, &mreqs, - greqs.gsr_interface); + err = do_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: { - struct sockaddr_in *psin; - struct ip_msfilter *msf = NULL; struct group_filter *gsf = NULL; - int msize, i, ifindex; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; @@ -1095,7 +1134,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = PTR_ERR(gsf); break; } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { @@ -1106,36 +1144,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EINVAL; goto mc_msf_out; } - msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); - msf = kmalloc(msize, GFP_KERNEL); - if (!msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - ifindex = gsf->gf_interface; - psin = (struct sockaddr_in *)&gsf->gf_group; - if (psin->sin_family != AF_INET) { - err = -EADDRNOTAVAIL; - goto mc_msf_out; - } - msf->imsf_multiaddr = psin->sin_addr.s_addr; - msf->imsf_interface = 0; - msf->imsf_fmode = gsf->gf_fmode; - msf->imsf_numsrc = gsf->gf_numsrc; - err = -EADDRNOTAVAIL; - for (i = 0; i < gsf->gf_numsrc; ++i) { - psin = (struct sockaddr_in *)&gsf->gf_slist[i]; - - if (psin->sin_family != AF_INET) - goto mc_msf_out; - msf->imsf_slist[i] = psin->sin_addr.s_addr; - } - kfree(gsf); - gsf = NULL; - - err = ip_mc_msfilter(sk, msf, ifindex); + err = set_mcast_msfilter(sk, gsf->gf_interface, + gsf->gf_numsrc, gsf->gf_fmode, + &gsf->gf_group, gsf->gf_slist); mc_msf_out: - kfree(msf); kfree(gsf); break; } @@ -1272,9 +1284,113 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ip_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group; + struct ip_mreqn mreq; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (psin->sin_family != AF_INET) + return -EINVAL; + + memset(&mreq, 0, sizeof(mreq)); + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ip_mc_join_group(sk, &mreq); + else + err = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen != sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffff) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + err = -ENOBUFS; + else + err = set_mcast_msfilter(sk, gf32->gf_interface, + n, gf32->gf_fmode, + &gf32->gf_group, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ip_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1465,19 +1581,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, } case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; - if (len < GROUP_FILTER_SIZE(0)) { + if (len < size0) { err = -EINVAL; goto out; } - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { + if (copy_from_user(&gsf, p, size0)) { err = -EFAULT; goto out; } - err = ip_mc_gsfget(sk, &gsf, - (struct group_filter __user *)optval, - optlen); + num = gsf.gf_numsrc; + err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + if (err) + goto out; + if (gsf.gf_numsrc < num) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; goto out; } case IP_MULTICAST_ALL: @@ -1590,9 +1715,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, { int err; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ip_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (level != SOL_IP) + return -EOPNOTSUPP; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + rtnl_lock(); + lock_sock(sk); + err = ip_mc_gsfget(sk, &gf, p->gf_slist); + release_sock(sk); + rtnl_unlock(); + if (err) + return err; + if (gf.gf_numsrc < num) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cd4b84310d92..f4f1d11eab50 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, netdev_state_change(dev); } -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; struct ip_tunnel *t = netdev_priv(dev); @@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) done: return err; } +EXPORT_SYMBOL_GPL(ip_tunnel_ctl); + +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel_parm p; + int err; + + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return err; +} EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1b4e6f298648..c8974360a99f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -378,38 +378,31 @@ static int vti4_err(struct sk_buff *skb, u32 info) } static int -vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP || + p->iph.ihl != 5) return -EINVAL; } - if (!(p.i_flags & GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags & GRE_KEY)) - p.o_key = 0; + if (!(p->i_flags & GRE_KEY)) + p->i_key = 0; + if (!(p->o_flags & GRE_KEY)) + p->o_key = 0; - p.i_flags = VTI_ISVTI; + p->i_flags = VTI_ISVTI; - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY; - p.o_flags |= GRE_KEY; + p->i_flags |= GRE_KEY; + p->o_flags |= GRE_KEY; } - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; return 0; } @@ -417,10 +410,11 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = vti_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = vti_tunnel_ctl, }; static void vti_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..df663baf2516 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto) } static int -ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { - int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || - !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + if (p->iph.version != 4 || + !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; } - p.i_key = p.o_key = 0; - p.i_flags = p.o_flags = 0; - err = ip_tunnel_ioctl(dev, &p, cmd); - if (err) - return err; - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - - return 0; + p->i_key = p->o_key = 0; + p->i_flags = p->o_flags = 0; + return ip_tunnel_ctl(dev, p, cmd); } static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip_tunnel_ctl, }; #define IPIP_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5c218db2dede..d3e9b80a57de 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -421,37 +421,6 @@ static void ipmr_free_table(struct mr_table *mrt) /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ -static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -{ - struct net *net = dev_net(dev); - - dev_close(dev); - - dev = __dev_get_by_name(net, "tunl0"); - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); - } - } -} - /* Initialize ipmr pimreg/tunnel in_device */ static bool ipmr_init_vif_indev(const struct net_device *dev) { @@ -471,51 +440,52 @@ static bool ipmr_init_vif_indev(const struct net_device *dev) static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { - struct net_device *dev; - - dev = __dev_get_by_name(net, "tunl0"); + struct net_device *tunnel_dev, *new_dev; + struct ip_tunnel_parm p = { }; + int err; - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; - struct ifreq ifr; - struct ip_tunnel_parm p; + tunnel_dev = __dev_get_by_name(net, "tunl0"); + if (!tunnel_dev) + goto out; - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); + if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) + goto out; + err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCADDTUNNEL); + if (err) + goto out; - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else { - err = -EOPNOTSUPP; - } - dev = NULL; - - if (err == 0 && - (dev = __dev_get_by_name(net, p.name)) != NULL) { - dev->flags |= IFF_MULTICAST; - if (!ipmr_init_vif_indev(dev)) - goto failure; - if (dev_open(dev, NULL)) - goto failure; - dev_hold(dev); - } - } - return dev; + new_dev = __dev_get_by_name(net, p.name); + if (!new_dev) + goto out; -failure: - unregister_netdevice(dev); - return NULL; + new_dev->flags |= IFF_MULTICAST; + if (!ipmr_init_vif_indev(new_dev)) + goto out_unregister; + if (dev_open(new_dev, NULL)) + goto out_unregister; + dev_hold(new_dev); + err = dev_set_allmulti(new_dev, 1); + if (err) { + dev_close(new_dev); + tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCDELTUNNEL); + dev_put(new_dev); + new_dev = ERR_PTR(err); + } + return new_dev; + +out_unregister: + unregister_netdevice(new_dev); +out: + return ERR_PTR(-ENOBUFS); } #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) @@ -867,14 +837,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, break; case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); - if (!dev) - return -ENOBUFS; - err = dev_set_allmulti(dev, 1); - if (err) { - ipmr_del_tunnel(dev, vifc); - dev_put(dev); - return err; - } + if (IS_ERR(dev)) + return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3957364d556c..c337e73e02dd 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -33,8 +33,20 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, + [NHA_FDB] = { .type = NLA_FLAG }, }; +static int call_nexthop_notifiers(struct net *net, + enum fib_event_type event_type, + struct nexthop *nh) +{ + int err; + + err = atomic_notifier_call_chain(&net->nexthop.notifier_chain, + event_type, nh); + return notifier_to_errno(err); +} + static unsigned int nh_dev_hashfn(unsigned int val) { unsigned int mask = NH_DEV_HASHSIZE - 1; @@ -107,6 +119,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); + INIT_LIST_HEAD(&nh->fdb_list); } return nh; } @@ -227,6 +240,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -241,7 +257,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else { + } else if (!nh->is_fdb_nh) { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -387,12 +403,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, return true; } +static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, + struct netlink_ext_ack *extack) +{ + struct nh_info *nhi; + + if (!nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); + return -EINVAL; + } + + nhi = rtnl_dereference(nh->nh_info); + if (*nh_family == AF_UNSPEC) { + *nh_family = nhi->family; + } else if (*nh_family != nhi->family) { + NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); + return -EINVAL; + } + + return 0; +} + static int nh_check_attr_group(struct net *net, struct nlattr *tb[], struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); + u8 nh_family = AF_UNSPEC; struct nexthop_grp *nhg; unsigned int i, j; + u8 nhg_fdb = 0; if (len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, @@ -421,6 +460,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } } + if (tb[NHA_FDB]) + nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; @@ -432,11 +473,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } if (!valid_group_nh(nh, len, extack)) return -EINVAL; + + if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) + return -EINVAL; + + if (!nhg_fdb && nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); + return -EINVAL; + } } for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - + if (tb[NHA_FDB]) + continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); return -EINVAL; @@ -495,6 +545,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; + if (nhge->nh->is_fdb_nh) + return nhge->nh; + /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ @@ -564,6 +617,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, { struct nh_info *nhi; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; + } + /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared * across multiple fib entries. If the config wants to use source @@ -640,6 +698,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope, { int err = 0; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (nh->is_group) { struct nh_group *nhg; @@ -773,6 +837,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) bool do_flush = false; struct fib_info *fi; + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + list_for_each_entry(fi, &nh->fi_list, nh_list) { fi->fib_flags |= RTNH_F_DEAD; do_flush = true; @@ -1125,6 +1191,9 @@ static struct nexthop *nexthop_create_group(struct net *net, nh_group_rebalance(nhg); } + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -1152,7 +1221,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; - u32 tb_id = l3mdev_fib_table(cfg->dev); + u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); @@ -1161,6 +1230,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } + if (nh->is_fdb_nh) + goto out; + /* sets nh_dev if successful */ err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { @@ -1186,6 +1258,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, .fc_flags = cfg->nh_flags, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, + .fc_is_fdb = cfg->nh_fdb, }; int err; @@ -1227,6 +1300,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->family = cfg->nh_family; nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + if (cfg->nh_blackhole) { nhi->reject_nh = 1; cfg->nh_ifindex = net->loopback_dev->ifindex; @@ -1248,7 +1324,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - nexthop_devhash_add(net, nhi); + if (!nh->is_fdb_nh) + nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); @@ -1352,6 +1429,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_ID]) cfg->nh_id = nla_get_u32(tb[NHA_ID]); + if (tb[NHA_FDB]) { + if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { + NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); + goto out; + } + if (nhm->nh_flags) { + NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); + goto out; + } + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); + } + if (tb[NHA_GROUP]) { if (nhm->nh_family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid family for group"); @@ -1375,8 +1465,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_BLACKHOLE]) { if (tb[NHA_GATEWAY] || tb[NHA_OIF] || - tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { - NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif"); + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { + NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); goto out; } @@ -1385,26 +1475,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (!tb[NHA_OIF]) { - NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); + if (!cfg->nh_fdb && !tb[NHA_OIF]) { + NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); goto out; } - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); - if (cfg->nh_ifindex) - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); + if (!cfg->nh_fdb && tb[NHA_OIF]) { + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); + if (cfg->nh_ifindex) + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); - if (!cfg->dev) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - goto out; - } else if (!(cfg->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } else if (!netif_carrier_ok(cfg->dev)) { - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); - err = -ENETDOWN; - goto out; + if (!cfg->dev) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + goto out; + } else if (!(cfg->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } else if (!netif_carrier_ok(cfg->dev)) { + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); + err = -ENETDOWN; + goto out; + } } err = -EINVAL; @@ -1633,7 +1725,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, int *master_idx, bool *group_filter, - struct netlink_callback *cb) + bool *fdb_filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NHA_MAX + 1]; @@ -1670,6 +1762,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, case NHA_GROUPS: *group_filter = true; break; + case NHA_FDB: + *fdb_filter = true; + break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; @@ -1688,17 +1783,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; - bool group_filter = false; struct rb_node *node; int idx = 0, s_idx; int err; err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, cb); + &group_filter, &fdb_filter, cb); if (err < 0) return err; @@ -1783,6 +1878,19 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; +int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&net->nexthop.notifier_chain, nb); +} +EXPORT_SYMBOL(register_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&net->nexthop.notifier_chain, + nb); +} +EXPORT_SYMBOL(unregister_nexthop_notifier); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); @@ -1799,6 +1907,7 @@ static int __net_init nexthop_net_init(struct net *net) net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); if (!net->nexthop.devhash) return -ENOMEM; + ATOMIC_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fd885f06c4ed..09cfbf5dd7ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2783,6 +2783,33 @@ put: in6_dev_put(in6_dev); } +static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, + struct in6_ifreq *ireq) +{ + struct ip_tunnel_parm p = { }; + int err; + + if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) + return -EADDRNOTAVAIL; + + p.iph.daddr = ireq->ifr6_addr.s6_addr32[3]; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + + if (!dev->netdev_ops->ndo_tunnel_ctl) + return -EOPNOTSUPP; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL); + if (err) + return err; + + dev = __dev_get_by_name(net, p.name); + if (!dev) + return -ENOBUFS; + return dev_open(dev, NULL); +} + /* * Set destination address. * Special case for SIT interfaces where we create a new "virtual" @@ -2790,61 +2817,19 @@ put: */ int addrconf_set_dstaddr(struct net *net, void __user *arg) { - struct in6_ifreq ireq; struct net_device *dev; - int err = -EINVAL; - - rtnl_lock(); + struct in6_ifreq ireq; + int err = -ENODEV; - err = -EFAULT; + if (!IS_ENABLED(CONFIG_IPV6_SIT)) + return -ENODEV; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - goto err_exit; + return -EFAULT; + rtnl_lock(); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - - err = -ENODEV; - if (!dev) - goto err_exit; - -#if IS_ENABLED(CONFIG_IPV6_SIT) - if (dev->type == ARPHRD_SIT) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - err = -EADDRNOTAVAIL; - if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) - goto err_exit; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; - p.iph.saddr = 0; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPV6; - p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else - err = -EOPNOTSUPP; - - if (err == 0) { - err = -ENOBUFS; - dev = __dev_get_by_name(net, p.name); - if (!dev) - goto err_exit; - err = dev_open(dev, NULL); - } - } -#endif - -err_exit: + if (dev && dev->type == ARPHRD_SIT) + err = addrconf_set_sit_dstaddr(net, dev, &ireq); rtnl_unlock(); return err; } @@ -6991,9 +6976,26 @@ static int __net_init addrconf_init_net(struct net *net) goto err_alloc_dflt; if (IS_ENABLED(CONFIG_SYSCTL) && - sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { - memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); - memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + !net_eq(net, &init_net)) { + switch (sysctl_devconf_inherit_init_net) { + case 1: /* copy from init_net */ + memcpy(all, init_net.ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 3: /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 0: + case 2: + /* use compiled values */ + break; + } } /* these will be inherited by all namespaces */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3b6fcc0c321a..0625a97a8894 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,7 @@ #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> +#include <net/compat.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -545,21 +546,25 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); switch (cmd) { case SIOCADDRT: - case SIOCDELRT: - - return ipv6_route_ioctl(net, cmd, (void __user *)arg); + case SIOCDELRT: { + struct in6_rtmsg rtmsg; + if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) + return -EFAULT; + return ipv6_route_ioctl(net, cmd, &rtmsg); + } case SIOCSIFADDR: - return addrconf_add_ifaddr(net, (void __user *) arg); + return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: - return addrconf_del_ifaddr(net, (void __user *) arg); + return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: - return addrconf_set_dstaddr(net, (void __user *) arg); + return addrconf_set_dstaddr(net, argp); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -570,6 +575,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet6_ioctl); +#ifdef CONFIG_COMPAT +struct compat_in6_rtmsg { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + u32 rtmsg_type; + u16 rtmsg_dst_len; + u16 rtmsg_src_len; + u32 rtmsg_metric; + u32 rtmsg_info; + u32 rtmsg_flags; + s32 rtmsg_ifindex; +}; + +static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_in6_rtmsg __user *ur) +{ + struct in6_rtmsg rt; + + if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, + 3 * sizeof(struct in6_addr)) || + get_user(rt.rtmsg_type, &ur->rtmsg_type) || + get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || + get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || + get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || + get_user(rt.rtmsg_info, &ur->rtmsg_info) || + get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || + get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) + return -EFAULT; + + + return ipv6_route_ioctl(sock_net(sk), cmd, &rt); +} + +int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet6_compat_routing_ioctl(sk, cmd, argp); + default: + return -ENOIOCTLCMD; + } +} +EXPORT_SYMBOL_GPL(inet6_compat_ioctl); +#endif /* CONFIG_COMPAT */ + INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -631,6 +686,7 @@ const struct proto_ops inet6_stream_ops = { .read_sock = tcp_read_sock, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif @@ -659,6 +715,7 @@ const struct proto_ops inet6_dgram_ops = { .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4703b09808d0..821d96c720b9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -89,6 +89,11 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *collect_md_tun; }; +static inline int ip6_tnl_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + static struct net_device_stats *ip6_get_stats(struct net_device *dev) { struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static int +mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; + u8 rel_type = type; + u8 rel_code = code; + + err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, + &rel_msg, &rel_info, offset); + return err; +} + static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) @@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, return IP6_ECN_decapsulate(ipv6h, skb); } +static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) +{ + /* ECN is not supported in AF_MPLS */ + return 0; +} + __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) @@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = { .proto = htons(ETH_P_IP), }; +static const struct tnl_ptk_info tpi_mpls = { + /* no tunnel info required for mplsip6. */ + .proto = htons(ETH_P_MPLS_UC), +}; + static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, const struct tnl_ptk_info *tpi, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, @@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb) ip6ip6_dscp_ecn_decapsulate); } +static int mplsip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls, + mplsip6_dscp_ecn_decapsulate); +} + struct ipv6_tel_txoption { struct ipv6_txoptions ops; __u8 dst_opt[8]; @@ -1232,6 +1270,8 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } + skb_set_inner_ipproto(skb, proto); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1253,22 +1293,22 @@ tx_err_dst_release: EXPORT_SYMBOL(ip6_tnl_xmit); static inline int -ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, + u8 protocol) { struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h; const struct iphdr *iph; int encap_limit = -1; + __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield, orig_dsfield; __u32 mtu; u8 tproto; int err; - iph = ip_hdr(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = READ_ONCE(t->parms.proto); - if (tproto != IPPROTO_IPIP && tproto != 0) + if (tproto != protocol && tproto != 0) return -1; if (t->parms.collect_md) { @@ -1281,129 +1321,100 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) return -1; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = protocol; fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + break; + default: + orig_dsfield = dsfield; + break; + } } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; + if (protocol == IPPROTO_IPV6) { + offset = ip6_tnl_parse_tlv_enc_lim(skb, + skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have + * reallocated skb->head + */ + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - } - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - - if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) - return -1; - - skb_set_inner_ipproto(skb, IPPROTO_IPIP); - - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPIP); - if (err != 0) { - /* XXX: send ICMP error even if DF is not set. */ - if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - return -1; - } - - return 0; -} - -static inline int -ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h; - int encap_limit = -1; - __u16 offset; - struct flowi6 fl6; - __u8 dsfield; - __u32 mtu; - u8 tproto; - int err; - - ipv6h = ipv6_hdr(skb); - tproto = READ_ONCE(t->parms.proto); - if ((tproto != IPPROTO_IPV6 && tproto != 0) || - ip6_tnl_addr_conflict(t, ipv6h)) - return -1; - - if (t->parms.collect_md) { - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - - tun_info = skb_tunnel_info(skb); - if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || - ip_tunnel_info_af(tun_info) != AF_INET6)) - return -1; - key = &tun_info->key; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - fl6.saddr = key->u.ipv6.src; - fl6.daddr = key->u.ipv6.dst; - fl6.flowlabel = key->label; - dsfield = key->tos; - } else { - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - - tel = (void *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { - encap_limit = t->parms.encap_limit; } memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = protocol; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else fl6.flowi6_mark = t->parms.fwmark; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + break; + default: + orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo); + break; + } } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPV6); + protocol); if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + switch (protocol) { + case IPPROTO_IPIP: + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + break; + case IPPROTO_IPV6: + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + break; + default: + break; + } return -1; } @@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + u8 ipproto; int ret; if (!pskb_inet_may_pull(skb)) @@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - ret = ip4ip6_tnl_xmit(skb, dev); + ipproto = IPPROTO_IPIP; break; case htons(ETH_P_IPV6): - ret = ip6ip6_tnl_xmit(skb, dev); + if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb))) + goto tx_err; + ipproto = IPPROTO_IPV6; + break; + case htons(ETH_P_MPLS_UC): + ipproto = IPPROTO_MPLS; break; default: goto tx_err; } + ret = ipxip6_tnl_xmit(skb, dev, ipproto); if (ret < 0) goto tx_err; @@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; +static struct xfrm6_tunnel mplsip6_handler __read_mostly = { + .handler = mplsip6_rcv, + .err_handler = mplsip6_err, + .priority = 1, +}; + static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + + if (ip6_tnl_mpls_supported()) { + err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS); + if (err < 0) { + pr_err("%s: can't register mplsip6\n", __func__); + goto out_mplsip6; + } + } + err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void) return 0; rtnl_link_failed: + if (ip6_tnl_mpls_supported()) + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); +out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); @@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); + if (ip6_tnl_mpls_supported() && + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) + pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a0e50cc57e54..e10258c2210e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int do_ipv6_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + int omode, add; + + if (greqs->gsr_group.ss_family != AF_INET6 || + greqs->gsr_source.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct sockaddr_in6 *psin6; + int retv; + + psin6 = (struct sockaddr_in6 *)&greqs->gsr_group; + retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface, + &psin6->sin6_addr, + MCAST_INCLUDE); + /* prior join w/ different source is ok */ + if (retv && retv != -EADDRINUSE) + return retv; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip6_mc_source(add, omode, sk, greqs); +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -715,7 +750,6 @@ done: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; @@ -723,34 +757,7 @@ done: retv = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET6 || - greqs.gsr_source.ss_family != AF_INET6) { - retv = -EADDRNOTAVAIL; - break; - } - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct sockaddr_in6 *psin6; - - psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, - &psin6->sin6_addr, - MCAST_INCLUDE); - /* prior join w/ different source is ok */ - if (retv && retv != -EADDRINUSE) - break; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - retv = ip6_mc_source(add, omode, sk, &greqs); + retv = do_ipv6_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: @@ -780,7 +787,7 @@ done: retv = -EINVAL; break; } - retv = ip6_mc_msfilter(sk, gsf); + retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); kfree(gsf); break; @@ -973,9 +980,110 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ipv6_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (greq.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); + else + err = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen < sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_ipv6_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffffU || + n > sysctl_mld_max_msf) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + err = ip6_mc_msfilter(sk, &(struct group_filter){ + .gf_interface = gf32->gf_interface, + .gf_group = gf32->gf_group, + .gf_fmode = gf32->gf_fmode, + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1048,18 +1156,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; int err; - if (len < GROUP_FILTER_SIZE(0)) + if (len < size0) return -EINVAL; - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) + if (copy_from_user(&gsf, p, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; + num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, - (struct group_filter __user *)optval, optlen); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + if (!err) { + if (num > gsf.gf_numsrc) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; + } release_sock(sk); return err; } @@ -1428,9 +1546,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ipv6_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + if (gf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + lock_sock(sk); + err = ip6_mc_msfget(sk, &gf, p->gf_slist); + release_sock(sk); + if (err) + return err; + if (num > gf.gf_numsrc) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index eaa4c2cc2fbb..7e12d2114158 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -457,7 +457,8 @@ done: return err; } -int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) +int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, + struct sockaddr_storage *list) { const struct in6_addr *group; struct ipv6_mc_socklist *pmc; @@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i = 0; i < newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i, ++list) { struct sockaddr_in6 *psin6; - psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; + psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } err = ip6_mc_add_src(idev, group, gsf->gf_fmode, @@ -547,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage *p) { int err, i, count, copycount; const struct in6_addr *group; @@ -592,14 +593,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } /* changes to psl require the socket lock, and a write lock * on pmc->sflock. We have the socket lock so reading here is safe. */ - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -607,7 +604,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0028aa1d7869..8ef5a7b30524 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8b4add0b545..82cbb46a2a4f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3421,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; #endif + if (cfg->fc_is_fdb) { + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + return 0; + } err = -ENODEV; if (cfg->fc_ifindex) { @@ -4336,41 +4341,29 @@ static void rtmsg_to_fib6_config(struct net *net, }; } -int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) { struct fib6_config cfg; - struct in6_rtmsg rtmsg; int err; - switch (cmd) { - case SIOCADDRT: /* Add a route */ - case SIOCDELRT: /* Delete a route */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = copy_from_user(&rtmsg, arg, - sizeof(struct in6_rtmsg)); - if (err) - return -EFAULT; + if (cmd != SIOCADDRT && cmd != SIOCDELRT) + return -EINVAL; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; - rtmsg_to_fib6_config(net, &rtmsg, &cfg); + rtmsg_to_fib6_config(net, rtmsg, &cfg); - rtnl_lock(); - switch (cmd) { - case SIOCADDRT: - err = ip6_route_add(&cfg, GFP_KERNEL, NULL); - break; - case SIOCDELRT: - err = ip6_route_del(&cfg, NULL); - break; - default: - err = -EINVAL; - } - rtnl_unlock(); - - return err; + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); + break; + case SIOCDELRT: + err = ip6_route_del(&cfg, NULL); + break; } - - return -EINVAL; + rtnl_unlock(); + return err; } /* diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98954830c40b..1fbb4dfbb191 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -83,6 +83,13 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; +static inline struct sit_net *dev_to_sit_net(struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + return net_generic(t->net, sit_net_id); +} + /* * Must be invoked with rcu_read_lock */ @@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, - struct ip_tunnel_prl __user *a) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) { + struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); @@ -441,6 +452,35 @@ out: return err; } +static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_prl prl; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + + if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + return -EFAULT; + + switch (cmd) { + case SIOCDELPRL: + err = ipip6_tunnel_del_prl(t, &prl); + break; + case SIOCADDPRL: + case SIOCCHGPRL: + err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); + break; + } + dst_cache_reset(&t->dst_cache); + netdev_state_change(dev); + return err; +} + static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { @@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, netdev_state_change(t->dev); return 0; } -#endif + +static int +ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + struct ip_tunnel_parm p; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + t = ipip6_tunnel_locate(t->net, &p, 0); + } + if (!t) + t = netdev_priv(dev); + + ip6rd.prefix = t->ip6rd.prefix; + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + return -EFAULT; + return 0; +} + +static int +ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + return -EFAULT; + + if (cmd != SIOCDEL6RD) { + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) + return err; + } else + ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); + return 0; +} + +#endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { @@ -1164,185 +1250,145 @@ static bool ipip6_valid_ip_proto(u8 ipproto) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) +{ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!ipip6_valid_ip_proto(p->iph.protocol)) + return -EINVAL; + if (p->iph.version != 4 || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) + return -EINVAL; + + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + return 0; +} + +static int +ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) { - int err = 0; - struct ip_tunnel_parm p; - struct ip_tunnel_prl prl; struct ip_tunnel *t = netdev_priv(dev); - struct net *net = t->net; - struct sit_net *sitn = net_generic(net, sit_net_id); -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd ip6rd; -#endif - switch (cmd) { - case SIOCGETTUNNEL: -#ifdef CONFIG_IPV6_SIT_6RD - case SIOCGET6RD: -#endif - if (dev == sitn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - t = netdev_priv(dev); - } + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + return 0; +} - err = -EFAULT; - if (cmd == SIOCGETTUNNEL) { - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, - sizeof(p))) - goto done; -#ifdef CONFIG_IPV6_SIT_6RD +static int +ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 1); + if (!t) + return -ENOBUFS; + return 0; +} + +static int +ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 0); + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (!t) + return -ENOENT; + } else { + if (t) { + if (t->dev != dev) + return -EEXIST; } else { - ip6rd.prefix = t->ip6rd.prefix; - ip6rd.relay_prefix = t->ip6rd.relay_prefix; - ip6rd.prefixlen = t->ip6rd.prefixlen; - ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, - sizeof(ip6rd))) - goto done; -#endif + if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || + (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) + return -EINVAL; + t = netdev_priv(dev); } - err = 0; - break; - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; + ipip6_tunnel_update(t, p, t->fwmark); + } - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (!ipip6_valid_ip_proto(p.iph.protocol)) - goto done; - if (p.iph.version != 4 || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } + return 0; +} - ipip6_tunnel_update(t, &p, t->fwmark); - } +static int +ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + return -ENOENT; + if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) + return -EPERM; + dev = t->dev; + } + unregister_netdevice(dev); + return 0; +} +static int +ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + return ipip6_tunnel_get(dev, p); + case SIOCADDTUNNEL: + return ipip6_tunnel_add(dev, p); + case SIOCCHGTUNNEL: + return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == sitn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - goto done; - err = -EPERM; - if (t == netdev_priv(sitn->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; + return ipip6_tunnel_del(dev, p); + default: + return -EINVAL; + } +} +static int +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + case SIOCDELTUNNEL: + return ip_tunnel_ioctl(dev, ifr, cmd); case SIOCGETPRL: - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); - break; - + return ipip6_tunnel_get_prl(dev, ifr); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = -EFAULT; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) - goto done; - - switch (cmd) { - case SIOCDELPRL: - err = ipip6_tunnel_del_prl(t, &prl); - break; - case SIOCADDPRL: - case SIOCCHGPRL: - err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); - break; - } - dst_cache_reset(&t->dst_cache); - netdev_state_change(dev); - break; - + return ipip6_tunnel_prl_ctl(dev, ifr, cmd); #ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: + return ipip6_tunnel_get6rd(dev, ifr); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, - sizeof(ip6rd))) - goto done; - - if (cmd != SIOCDEL6RD) { - err = ipip6_tunnel_update_6rd(t, &ip6rd); - if (err < 0) - goto done; - } else - ipip6_tunnel_clone_6rd(dev, sitn); - - err = 0; - break; + return ipip6_tunnel_6rdctl(dev, ifr, cmd); #endif - default: - err = -EINVAL; + return -EINVAL; } - -done: - return err; } static const struct net_device_ops ipip6_netdev_ops = { @@ -1352,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 21e7b95ddbfa..06c02ebe6b9b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -21,8 +21,14 @@ static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly; static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly; +static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly; static DEFINE_MUTEX(tunnel6_mutex); +static inline int xfrm6_tunnel_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) { struct xfrm6_tunnel __rcu **pprev; @@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t->priority > priority) @@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t == handler) { @@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) } } +err: mutex_unlock(&tunnel6_mutex); synchronize_net(); @@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister); handler != NULL; \ handler = rcu_dereference(handler->next)) \ +static int tunnelmpls6_rcv(struct sk_buff *skb) +{ + struct xfrm6_tunnel *handler; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->handler(skb)) + return 0; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + static int tunnel6_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return -ENOENT; } +static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + return 0; + + return -ENOENT; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct inet6_protocol tunnelmpls6_protocol = { + .handler = tunnelmpls6_rcv, + .err_handler = tunnelmpls6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + static int __init tunnel6_init(void) { if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { @@ -169,6 +238,13 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); return -EAGAIN; } + if (xfrm6_tunnel_mpls_supported() && + inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) { + pr_err("%s: can't add protocol\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + return -EAGAIN; + } return 0; } @@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) pr_err("%s: can't remove protocol\n", __func__); + if (xfrm6_tunnel_mpls_supported() && + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) + pr_err("%s: can't remove protocol\n", __func__); } module_init(tunnel6_init); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c4bdcbc84b07..ee0add15497d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/types.h> +#include <linux/limits.h> #include <linux/list.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -36,8 +37,6 @@ static char iucv_userid[80]; -static const struct proto_ops iucv_sock_ops; - static struct proto iucv_proto = { .name = "AF_IUCV", .owner = THIS_MODULE, @@ -85,14 +84,11 @@ do { \ __ret; \ }) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock); static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); -static void iucv_sever_path(struct sock *, int); -static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); -static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, - struct sk_buff *skb, u8 flags); static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); /* Call Back functions */ @@ -127,110 +123,6 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } -static int afiucv_pm_prepare(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_prepare\n"); -#endif - return 0; -} - -static void afiucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_complete\n"); -#endif -} - -/** - * afiucv_pm_freeze() - Freeze PM callback - * @dev: AFIUCV dummy device - * - * Sever all established IUCV communication pathes - */ -static int afiucv_pm_freeze(struct device *dev) -{ - struct iucv_sock *iucv; - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_freeze\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - iucv = iucv_sk(sk); - switch (sk->sk_state) { - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_CONNECTED: - iucv_sever_path(sk, 0); - break; - case IUCV_OPEN: - case IUCV_BOUND: - case IUCV_LISTEN: - case IUCV_CLOSED: - default: - break; - } - skb_queue_purge(&iucv->send_skb_q); - skb_queue_purge(&iucv->backlog_skb_q); - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -/** - * afiucv_pm_restore_thaw() - Thaw and restore PM callback - * @dev: AFIUCV dummy device - * - * socket clean up after freeze - */ -static int afiucv_pm_restore_thaw(struct device *dev) -{ - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - switch (sk->sk_state) { - case IUCV_CONNECTED: - sk->sk_err = EPIPE; - sk->sk_state = IUCV_DISCONN; - sk->sk_state_change(sk); - break; - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_LISTEN: - case IUCV_BOUND: - case IUCV_OPEN: - default: - break; - } - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -static const struct dev_pm_ops afiucv_pm_ops = { - .prepare = afiucv_pm_prepare, - .complete = afiucv_pm_complete, - .freeze = afiucv_pm_freeze, - .thaw = afiucv_pm_restore_thaw, - .restore = afiucv_pm_restore_thaw, -}; - -static struct device_driver af_iucv_driver = { - .owner = THIS_MODULE, - .name = "afiucv", - .bus = NULL, - .pm = &afiucv_pm_ops, -}; - -/* dummy device used as trigger for PM functions */ -static struct device *af_iucv_dev; - /** * iucv_msg_length() - Returns the length of an iucv message. * @msg: Pointer to struct iucv_message, MUST NOT be NULL @@ -435,6 +327,20 @@ static void iucv_sock_cleanup_listen(struct sock *parent) parent->sk_state = IUCV_CLOSED; } +static void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +static void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + /* Kill socket (only if zapped and orphaned) */ static void iucv_sock_kill(struct sock *sk) { @@ -607,53 +513,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, return sk; } -/* Create an IUCV socket */ -static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - if (protocol && protocol != PF_IUCV) - return -EPROTONOSUPPORT; - - sock->state = SS_UNCONNECTED; - - switch (sock->type) { - case SOCK_STREAM: - sock->ops = &iucv_sock_ops; - break; - case SOCK_SEQPACKET: - /* currently, proto ops can handle both sk types */ - sock->ops = &iucv_sock_ops; - break; - default: - return -ESOCKTNOSUPPORT; - } - - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); - if (!sk) - return -ENOMEM; - - iucv_sock_init(sk, NULL); - - return 0; -} - -void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_add_node(sk, &l->head); - write_unlock_bh(&l->lock); -} - -void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_del_node_init(sk); - write_unlock_bh(&l->lock); -} - -void iucv_accept_enqueue(struct sock *parent, struct sock *sk) +static void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(parent); @@ -666,7 +526,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk) sk_acceptq_added(parent); } -void iucv_accept_unlink(struct sock *sk) +static void iucv_accept_unlink(struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); @@ -679,7 +539,8 @@ void iucv_accept_unlink(struct sock *sk) sock_put(sk); } -struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock) { struct iucv_sock *isk, *n; struct sock *sk; @@ -1100,7 +961,6 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* initialize defaults */ cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; /* iterate over control messages */ for_each_cmsghdr(cmsg, msg) { @@ -1511,8 +1371,8 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -__poll_t iucv_sock_poll(struct file *file, struct socket *sock, - poll_table *wait) +static __poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1664,7 +1524,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, switch (sk->sk_state) { case IUCV_OPEN: case IUCV_BOUND: - if (val < 1 || val > (u16)(~0)) + if (val < 1 || val > U16_MAX) rc = -EINVAL; else iucv->msglimit = val; @@ -2396,6 +2256,35 @@ static const struct proto_ops iucv_sock_ops = { .getsockopt = iucv_sock_getsockopt, }; +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + if (protocol && protocol != PF_IUCV) + return -EPROTONOSUPPORT; + + sock->state = SS_UNCONNECTED; + + switch (sock->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + /* currently, proto ops can handle both sk types */ + sock->ops = &iucv_sock_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); + if (!sk) + return -ENOMEM; + + iucv_sock_init(sk, NULL); + + return 0; +} + static const struct net_proto_family iucv_sock_family_ops = { .family = AF_IUCV, .owner = THIS_MODULE, @@ -2409,45 +2298,11 @@ static struct packet_type iucv_packet_type = { static int afiucv_iucv_init(void) { - int err; - - err = pr_iucv->iucv_register(&af_iucv_handler, 0); - if (err) - goto out; - /* establish dummy device */ - af_iucv_driver.bus = pr_iucv->bus; - err = driver_register(&af_iucv_driver); - if (err) - goto out_iucv; - af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!af_iucv_dev) { - err = -ENOMEM; - goto out_driver; - } - dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = pr_iucv->bus; - af_iucv_dev->parent = pr_iucv->root; - af_iucv_dev->release = (void (*)(struct device *))kfree; - af_iucv_dev->driver = &af_iucv_driver; - err = device_register(af_iucv_dev); - if (err) - goto out_iucv_dev; - return 0; - -out_iucv_dev: - put_device(af_iucv_dev); -out_driver: - driver_unregister(&af_iucv_driver); -out_iucv: - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -out: - return err; + return pr_iucv->iucv_register(&af_iucv_handler, 0); } static void afiucv_iucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); pr_iucv->iucv_unregister(&af_iucv_handler, 0); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 9a2d023842fe..19250a0c85d3 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -67,32 +67,9 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv) return 0; } -enum iucv_pm_states { - IUCV_PM_INITIAL = 0, - IUCV_PM_FREEZING = 1, - IUCV_PM_THAWING = 2, - IUCV_PM_RESTORING = 3, -}; -static enum iucv_pm_states iucv_pm_state; - -static int iucv_pm_prepare(struct device *); -static void iucv_pm_complete(struct device *); -static int iucv_pm_freeze(struct device *); -static int iucv_pm_thaw(struct device *); -static int iucv_pm_restore(struct device *); - -static const struct dev_pm_ops iucv_pm_ops = { - .prepare = iucv_pm_prepare, - .complete = iucv_pm_complete, - .freeze = iucv_pm_freeze, - .thaw = iucv_pm_thaw, - .restore = iucv_pm_restore, -}; - struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, - .pm = &iucv_pm_ops, }; EXPORT_SYMBOL(iucv_bus); @@ -435,31 +412,6 @@ static void iucv_block_cpu(void *data) } /** - * iucv_block_cpu_almost - * @data: unused - * - * Allow connection-severed interrupts only on this cpu. - */ -static void iucv_block_cpu_almost(void *data) -{ - int cpu = smp_processor_id(); - union iucv_param *parm; - - /* Allow iucv control interrupts only */ - parm = iucv_param_irq[cpu]; - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x08; - iucv_call_b2f0(IUCV_SETMASK, parm); - /* Allow iucv-severed interrupt only */ - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x20; - iucv_call_b2f0(IUCV_SETCONTROLMASK, parm); - - /* Clear indication that iucv interrupts are allowed for this cpu. */ - cpumask_clear_cpu(cpu, &iucv_irq_cpumask); -} - -/** * iucv_declare_cpu * @data: unused * @@ -1834,146 +1786,6 @@ static void iucv_external_interrupt(struct ext_code ext_code, spin_unlock(&iucv_queue_lock); } -static int iucv_pm_prepare(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_prepare\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->prepare) - rc = dev->driver->pm->prepare(dev); - return rc; -} - -static void iucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_complete\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->complete) - dev->driver->pm->complete(dev); -} - -/** - * iucv_path_table_empty() - determine if iucv path table is empty - * - * Returns 0 if there are still iucv pathes defined - * 1 if there are no iucv pathes defined - */ -static int iucv_path_table_empty(void) -{ - int i; - - for (i = 0; i < iucv_max_pathid; i++) { - if (iucv_path_table[i]) - return 0; - } - return 1; -} - -/** - * iucv_pm_freeze() - Freeze PM callback - * @dev: iucv-based device - * - * disable iucv interrupts - * invoke callback function of the iucv-based driver - * shut down iucv, if no iucv-pathes are established anymore - */ -static int iucv_pm_freeze(struct device *dev) -{ - int cpu; - struct iucv_irq_list *p, *n; - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_freeze\n"); -#endif - if (iucv_pm_state != IUCV_PM_FREEZING) { - for_each_cpu(cpu, &iucv_irq_cpumask) - smp_call_function_single(cpu, iucv_block_cpu_almost, - NULL, 1); - cancel_work_sync(&iucv_work); - list_for_each_entry_safe(p, n, &iucv_work_queue, list) { - list_del_init(&p->list); - iucv_sever_pathid(p->data.ippathid, - iucv_error_no_listener); - kfree(p); - } - } - iucv_pm_state = IUCV_PM_FREEZING; - if (dev->driver && dev->driver->pm && dev->driver->pm->freeze) - rc = dev->driver->pm->freeze(dev); - if (iucv_path_table_empty()) - iucv_disable(); - return rc; -} - -/** - * iucv_pm_thaw() - Thaw PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_thaw(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_thaw\n"); -#endif - iucv_pm_state = IUCV_PM_THAWING; - if (!iucv_path_table) { - rc = iucv_enable(); - if (rc) - goto out; - } - if (cpumask_empty(&iucv_irq_cpumask)) { - if (iucv_nonsmp_handler) - /* enable interrupts on one cpu */ - iucv_allow_cpu(NULL); - else - /* enable interrupts on all cpus */ - iucv_setmask_mp(); - } - if (dev->driver && dev->driver->pm && dev->driver->pm->thaw) - rc = dev->driver->pm->thaw(dev); -out: - return rc; -} - -/** - * iucv_pm_restore() - Restore PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_restore(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table); -#endif - if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table) - pr_warn("Suspending Linux did not completely close all IUCV connections\n"); - iucv_pm_state = IUCV_PM_RESTORING; - if (cpumask_empty(&iucv_irq_cpumask)) { - rc = iucv_query_maxconn(); - rc = iucv_enable(); - if (rc) - goto out; - } - if (dev->driver && dev->driver->pm && dev->driver->pm->restore) - rc = dev->driver->pm->restore(dev); -out: - return rc; -} - struct iucv_interface iucv_if = { .message_receive = iucv_message_receive, .__message_receive = __iucv_message_receive, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d148766f40d1..fdfef926c591 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -758,6 +758,7 @@ static const struct proto_ops l2tp_ip6_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a42e4ed5ab0e..fd30ea61336e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, dev->type == ARPHRD_IPGRE || dev->type == ARPHRD_IP6GRE || dev->type == ARPHRD_SIT || - dev->type == ARPHRD_TUNNEL) { + dev->type == ARPHRD_TUNNEL || + dev->type == ARPHRD_TUNNEL6) { mdev = mpls_add_dev(dev); if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 45497af23906..ece6f92cf7d1 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } - ack_size = TCPOLEN_MPTCP_DSS_ACK64; + if (subflow->use_64bit_ack) { + ack_size = TCPOLEN_MPTCP_DSS_ACK64; + opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.ack64 = 1; + } else { + ack_size = TCPOLEN_MPTCP_DSS_ACK32; + opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.ack64 = 0; + } + opts->ext_copy.use_ack = 1; /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size += ack_size; - opts->ext_copy.data_ack = msk->ack_seq; - opts->ext_copy.ack64 = 1; - opts->ext_copy.use_ack = 1; - *size = ALIGN(dss_size, 4); return true; } @@ -986,8 +991,13 @@ mp_capable_done: u8 flags = 0; if (mpext->use_ack) { - len += TCPOLEN_MPTCP_DSS_ACK64; - flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; + flags = MPTCP_DSS_HAS_ACK; + if (mpext->ack64) { + len += TCPOLEN_MPTCP_DSS_ACK64; + flags |= MPTCP_DSS_ACK64; + } else { + len += TCPOLEN_MPTCP_DSS_ACK32; + } } if (mpext->use_map) { @@ -1004,8 +1014,13 @@ mp_capable_done: *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { - put_unaligned_be64(mpext->data_ack, ptr); - ptr += 2; + if (mpext->ack64) { + put_unaligned_be64(mpext->data_ack, ptr); + ptr += 2; + } else { + put_unaligned_be32(mpext->data_ack32, ptr); + ptr += 1; + } } if (mpext->use_map) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1f52a0fa31ed..ba9d3d5c625f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -367,8 +367,10 @@ static void mptcp_stop_timer(struct sock *sk) static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) { + const struct sock *sk = (const struct sock *)msk; + if (!msk->cached_ext) - msk->cached_ext = __skb_ext_alloc(); + msk->cached_ext = __skb_ext_alloc(sk->sk_allocation); return !!msk->cached_ext; } @@ -510,20 +512,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * fooled into a warning if we don't init here */ pfrag = sk_page_frag(sk); - while ((!retransmission && !mptcp_page_frag_refill(ssk, pfrag)) || - !mptcp_ext_cache_refill(msk)) { - ret = sk_stream_wait_memory(ssk, timeo); - if (ret) - return ret; - - /* if sk_stream_wait_memory() sleeps snd_una can change - * significantly, refresh the rtx queue - */ - mptcp_clean_una(sk); - - if (unlikely(__mptcp_needs_tcp_fallback(msk))) - return 0; - } if (!retransmission) { write_seq = &msk->write_seq; page = pfrag->page; @@ -590,7 +578,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * access the skb after the sendpages call */ ret = do_tcp_sendpages(ssk, page, offset, psize, - msg->msg_flags | MSG_SENDPAGE_NOTLAST); + msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT); if (ret <= 0) return ret; @@ -653,6 +641,15 @@ out: return ret; } +static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock) +{ + clear_bit(MPTCP_SEND_SPACE, &msk->flags); + smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ + + /* enables sk->write_space() callbacks */ + set_bit(SOCK_NOSPACE, &sock->flags); +} + static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -660,19 +657,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); + if (!mptcp_ext_cache_refill(msk)) + return NULL; + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!sk_stream_memory_free(ssk)) { struct socket *sock = ssk->sk_socket; - if (sock) { - clear_bit(MPTCP_SEND_SPACE, &msk->flags); - smp_mb__after_atomic(); - - /* enables sk->write_space() callbacks */ - set_bit(SOCK_NOSPACE, &sock->flags); - } + if (sock) + mptcp_nospace(msk, sock); return NULL; } @@ -698,22 +693,19 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk) return; sock = READ_ONCE(ssk->sk_socket); - - if (sock) { - clear_bit(MPTCP_SEND_SPACE, &msk->flags); - smp_mb__after_atomic(); - /* set NOSPACE only after clearing SEND_SPACE flag */ - set_bit(SOCK_NOSPACE, &sock->flags); - } + if (sock) + mptcp_nospace(msk, sock); } static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { int mss_now = 0, size_goal = 0, ret = 0; struct mptcp_sock *msk = mptcp_sk(sk); + struct page_frag *pfrag; struct socket *ssock; size_t copied = 0; struct sock *ssk; + bool tx_ok; long timeo; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) @@ -738,11 +730,29 @@ fallback: return ret >= 0 ? ret + copied : (copied ? copied : ret); } + pfrag = sk_page_frag(sk); +restart: mptcp_clean_una(sk); +wait_for_sndbuf: __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); - while (!sk_stream_memory_free(sk) || !ssk) { + while (!sk_stream_memory_free(sk) || + !ssk || + !mptcp_page_frag_refill(ssk, pfrag)) { + if (ssk) { + /* make sure retransmit timer is + * running before we wait for memory. + * + * The retransmit timer might be needed + * to make the peer send an up-to-date + * MPTCP Ack. + */ + mptcp_set_timeout(sk, ssk); + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + } + ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; @@ -759,11 +769,18 @@ fallback: pr_debug("conn_list->subflow=%p", ssk); lock_sock(ssk); - while (msg_data_left(msg)) { + tx_ok = msg_data_left(msg); + while (tx_ok) { ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now, &size_goal); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN && timeo > 0) { + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto restart; + } break; + } if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { /* Can happen for passive sockets: * 3WHS negotiated MPTCP, but first packet after is @@ -777,6 +794,50 @@ fallback: } copied += ret; + + tx_ok = msg_data_left(msg); + if (!tx_ok) + break; + + if (!sk_stream_memory_free(ssk) || + !mptcp_page_frag_refill(ssk, pfrag) || + !mptcp_ext_cache_refill(msk)) { + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + tcp_push(ssk, msg->msg_flags, mss_now, + tcp_sk(ssk)->nonagle, size_goal); + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto restart; + } + + /* memory is charged to mptcp level socket as well, i.e. + * if msg is very large, mptcp socket may run out of buffer + * space. mptcp_clean_una() will release data that has + * been acked at mptcp level in the mean time, so there is + * a good chance we can continue sending data right away. + * + * Normally, when the tcp subflow can accept more data, then + * so can the MPTCP socket. However, we need to cope with + * peers that might lag behind in their MPTCP-level + * acknowledgements, i.e. data might have been acked at + * tcp level only. So, we must also check the MPTCP socket + * limits before we send more data. + */ + if (unlikely(!sk_stream_memory_free(sk))) { + tcp_push(ssk, msg->msg_flags, mss_now, + tcp_sk(ssk)->nonagle, size_goal); + mptcp_clean_una(sk); + if (!sk_stream_memory_free(sk)) { + /* can't send more for now, need to wait for + * MPTCP-level ACKs from peer. + * + * Wakeup will happen via mptcp_clean_una(). + */ + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto wait_for_sndbuf; + } + } } mptcp_set_timeout(sk, ssk); @@ -1094,7 +1155,7 @@ static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; - int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0; + int orig_len, orig_offset, mss_now = 0, size_goal = 0; struct mptcp_data_frag *dfrag; u64 orig_write_seq; size_t copied = 0; @@ -1116,6 +1177,9 @@ static void mptcp_worker(struct work_struct *work) if (!dfrag) goto unlock; + if (!mptcp_ext_cache_refill(msk)) + goto reset_unlock; + ssk = mptcp_subflow_get_retrans(msk); if (!ssk) goto reset_unlock; @@ -1127,8 +1191,8 @@ static void mptcp_worker(struct work_struct *work) orig_offset = dfrag->offset; orig_write_seq = dfrag->data_seq; while (dfrag->data_len > 0) { - ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, &mss_now, - &size_goal); + int ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, + &mss_now, &size_goal); if (ret < 0) break; @@ -1136,6 +1200,9 @@ static void mptcp_worker(struct work_struct *work) copied += ret; dfrag->data_len -= ret; dfrag->offset += ret; + + if (!mptcp_ext_cache_refill(msk)) + break; } if (copied) tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle, @@ -2001,6 +2068,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e4ca6320ce76..f5adca93e8fb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -290,6 +290,7 @@ struct mptcp_subflow_context { data_avail : 1, rx_eof : 1, data_fin_tx_enable : 1, + use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ can_ack : 1; /* only after processing the remote a key */ u64 data_fin_tx_seq; u32 remote_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 53c75b0e5dce..0020d356233d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -667,9 +667,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (!mpext->dsn64) { map_seq = expand_seq(subflow->map_seq, subflow->map_data_len, mpext->data_seq); + subflow->use_64bit_ack = 0; pr_debug("expanded seq=%llu", subflow->map_seq); } else { map_seq = mpext->data_seq; + subflow->use_64bit_ack = 1; } if (subflow->map_valid) { diff --git a/net/psample/psample.c b/net/psample/psample.c index 6f2fbc6b9eb2..34a74043840b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -14,6 +14,8 @@ #include <net/genetlink.h> #include <net/psample.h> #include <linux/spinlock.h> +#include <net/ip_tunnels.h> +#include <net/dst_metadata.h> #define PSAMPLE_MAX_PACKET_SIZE 0xffff @@ -207,10 +209,155 @@ void psample_group_put(struct psample_group *group) } EXPORT_SYMBOL_GPL(psample_group_put); +static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const void *tun_opts = ip_tunnel_info_opts(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, + PSAMPLE_TUNNEL_KEY_ATTR_PAD)) + return -EMSGSIZE; + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)) + return -EMSGSIZE; + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, + tun_key->u.ipv4.src)) + return -EMSGSIZE; + if (tun_key->u.ipv4.dst && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, + tun_key->u.ipv4.dst)) + return -EMSGSIZE; + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, + &tun_key->u.ipv6.src)) + return -EMSGSIZE; + if (!ipv6_addr_any(&tun_key->u.ipv6.dst) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, + &tun_key->u.ipv6.dst)) + return -EMSGSIZE; + break; + } + if (tun_key->tos && + nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + if (tun_key->tp_src && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src)) + return -EMSGSIZE; + if (tun_key->tp_dst && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_OAM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) + return -EMSGSIZE; + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + } + + return 0; +} + +static int psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct nlattr *nla; + int err; + + nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + err = __psample_ip_tun_to_nlattr(skb, tun_info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } + + nla_nest_end(skb, nla); + + return 0; +} + +static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + int sum = 0; + + if (tun_key->tun_flags & TUNNEL_KEY) + sum += nla_total_size(sizeof(u64)); + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) + sum += nla_total_size(0); + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src) + sum += nla_total_size(sizeof(u32)); + if (tun_key->u.ipv4.dst) + sum += nla_total_size(sizeof(u32)); + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src)) + sum += nla_total_size(sizeof(struct in6_addr)); + if (!ipv6_addr_any(&tun_key->u.ipv6.dst)) + sum += nla_total_size(sizeof(struct in6_addr)); + break; + } + if (tun_key->tos) + sum += nla_total_size(sizeof(u8)); + sum += nla_total_size(sizeof(u8)); /* TTL */ + if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) + sum += nla_total_size(0); + if (tun_key->tun_flags & TUNNEL_CSUM) + sum += nla_total_size(0); + if (tun_key->tp_src) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tp_dst) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tun_flags & TUNNEL_OAM) + sum += nla_total_size(0); + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) + sum += nla_total_size(tun_opts_len); + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) + sum += nla_total_size(tun_opts_len); + } + + return sum; +} + void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, u32 trunc_size, int in_ifindex, int out_ifindex, u32 sample_rate) { + struct ip_tunnel_info *tun_info; struct sk_buff *nl_skb; int data_len; int meta_len; @@ -224,6 +371,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)); /* seq */ + tun_info = skb_tunnel_info(skb); + if (tun_info) + meta_len += psample_tunnel_meta_len(tun_info); + data_len = min(skb->len, trunc_size); if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN @@ -278,6 +429,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, goto error; } + if (tun_info) { + ret = psample_ip_tun_to_nlattr(nl_skb, tun_info); + if (unlikely(ret < 0)) + goto error; + } + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); diff --git a/net/rds/info.c b/net/rds/info.c index 03f6fd56d237..b6b46a8214a0 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -162,7 +162,6 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, struct rds_info_lengths lens; unsigned long nr_pages = 0; unsigned long start; - unsigned long i; rds_info_func func; struct page **pages = NULL; int ret; @@ -193,7 +192,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); + ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; @@ -235,8 +234,8 @@ call_func: ret = -EFAULT; out: - for (i = 0; pages && i < nr_pages; i++) - put_page(pages[i]); + if (pages) + unpin_user_pages(pages, nr_pages); kfree(pages); return ret; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c87af430107a..ccfa0ab3e7f4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1032,6 +1032,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/socket.c b/net/socket.c index 1c9a7260a41d..80422fc3c836 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3366,94 +3366,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return err; } -struct rtentry32 { - u32 rt_pad1; - struct sockaddr rt_dst; /* target address */ - struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ - struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ - /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ - unsigned short rt_irtt; /* Initial RTT */ -}; - -struct in6_rtmsg32 { - struct in6_addr rtmsg_dst; - struct in6_addr rtmsg_src; - struct in6_addr rtmsg_gateway; - u32 rtmsg_type; - u16 rtmsg_dst_len; - u16 rtmsg_src_len; - u32 rtmsg_metric; - u32 rtmsg_info; - u32 rtmsg_flags; - s32 rtmsg_ifindex; -}; - -static int routing_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, void __user *argp) -{ - int ret; - void *r = NULL; - struct in6_rtmsg r6; - struct rtentry r4; - char devname[16]; - u32 rtdev; - mm_segment_t old_fs = get_fs(); - - if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ - struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), - 3 * sizeof(struct in6_addr)); - ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); - - r = (void *) &r6; - } else { /* ipv4 */ - struct rtentry32 __user *ur4 = argp; - ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), - 3 * sizeof(struct sockaddr)); - ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); - ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); - ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); - ret |= get_user(r4.rt_window, &(ur4->rt_window)); - ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); - ret |= get_user(rtdev, &(ur4->rt_dev)); - if (rtdev) { - ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = (char __user __force *)devname; - devname[15] = 0; - } else - r4.rt_dev = NULL; - - r = (void *) &r4; - } - - if (ret) { - ret = -EFAULT; - goto out; - } - - set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs(old_fs); - -out: - return ret; -} - /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that * use compatible ioctls @@ -3492,9 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCADDRT: - case SIOCDELRT: - return routing_ioctl(net, sock, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) |