forked from mirrors/nixpkgs
ad7b1e24c2
This pulls in updated Fan Networking patches from Ubuntu. (https://wiki.ubuntu.com/FanNetworking) closes #14328
1241 lines
38 KiB
Diff
1241 lines
38 KiB
Diff
From e64058be3b97c5bd3e034fc4ece21e306ef6f90b Mon Sep 17 00:00:00 2001
|
|
From: Jay Vosburgh <jay.vosburgh@canonical.com>
|
|
Date: Wed, 1 Apr 2015 16:11:09 -0700
|
|
Subject: [PATCH] UBUNTU: SAUCE: fan: tunnel multiple mapping mode (v3)
|
|
|
|
Switch to a single tunnel for all mappings; this removes the limitations
|
|
on how many mappings each tunnel can handle, and therefore how many Fan
|
|
slices each local address may hold.
|
|
|
|
NOTE: This introduces a new kernel netlink interface which needs updated
|
|
iproute2 support.
|
|
|
|
BugLink: http://bugs.launchpad.net/bugs/1470091
|
|
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
|
|
|
|
Conflicts:
|
|
include/net/ip_tunnels.h
|
|
---
|
|
include/net/ip_tunnels.h | 15 ++++
|
|
include/uapi/linux/if_tunnel.h | 20 +++++
|
|
net/ipv4/ip_tunnel.c | 7 +-
|
|
net/ipv4/ipip.c | 186 +++++++++++++++++++++++++++++++++++++++--
|
|
4 files changed, 222 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
|
|
index 62a750a..47fec59 100644
|
|
--- a/include/net/ip_tunnels.h
|
|
+++ b/include/net/ip_tunnels.h
|
|
@@ -91,6 +91,19 @@ struct ip_tunnel_dst {
|
|
};
|
|
|
|
struct metadata_dst;
|
|
+/* A fan overlay /8 (250.0.0.0/8, for example) maps to exactly one /16
|
|
+ * underlay (10.88.0.0/16, for example). Multiple local addresses within
|
|
+ * the /16 may be used, but a particular overlay may not span
|
|
+ * multiple underlay subnets.
|
|
+ *
|
|
+ * We store one underlay, indexed by the overlay's high order octet.
|
|
+ */
|
|
+#define FAN_OVERLAY_CNT 256
|
|
+
|
|
+struct ip_tunnel_fan {
|
|
+/* u32 __rcu *map;*/
|
|
+ u32 map[FAN_OVERLAY_CNT];
|
|
+};
|
|
|
|
struct ip_tunnel {
|
|
struct ip_tunnel __rcu *next;
|
|
@@ -123,6 +136,7 @@ struct ip_tunnel {
|
|
#endif
|
|
struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
|
|
unsigned int prl_count; /* # of entries in PRL */
|
|
+ struct ip_tunnel_fan fan;
|
|
int ip_tnl_net_id;
|
|
struct gro_cells gro_cells;
|
|
bool collect_md;
|
|
@@ -143,6 +157,7 @@ struct ip_tunnel {
|
|
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
|
|
|
|
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
|
|
+#define TUNNEL_FAN __cpu_to_be16(0x4000)
|
|
|
|
struct tnl_ptk_info {
|
|
__be16 flags;
|
|
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
|
|
index af4de90..85a3e4b 100644
|
|
--- a/include/uapi/linux/if_tunnel.h
|
|
+++ b/include/uapi/linux/if_tunnel.h
|
|
@@ -57,6 +57,10 @@ enum {
|
|
IFLA_IPTUN_ENCAP_FLAGS,
|
|
IFLA_IPTUN_ENCAP_SPORT,
|
|
IFLA_IPTUN_ENCAP_DPORT,
|
|
+
|
|
+ __IFLA_IPTUN_VENDOR_BREAK, /* Ensure new entries do not hit the below. */
|
|
+ IFLA_IPTUN_FAN_MAP = 33,
|
|
+
|
|
__IFLA_IPTUN_MAX,
|
|
};
|
|
#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
|
|
@@ -132,4 +136,20 @@ enum {
|
|
};
|
|
|
|
#define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1)
|
|
+
|
|
+enum {
|
|
+ IFLA_FAN_UNSPEC,
|
|
+ IFLA_FAN_MAPPING,
|
|
+ __IFLA_FAN_MAX,
|
|
+};
|
|
+
|
|
+#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)
|
|
+
|
|
+struct ip_tunnel_fan_map {
|
|
+ __be32 underlay;
|
|
+ __be32 overlay;
|
|
+ __u16 underlay_prefix;
|
|
+ __u16 overlay_prefix;
|
|
+};
|
|
+
|
|
#endif /* _UAPI_IF_TUNNEL_H_ */
|
|
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
|
|
index cbb51f3..7a6174b 100644
|
|
--- a/net/ipv4/ip_tunnel.c
|
|
+++ b/net/ipv4/ip_tunnel.c
|
|
@@ -1110,6 +1110,11 @@ out:
|
|
}
|
|
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
|
|
|
|
+static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
|
|
+{
|
|
+ return tunnel->parms.i_flags & TUNNEL_FAN;
|
|
+}
|
|
+
|
|
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
|
|
struct ip_tunnel_parm *p)
|
|
{
|
|
@@ -1119,7 +1124,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
|
|
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
|
|
|
|
if (dev == itn->fb_tunnel_dev)
|
|
- return -EINVAL;
|
|
+ return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL;
|
|
|
|
t = ip_tunnel_find(itn, p, dev->type);
|
|
|
|
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
|
|
index a09fb0d..56e8984 100644
|
|
--- a/net/ipv4/ipip.c
|
|
+++ b/net/ipv4/ipip.c
|
|
@@ -107,6 +107,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <linux/if_ether.h>
|
|
+#include <linux/inetdevice.h>
|
|
|
|
#include <net/sock.h>
|
|
#include <net/ip.h>
|
|
@@ -208,6 +209,40 @@ drop:
|
|
return 0;
|
|
}
|
|
|
|
+static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel)
|
|
+{
|
|
+ return tunnel->parms.i_flags & TUNNEL_FAN;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Determine fan tunnel endpoint to send packet to, based on the inner IP
|
|
+ * address. For an overlay (inner) address Y.A.B.C, the transformation is
|
|
+ * F.G.A.B, where "F" and "G" are the first two octets of the underlay
|
|
+ * network (the network portion of a /16), "A" and "B" are the low order
|
|
+ * two octets of the underlay network host (the host portion of a /16),
|
|
+ * and "Y" is a configured first octet of the overlay network.
|
|
+ *
|
|
+ * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay
|
|
+ * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to
|
|
+ * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts
|
|
+ * overlay network 99.6.7.0/24.
|
|
+ */
|
|
+static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
|
|
+{
|
|
+ unsigned int overlay;
|
|
+ u32 daddr, underlay;
|
|
+
|
|
+ daddr = ntohl(ip_hdr(skb)->daddr);
|
|
+ overlay = daddr >> 24;
|
|
+ underlay = tunnel->fan.map[overlay];
|
|
+ if (!underlay)
|
|
+ return -EINVAL;
|
|
+
|
|
+ *iph = tunnel->parms.iph;
|
|
+ iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff));
|
|
+ return 0;
|
|
+}
|
|
+
|
|
/*
|
|
* This function assumes it is being called from dev_queue_xmit()
|
|
* and that skb is filled properly by that function.
|
|
@@ -215,7 +250,8 @@ drop:
|
|
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
{
|
|
struct ip_tunnel *tunnel = netdev_priv(dev);
|
|
- const struct iphdr *tiph = &tunnel->parms.iph;
|
|
+ const struct iphdr *tiph;
|
|
+ struct iphdr fiph;
|
|
|
|
if (unlikely(skb->protocol != htons(ETH_P_IP)))
|
|
goto tx_error;
|
|
@@ -224,6 +260,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
if (IS_ERR(skb))
|
|
goto out;
|
|
|
|
+ if (ipip_tunnel_is_fan(tunnel)) {
|
|
+ if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
|
|
+ goto tx_error;
|
|
+ tiph = &fiph;
|
|
+ } else {
|
|
+ tiph = &tunnel->parms.iph;
|
|
+ }
|
|
+
|
|
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
|
|
|
|
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
|
|
@@ -375,21 +419,88 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
|
|
return ret;
|
|
}
|
|
|
|
+static void ipip_fan_free_map(struct ip_tunnel *t)
|
|
+{
|
|
+ memset(&t->fan.map, 0, sizeof(t->fan.map));
|
|
+}
|
|
+
|
|
+static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
|
|
+{
|
|
+ u32 overlay, overlay_mask, underlay, underlay_mask;
|
|
+
|
|
+ if ((map->underlay_prefix && map->underlay_prefix != 16) ||
|
|
+ (map->overlay_prefix && map->overlay_prefix != 8))
|
|
+ return -EINVAL;
|
|
+
|
|
+ overlay = ntohl(map->overlay);
|
|
+ overlay_mask = ntohl(inet_make_mask(map->overlay_prefix));
|
|
+
|
|
+ underlay = ntohl(map->underlay);
|
|
+ underlay_mask = ntohl(inet_make_mask(map->underlay_prefix));
|
|
+
|
|
+ if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask))
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (!(overlay & overlay_mask) && (underlay & underlay_mask))
|
|
+ return -EINVAL;
|
|
+
|
|
+ t->parms.i_flags |= TUNNEL_FAN;
|
|
+
|
|
+ /* Special case: overlay 0 and underlay 0 clears all mappings */
|
|
+ if (!overlay && !underlay) {
|
|
+ ipip_fan_free_map(t);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ overlay >>= (32 - map->overlay_prefix);
|
|
+ t->fan.map[overlay] = underlay;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
|
|
+ struct ip_tunnel_parm *parms)
|
|
+{
|
|
+ struct ip_tunnel_fan_map *map;
|
|
+ struct nlattr *attr;
|
|
+ int rem, rv;
|
|
+
|
|
+ if (!data[IFLA_IPTUN_FAN_MAP])
|
|
+ return 0;
|
|
+
|
|
+ if (parms->iph.daddr)
|
|
+ return -EINVAL;
|
|
+
|
|
+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
|
|
+ map = nla_data(attr);
|
|
+ rv = ipip_fan_set_map(t, map);
|
|
+ if (rv)
|
|
+ return rv;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int ipip_newlink(struct net *src_net, struct net_device *dev,
|
|
struct nlattr *tb[], struct nlattr *data[])
|
|
{
|
|
struct ip_tunnel_parm p;
|
|
struct ip_tunnel_encap ipencap;
|
|
+ struct ip_tunnel *t = netdev_priv(dev);
|
|
+ int err;
|
|
|
|
if (ipip_netlink_encap_parms(data, &ipencap)) {
|
|
- struct ip_tunnel *t = netdev_priv(dev);
|
|
- int err = ip_tunnel_encap_setup(t, &ipencap);
|
|
+ err = ip_tunnel_encap_setup(t, &ipencap);
|
|
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
ipip_netlink_parms(data, &p);
|
|
+ err = ipip_netlink_fan(data, t, &p);
|
|
+ if (err < 0)
|
|
+ return err;
|
|
return ip_tunnel_newlink(dev, tb, &p);
|
|
}
|
|
|
|
@@ -398,16 +509,20 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
|
|
{
|
|
struct ip_tunnel_parm p;
|
|
struct ip_tunnel_encap ipencap;
|
|
+ struct ip_tunnel *t = netdev_priv(dev);
|
|
+ int err;
|
|
|
|
if (ipip_netlink_encap_parms(data, &ipencap)) {
|
|
- struct ip_tunnel *t = netdev_priv(dev);
|
|
- int err = ip_tunnel_encap_setup(t, &ipencap);
|
|
+ err = ip_tunnel_encap_setup(t, &ipencap);
|
|
|
|
if (err < 0)
|
|
return err;
|
|
}
|
|
|
|
ipip_netlink_parms(data, &p);
|
|
+ err = ipip_netlink_fan(data, t, &p);
|
|
+ if (err < 0)
|
|
+ return err;
|
|
|
|
if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
|
|
(!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
|
|
@@ -439,6 +554,8 @@ static size_t ipip_get_size(const struct net_device *dev)
|
|
nla_total_size(2) +
|
|
/* IFLA_IPTUN_ENCAP_DPORT */
|
|
nla_total_size(2) +
|
|
+ /* IFLA_IPTUN_FAN_MAP */
|
|
+ nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 +
|
|
0;
|
|
}
|
|
|
|
@@ -466,6 +583,29 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
|
|
tunnel->encap.flags))
|
|
goto nla_put_failure;
|
|
|
|
+ if (tunnel->parms.i_flags & TUNNEL_FAN) {
|
|
+ struct nlattr *fan_nest;
|
|
+ int i;
|
|
+
|
|
+ fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
|
|
+ if (!fan_nest)
|
|
+ goto nla_put_failure;
|
|
+ for (i = 0; i < 256; i++) {
|
|
+ if (tunnel->fan.map[i]) {
|
|
+ struct ip_tunnel_fan_map map;
|
|
+
|
|
+ map.underlay = htonl(tunnel->fan.map[i]);
|
|
+ map.underlay_prefix = 16;
|
|
+ map.overlay = htonl(i << 24);
|
|
+ map.overlay_prefix = 8;
|
|
+ if (nla_put(skb, IFLA_FAN_MAPPING,
|
|
+ sizeof(map), &map))
|
|
+ goto nla_put_failure;
|
|
+ }
|
|
+ }
|
|
+ nla_nest_end(skb, fan_nest);
|
|
+ }
|
|
+
|
|
return 0;
|
|
|
|
nla_put_failure:
|
|
@@ -483,6 +623,9 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
|
|
[IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
|
|
[IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
|
|
[IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
|
|
+
|
|
+ [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
|
|
+ [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
|
|
};
|
|
|
|
static struct rtnl_link_ops ipip_link_ops __read_mostly = {
|
|
@@ -523,6 +666,23 @@ static struct pernet_operations ipip_net_ops = {
|
|
.size = sizeof(struct ip_tunnel_net),
|
|
};
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
+static struct ctl_table_header *ipip_fan_header;
|
|
+static unsigned int ipip_fan_version = 3;
|
|
+
|
|
+static struct ctl_table ipip_fan_sysctls[] = {
|
|
+ {
|
|
+ .procname = "version",
|
|
+ .data = &ipip_fan_version,
|
|
+ .maxlen = sizeof(ipip_fan_version),
|
|
+ .mode = 0444,
|
|
+ .proc_handler = proc_dointvec,
|
|
+ },
|
|
+ {},
|
|
+};
|
|
+
|
|
+#endif /* CONFIG_SYSCTL */
|
|
+
|
|
static int __init ipip_init(void)
|
|
{
|
|
int err;
|
|
@@ -541,9 +701,22 @@ static int __init ipip_init(void)
|
|
if (err < 0)
|
|
goto rtnl_link_failed;
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
+ ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
|
|
+ ipip_fan_sysctls);
|
|
+ if (!ipip_fan_header) {
|
|
+ err = -ENOMEM;
|
|
+ goto sysctl_failed;
|
|
+ }
|
|
+#endif /* CONFIG_SYSCTL */
|
|
+
|
|
out:
|
|
return err;
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
+sysctl_failed:
|
|
+ rtnl_link_unregister(&ipip_link_ops);
|
|
+#endif /* CONFIG_SYSCTL */
|
|
rtnl_link_failed:
|
|
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
|
|
xfrm_tunnel_failed:
|
|
@@ -553,6 +726,9 @@ xfrm_tunnel_failed:
|
|
|
|
static void __exit ipip_fini(void)
|
|
{
|
|
+#ifdef CONFIG_SYSCTL
|
|
+ unregister_net_sysctl_table(ipip_fan_header);
|
|
+#endif /* CONFIG_SYSCTL */
|
|
rtnl_link_unregister(&ipip_link_ops);
|
|
if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
|
|
pr_info("%s: can't deregister tunnel\n", __func__);
|
|
--
|
|
2.7.4
|
|
|
|
From 14aba409d044e3a314c09c650e1c42de699700b8 Mon Sep 17 00:00:00 2001
|
|
From: Jay Vosburgh <jay.vosburgh@canonical.com>
|
|
Date: Wed, 11 Nov 2015 13:04:50 +0000
|
|
Subject: [PATCH] UBUNTU: SAUCE: fan: add VXLAN implementation
|
|
|
|
Generify the fan mapping support and utilise that to implement fan
|
|
mappings over vxlan transport.
|
|
|
|
Expose the existence of this functionality (when the module is loaded)
|
|
via an additional sysctl marker.
|
|
|
|
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
|
|
[apw@canonical.com: added feature marker for fan over vxlan.]
|
|
Signed-off-by: Andy Whitcroft <apw@canonical.com>
|
|
---
|
|
drivers/net/vxlan.c | 245 +++++++++++++++++++++++++++++++++++++++++
|
|
include/net/ip_tunnels.h | 19 +++-
|
|
include/net/vxlan.h | 2 +
|
|
include/uapi/linux/if_link.h | 1 +
|
|
include/uapi/linux/if_tunnel.h | 2 +-
|
|
net/ipv4/ip_tunnel.c | 7 +-
|
|
net/ipv4/ipip.c | 242 +++++++++++++++++++++++++++++++---------
|
|
7 files changed, 453 insertions(+), 65 deletions(-)
|
|
|
|
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
|
|
index 405a7b6..a17cfd0 100644
|
|
--- a/drivers/net/vxlan.c
|
|
+++ b/drivers/net/vxlan.c
|
|
@@ -23,6 +23,7 @@
|
|
#include <linux/udp.h>
|
|
#include <linux/igmp.h>
|
|
#include <linux/etherdevice.h>
|
|
+#include <linux/inetdevice.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/hash.h>
|
|
@@ -106,6 +107,167 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
|
|
ip_tunnel_collect_metadata();
|
|
}
|
|
|
|
+static struct ip_fan_map *vxlan_fan_find_map(struct vxlan_dev *vxlan, __be32 daddr)
|
|
+{
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ rcu_read_lock();
|
|
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
|
|
+ if (fan_map->overlay ==
|
|
+ (daddr & inet_make_mask(fan_map->overlay_prefix))) {
|
|
+ rcu_read_unlock();
|
|
+ return fan_map;
|
|
+ }
|
|
+ }
|
|
+ rcu_read_unlock();
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+static void vxlan_fan_flush_map(struct vxlan_dev *vxlan)
|
|
+{
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
|
|
+ list_del_rcu(&fan_map->list);
|
|
+ kfree_rcu(fan_map, rcu);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int vxlan_fan_del_map(struct vxlan_dev *vxlan, __be32 overlay)
|
|
+{
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ fan_map = vxlan_fan_find_map(vxlan, overlay);
|
|
+ if (!fan_map)
|
|
+ return -ENOENT;
|
|
+
|
|
+ list_del_rcu(&fan_map->list);
|
|
+ kfree_rcu(fan_map, rcu);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int vxlan_fan_add_map(struct vxlan_dev *vxlan, struct ifla_fan_map *map)
|
|
+{
|
|
+ __be32 overlay_mask, underlay_mask;
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ overlay_mask = inet_make_mask(map->overlay_prefix);
|
|
+ underlay_mask = inet_make_mask(map->underlay_prefix);
|
|
+
|
|
+ netdev_dbg(vxlan->dev, "vfam: map: o %x/%d u %x/%d om %x um %x\n",
|
|
+ map->overlay, map->overlay_prefix,
|
|
+ map->underlay, map->underlay_prefix,
|
|
+ overlay_mask, underlay_mask);
|
|
+
|
|
+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* Special case: overlay 0 and underlay 0: flush all mappings */
|
|
+ if (!map->overlay && !map->underlay) {
|
|
+ vxlan_fan_flush_map(vxlan);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* Special case: overlay set and underlay 0: clear map for overlay */
|
|
+ if (!map->underlay)
|
|
+ return vxlan_fan_del_map(vxlan, map->overlay);
|
|
+
|
|
+ if (vxlan_fan_find_map(vxlan, map->overlay))
|
|
+ return -EEXIST;
|
|
+
|
|
+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
|
|
+ fan_map->underlay = map->underlay;
|
|
+ fan_map->overlay = map->overlay;
|
|
+ fan_map->underlay_prefix = map->underlay_prefix;
|
|
+ fan_map->overlay_mask = ntohl(overlay_mask);
|
|
+ fan_map->overlay_prefix = map->overlay_prefix;
|
|
+
|
|
+ list_add_tail_rcu(&fan_map->list, &vxlan->fan.fan_maps);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int vxlan_parse_fan_map(struct nlattr *data[], struct vxlan_dev *vxlan)
|
|
+{
|
|
+ struct ifla_fan_map *map;
|
|
+ struct nlattr *attr;
|
|
+ int rem, rv;
|
|
+
|
|
+ nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
|
|
+ map = nla_data(attr);
|
|
+ rv = vxlan_fan_add_map(vxlan, map);
|
|
+ if (rv)
|
|
+ return rv;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int vxlan_fan_build_rdst(struct vxlan_dev *vxlan, struct sk_buff *skb,
|
|
+ struct vxlan_rdst *fan_rdst)
|
|
+{
|
|
+ struct ip_fan_map *f_map;
|
|
+ union vxlan_addr *va;
|
|
+ u32 daddr, underlay;
|
|
+ struct arphdr *arp;
|
|
+ void *arp_ptr;
|
|
+ struct ethhdr *eth;
|
|
+ struct iphdr *iph;
|
|
+
|
|
+ eth = eth_hdr(skb);
|
|
+ switch (eth->h_proto) {
|
|
+ case htons(ETH_P_IP):
|
|
+ iph = ip_hdr(skb);
|
|
+ if (!iph)
|
|
+ return -EINVAL;
|
|
+ daddr = iph->daddr;
|
|
+ break;
|
|
+ case htons(ETH_P_ARP):
|
|
+ arp = arp_hdr(skb);
|
|
+ if (!arp)
|
|
+ return -EINVAL;
|
|
+ arp_ptr = arp + 1;
|
|
+ netdev_dbg(vxlan->dev,
|
|
+ "vfbr: arp sha %pM sip %pI4 tha %pM tip %pI4\n",
|
|
+ arp_ptr, arp_ptr + skb->dev->addr_len,
|
|
+ arp_ptr + skb->dev->addr_len + 4,
|
|
+ arp_ptr + (skb->dev->addr_len * 2) + 4);
|
|
+ arp_ptr += (skb->dev->addr_len * 2) + 4;
|
|
+ memcpy(&daddr, arp_ptr, 4);
|
|
+ break;
|
|
+ default:
|
|
+ netdev_dbg(vxlan->dev, "vfbr: unknown eth p %x\n", eth->h_proto);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ f_map = vxlan_fan_find_map(vxlan, daddr);
|
|
+ if (!f_map)
|
|
+ return -EINVAL;
|
|
+
|
|
+ daddr = ntohl(daddr);
|
|
+ underlay = ntohl(f_map->underlay);
|
|
+ if (!underlay)
|
|
+ return -EINVAL;
|
|
+
|
|
+ memset(fan_rdst, 0, sizeof(*fan_rdst));
|
|
+ va = &fan_rdst->remote_ip;
|
|
+ va->sa.sa_family = AF_INET;
|
|
+ fan_rdst->remote_vni = vxlan->default_dst.remote_vni;
|
|
+ va->sin.sin_addr.s_addr = htonl(underlay |
|
|
+ ((daddr & ~f_map->overlay_mask) >>
|
|
+ (32 - f_map->overlay_prefix -
|
|
+ (32 - f_map->underlay_prefix))));
|
|
+ netdev_dbg(vxlan->dev, "vfbr: daddr %x ul %x dst %x\n",
|
|
+ daddr, underlay, va->sin.sin_addr.s_addr);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
static inline
|
|
bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
|
|
@@ -2029,6 +2191,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
goto rt_tx_error;
|
|
}
|
|
|
|
+ if (fan_has_map(&vxlan->fan) && rt->rt_flags & RTCF_LOCAL) {
|
|
+ netdev_dbg(dev, "discard fan to localhost %pI4\n",
|
|
+ &dst->sin.sin_addr.s_addr);
|
|
+ ip_rt_put(rt);
|
|
+ goto tx_free;
|
|
+ }
|
|
+
|
|
/* Bypass encapsulation if the destination is local */
|
|
if (rt->rt_flags & RTCF_LOCAL &&
|
|
!(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
|
|
@@ -2169,6 +2338,20 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
return NETDEV_TX_OK;
|
|
}
|
|
|
|
+ if (fan_has_map(&vxlan->fan)) {
|
|
+ struct vxlan_rdst fan_rdst;
|
|
+
|
|
+ netdev_dbg(vxlan->dev, "vxlan_xmit p %x d %pM\n",
|
|
+ eth->h_proto, eth->h_dest);
|
|
+ if (vxlan_fan_build_rdst(vxlan, skb, &fan_rdst)) {
|
|
+ dev->stats.tx_dropped++;
|
|
+ kfree_skb(skb);
|
|
+ return NETDEV_TX_OK;
|
|
+ }
|
|
+ vxlan_xmit_one(skb, dev, &fan_rdst, 0);
|
|
+ return NETDEV_TX_OK;
|
|
+ }
|
|
+
|
|
f = vxlan_find_mac(vxlan, eth->h_dest);
|
|
did_rsc = false;
|
|
|
|
@@ -2532,6 +2715,8 @@ static void vxlan_setup(struct net_device *dev)
|
|
|
|
for (h = 0; h < FDB_HASH_SIZE; ++h)
|
|
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
|
|
+
|
|
+ INIT_LIST_HEAD(&vxlan->fan.fan_maps);
|
|
}
|
|
|
|
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
|
|
@@ -2881,6 +3066,7 @@ EXPORT_SYMBOL_GPL(vxlan_dev_create);
|
|
static int vxlan_newlink(struct net *src_net, struct net_device *dev,
|
|
struct nlattr *tb[], struct nlattr *data[])
|
|
{
|
|
+ struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
struct vxlan_config conf;
|
|
int err;
|
|
|
|
@@ -2899,6 +3085,12 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
|
|
conf.remote_ip.sa.sa_family = AF_INET6;
|
|
}
|
|
|
|
+ if (data[IFLA_VXLAN_FAN_MAP]) {
|
|
+ err = vxlan_parse_fan_map(data, vxlan);
|
|
+ if (err)
|
|
+ return err;
|
|
+ }
|
|
+
|
|
if (data[IFLA_VXLAN_LOCAL]) {
|
|
conf.saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
|
|
conf.saddr.sa.sa_family = AF_INET;
|
|
@@ -3037,6 +3229,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
|
|
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
|
|
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
|
|
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
|
|
+ nla_total_size(sizeof(struct ip_fan_map) * 256) +
|
|
0;
|
|
}
|
|
|
|
@@ -3083,6 +3276,26 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
|
|
}
|
|
}
|
|
|
|
+ if (fan_has_map(&vxlan->fan)) {
|
|
+ struct nlattr *fan_nest;
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ fan_nest = nla_nest_start(skb, IFLA_VXLAN_FAN_MAP);
|
|
+ if (!fan_nest)
|
|
+ goto nla_put_failure;
|
|
+ list_for_each_entry_rcu(fan_map, &vxlan->fan.fan_maps, list) {
|
|
+ struct ifla_fan_map map;
|
|
+
|
|
+ map.underlay = fan_map->underlay;
|
|
+ map.underlay_prefix = fan_map->underlay_prefix;
|
|
+ map.overlay = fan_map->overlay;
|
|
+ map.overlay_prefix = fan_map->overlay_prefix;
|
|
+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
|
|
+ goto nla_put_failure;
|
|
+ }
|
|
+ nla_nest_end(skb, fan_nest);
|
|
+ }
|
|
+
|
|
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
|
|
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
|
|
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
|
|
@@ -3201,6 +3414,22 @@ static __net_init int vxlan_init_net(struct net *net)
|
|
return 0;
|
|
}
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
+static struct ctl_table_header *vxlan_fan_header;
|
|
+static unsigned int vxlan_fan_version = 4;
|
|
+
|
|
+static struct ctl_table vxlan_fan_sysctls[] = {
|
|
+ {
|
|
+ .procname = "vxlan",
|
|
+ .data = &vxlan_fan_version,
|
|
+ .maxlen = sizeof(vxlan_fan_version),
|
|
+ .mode = 0444,
|
|
+ .proc_handler = proc_dointvec,
|
|
+ },
|
|
+ {},
|
|
+};
|
|
+#endif /* CONFIG_SYSCTL */
|
|
+
|
|
static void __net_exit vxlan_exit_net(struct net *net)
|
|
{
|
|
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
@@ -3256,7 +3485,20 @@ static int __init vxlan_init_module(void)
|
|
if (rc)
|
|
goto out3;
|
|
|
|
+#ifdef CONFIG_SYSCTL
|
|
+ vxlan_fan_header = register_net_sysctl(&init_net, "net/fan",
|
|
+ vxlan_fan_sysctls);
|
|
+ if (!vxlan_fan_header) {
|
|
+ rc = -ENOMEM;
|
|
+ goto sysctl_failed;
|
|
+ }
|
|
+#endif /* CONFIG_SYSCTL */
|
|
+
|
|
return 0;
|
|
+#ifdef CONFIG_SYSCTL
|
|
+sysctl_failed:
|
|
+ rtnl_link_unregister(&vxlan_link_ops);
|
|
+#endif /* CONFIG_SYSCTL */
|
|
out3:
|
|
unregister_netdevice_notifier(&vxlan_notifier_block);
|
|
out2:
|
|
@@ -3269,6 +3511,9 @@ late_initcall(vxlan_init_module);
|
|
|
|
static void __exit vxlan_cleanup_module(void)
|
|
{
|
|
+#ifdef CONFIG_SYSCTL
|
|
+ unregister_net_sysctl_table(vxlan_fan_header);
|
|
+#endif /* CONFIG_SYSCTL */
|
|
rtnl_link_unregister(&vxlan_link_ops);
|
|
unregister_netdevice_notifier(&vxlan_notifier_block);
|
|
destroy_workqueue(vxlan_wq);
|
|
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
|
|
index 47fec59..28a38e5 100644
|
|
--- a/include/net/ip_tunnels.h
|
|
+++ b/include/net/ip_tunnels.h
|
|
@@ -100,9 +100,18 @@ struct metadata_dst;
|
|
*/
|
|
#define FAN_OVERLAY_CNT 256
|
|
|
|
+struct ip_fan_map {
|
|
+ __be32 underlay;
|
|
+ __be32 overlay;
|
|
+ u16 underlay_prefix;
|
|
+ u16 overlay_prefix;
|
|
+ u32 overlay_mask;
|
|
+ struct list_head list;
|
|
+ struct rcu_head rcu;
|
|
+};
|
|
+
|
|
struct ip_tunnel_fan {
|
|
-/* u32 __rcu *map;*/
|
|
- u32 map[FAN_OVERLAY_CNT];
|
|
+ struct list_head fan_maps;
|
|
};
|
|
|
|
struct ip_tunnel {
|
|
@@ -157,7 +166,11 @@ struct ip_tunnel {
|
|
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
|
|
|
|
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
|
|
-#define TUNNEL_FAN __cpu_to_be16(0x4000)
|
|
+
|
|
+static inline int fan_has_map(const struct ip_tunnel_fan *fan)
|
|
+{
|
|
+ return !list_empty(&fan->fan_maps);
|
|
+}
|
|
|
|
struct tnl_ptk_info {
|
|
__be16 flags;
|
|
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
|
|
index e289ada..542f421 100644
|
|
--- a/include/net/vxlan.h
|
|
+++ b/include/net/vxlan.h
|
|
@@ -161,6 +161,8 @@ struct vxlan_dev {
|
|
struct vxlan_rdst default_dst; /* default destination */
|
|
u32 flags; /* VXLAN_F_* in vxlan.h */
|
|
|
|
+ struct ip_tunnel_fan fan;
|
|
+
|
|
struct timer_list age_timer;
|
|
spinlock_t hash_lock;
|
|
unsigned int addrcnt;
|
|
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
|
|
index 5ad5737..6cde3bf 100644
|
|
--- a/include/uapi/linux/if_link.h
|
|
+++ b/include/uapi/linux/if_link.h
|
|
@@ -443,6 +443,7 @@ enum {
|
|
IFLA_VXLAN_GBP,
|
|
IFLA_VXLAN_REMCSUM_NOPARTIAL,
|
|
IFLA_VXLAN_COLLECT_METADATA,
|
|
+ IFLA_VXLAN_FAN_MAP = 33,
|
|
__IFLA_VXLAN_MAX
|
|
};
|
|
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
|
|
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
|
|
index 85a3e4b..d36b150 100644
|
|
--- a/include/uapi/linux/if_tunnel.h
|
|
+++ b/include/uapi/linux/if_tunnel.h
|
|
@@ -145,7 +145,7 @@ enum {
|
|
|
|
#define IFLA_FAN_MAX (__IFLA_FAN_MAX - 1)
|
|
|
|
-struct ip_tunnel_fan_map {
|
|
+struct ifla_fan_map {
|
|
__be32 underlay;
|
|
__be32 overlay;
|
|
__u16 underlay_prefix;
|
|
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
|
|
index 7a6174b..c821bf1 100644
|
|
--- a/net/ipv4/ip_tunnel.c
|
|
+++ b/net/ipv4/ip_tunnel.c
|
|
@@ -1110,11 +1110,6 @@ out:
|
|
}
|
|
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
|
|
|
|
-static int ip_tunnel_is_fan(struct ip_tunnel *tunnel)
|
|
-{
|
|
- return tunnel->parms.i_flags & TUNNEL_FAN;
|
|
-}
|
|
-
|
|
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
|
|
struct ip_tunnel_parm *p)
|
|
{
|
|
@@ -1124,7 +1119,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
|
|
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
|
|
|
|
if (dev == itn->fb_tunnel_dev)
|
|
- return ip_tunnel_is_fan(tunnel) ? 0 : -EINVAL;
|
|
+ return fan_has_map(&tunnel->fan) ? 0 : -EINVAL;
|
|
|
|
t = ip_tunnel_find(itn, p, dev->type);
|
|
|
|
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
|
|
index 56e8984..3877b0e 100644
|
|
--- a/net/ipv4/ipip.c
|
|
+++ b/net/ipv4/ipip.c
|
|
@@ -108,6 +108,7 @@
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/inetdevice.h>
|
|
+#include <linux/rculist.h>
|
|
|
|
#include <net/sock.h>
|
|
#include <net/ip.h>
|
|
@@ -209,37 +210,144 @@ drop:
|
|
return 0;
|
|
}
|
|
|
|
-static int ipip_tunnel_is_fan(struct ip_tunnel *tunnel)
|
|
+static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
|
|
{
|
|
- return tunnel->parms.i_flags & TUNNEL_FAN;
|
|
+ struct ip_fan_map *fan_map;
|
|
+
|
|
+ rcu_read_lock();
|
|
+ list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
|
|
+ if (fan_map->overlay ==
|
|
+ (daddr & inet_make_mask(fan_map->overlay_prefix))) {
|
|
+ rcu_read_unlock();
|
|
+ return fan_map;
|
|
+ }
|
|
+ }
|
|
+ rcu_read_unlock();
|
|
+
|
|
+ return NULL;
|
|
}
|
|
|
|
-/*
|
|
- * Determine fan tunnel endpoint to send packet to, based on the inner IP
|
|
- * address. For an overlay (inner) address Y.A.B.C, the transformation is
|
|
- * F.G.A.B, where "F" and "G" are the first two octets of the underlay
|
|
- * network (the network portion of a /16), "A" and "B" are the low order
|
|
- * two octets of the underlay network host (the host portion of a /16),
|
|
- * and "Y" is a configured first octet of the overlay network.
|
|
+/* Determine fan tunnel endpoint to send packet to, based on the inner IP
|
|
+ * address.
|
|
+ *
|
|
+ * Given a /8 overlay and /16 underlay, for an overlay (inner) address
|
|
+ * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
|
|
+ * two octets of the underlay network (the network portion of a /16), "A"
|
|
+ * and "B" are the low order two octets of the underlay network host (the
|
|
+ * host portion of a /16), and "Y" is a configured first octet of the
|
|
+ * overlay network.
|
|
+ *
|
|
+ * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
|
|
+ * host overlay subnet 99.3.4.0/24. An overlay network datagram from
|
|
+ * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
|
|
+ * which hosts overlay network subnet 99.6.7.0/24. This transformation is
|
|
+ * described in detail further below.
|
|
+ *
|
|
+ * Using netmasks for the overlay and underlay other than /8 and /16, as
|
|
+ * shown above, can yield larger (or smaller) overlay subnets, with the
|
|
+ * trade-off of allowing fewer (or more) underlay hosts to participate.
|
|
+ *
|
|
+ * The size of each overlay network subnet is defined by the total of the
|
|
+ * network mask of the overlay plus the size of host portion of the
|
|
+ * underlay network. In the above example, /8 + /16 = /24.
|
|
+ *
|
|
+ * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
|
|
+ * this case, the network portion of the underlay is 10.99.224.0/20, and
|
|
+ * the host portion is 0.0.14.5 (12 bits). To determine the overlay
|
|
+ * network subnet, the 12 bits of host portion are left shifted 12 bits
|
|
+ * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
|
|
+ * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
|
|
+ * 12 bits underlay. This yields 12 bits in the overlay network portion,
|
|
+ * allowing for 4094 addresses in each overlay network subnet. The
|
|
+ * trade-off is that fewer hosts may participate in the underlay network,
|
|
+ * as its host address size has shrunk from 16 bits (65534 addresses) in
|
|
+ * the first example to 12 bits (4094 addresses) here.
|
|
+ *
|
|
+ * For fewer hosts per overlay subnet (permitting a larger number of
|
|
+ * underlay hosts to participate), the underlay netmask may be made
|
|
+ * smaller.
|
|
+ *
|
|
+ * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
|
|
+ * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
|
|
+ * the 20 bits of host by 4 (so that its highest order bit is adjacent to
|
|
+ * the lowest order bit of the /8 overlay). This yields an overlay subnet
|
|
+ * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
|
|
+ * the underlay). This provides more addresses for the underlay network
|
|
+ * (approximately 2^20), but each host's segment of the overlay provides
|
|
+ * only 4 bits of addresses (14 usable).
|
|
+ *
|
|
+ * It is also possible to adjust the overlay subnet.
|
|
+ *
|
|
+ * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
|
|
+ * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
|
|
+ * shifted 15 bits (/20 - /5), yielding an overlay network of
|
|
+ * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
|
|
+ * overlay network of 242.107.128.0/17.
|
|
+ *
|
|
+ * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
|
|
+ * underlay host 10.224.220.10, the underlay host portion (.10) is left
|
|
+ * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
|
|
+ * This would permit 254 addresses on the underlay, with each overlay
|
|
+ * segment providing approximately 2^14 - 2 addresses (16382).
|
|
+ *
|
|
+ * For packets being encapsulated, the overlay network destination IP
|
|
+ * address is deconstructed into its overlay and underlay-derived
|
|
+ * portions. The underlay portion (determined by the overlay mask and
|
|
+ * overlay subnet mask) is right shifted according to the size of the
|
|
+ * underlay network mask. This value is then ORed with the network
|
|
+ * portion of the underlay network to produce the underlay network
|
|
+ * destination for the encapsulated datagram.
|
|
+ *
|
|
+ * For example, using the initial example of underlay 10.88.3.4/16 and
|
|
+ * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
|
|
+ * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
|
|
+ * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
|
|
+ * of the address extracted. This is a number of bits equal to underlay
|
|
+ * network host portion. In the destination address, the highest order of
|
|
+ * these bits is one bit lower than the lowest order bit from the overlay
|
|
+ * network mask.
|
|
+ *
|
|
+ * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
|
|
+ * underlay mask is /16 (leaving 16 bits for the host portion). The bits
|
|
+ * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
|
|
+ * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
|
|
+ * which is 1 bit lower than the lowest order overlay address bit).
|
|
*
|
|
- * E.g., underlay host 10.88.3.4 with an overlay of 99 would host overlay
|
|
- * subnet 99.3.4.0/24. An overlay network datagram from 99.3.4.5 to
|
|
- * 99.6.7.8, would be directed to underlay host 10.88.6.7, which hosts
|
|
- * overlay network 99.6.7.0/24.
|
|
+ * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
|
|
+ * This value is then ORed with the underlay network portion,
|
|
+ * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
|
|
+ * the encapsulated datagram.
|
|
+ *
|
|
+ * Another transform using the final example: overlay 100.64.0.0/10 and
|
|
+ * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
|
|
+ * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
|
|
+ * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
|
|
+ * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
|
|
+ * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
|
|
+ * network portion and the underlay host portion) bits, yielding 0.0.0.11.
|
|
+ * This is ORed with the underlay network portion, 10.224.220.0/24, giving
|
|
+ * the underlay destination of 10.224.220.11 for overlay destination
|
|
+ * 100.66.200.5.
|
|
*/
|
|
static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
|
|
{
|
|
- unsigned int overlay;
|
|
+ struct ip_fan_map *f_map;
|
|
u32 daddr, underlay;
|
|
|
|
+ f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
|
|
+ if (!f_map)
|
|
+ return -ENOENT;
|
|
+
|
|
daddr = ntohl(ip_hdr(skb)->daddr);
|
|
- overlay = daddr >> 24;
|
|
- underlay = tunnel->fan.map[overlay];
|
|
+ underlay = ntohl(f_map->underlay);
|
|
if (!underlay)
|
|
return -EINVAL;
|
|
|
|
*iph = tunnel->parms.iph;
|
|
- iph->daddr = htonl(underlay | ((daddr >> 8) & 0x0000ffff));
|
|
+ iph->daddr = htonl(underlay |
|
|
+ ((daddr & ~f_map->overlay_mask) >>
|
|
+ (32 - f_map->overlay_prefix -
|
|
+ (32 - f_map->underlay_prefix))));
|
|
return 0;
|
|
}
|
|
|
|
@@ -260,7 +368,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
if (IS_ERR(skb))
|
|
goto out;
|
|
|
|
- if (ipip_tunnel_is_fan(tunnel)) {
|
|
+ if (fan_has_map(&tunnel->fan)) {
|
|
if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
|
|
goto tx_error;
|
|
tiph = &fiph;
|
|
@@ -325,6 +433,8 @@ static const struct net_device_ops ipip_netdev_ops = {
|
|
|
|
static void ipip_tunnel_setup(struct net_device *dev)
|
|
{
|
|
+ struct ip_tunnel *t = netdev_priv(dev);
|
|
+
|
|
dev->netdev_ops = &ipip_netdev_ops;
|
|
|
|
dev->type = ARPHRD_TUNNEL;
|
|
@@ -336,6 +446,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
|
|
dev->features |= IPIP_FEATURES;
|
|
dev->hw_features |= IPIP_FEATURES;
|
|
ip_tunnel_setup(dev, ipip_net_id);
|
|
+ INIT_LIST_HEAD(&t->fan.fan_maps);
|
|
}
|
|
|
|
static int ipip_tunnel_init(struct net_device *dev)
|
|
@@ -419,41 +530,99 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
|
|
 return ret;
|
|
 }
|
|
|
|
-static void ipip_fan_free_map(struct ip_tunnel *t)
|
|
+/*
|
|
+ * Drop every fan mapping attached to @t. Each entry is unlinked with
|
|
+ * list_del_rcu() and released with kfree_rcu(), so the walk uses the
|
|
+ * _safe iterator: the next entry is fetched before the current one is
|
|
+ * handed off for freeing.
|
|
+ */
|
|
+static void ipip_fan_flush_map(struct ip_tunnel *t)
|
|
 {
|
|
- memset(&t->fan.map, 0, sizeof(t->fan.map));
|
|
+ struct ip_fan_map *fan_map, *tmp;
|
|
+
|
|
+ list_for_each_entry_safe(fan_map, tmp, &t->fan.fan_maps, list) {
|
|
+ list_del_rcu(&fan_map->list);
|
|
+ kfree_rcu(fan_map, rcu);
|
|
+ }
|
|
 }
|
|
|
|
-static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
|
|
+/*
|
|
+ * Remove the mapping that ipip_fan_find_map() returns for @overlay.
|
|
+ * Returns 0 on success or -ENOENT when no mapping is found.
|
|
+ */
|
|
+static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
|
|
 {
|
|
- u32 overlay, overlay_mask, underlay, underlay_mask;
|
|
+ struct ip_fan_map *fan_map;
|
|
|
|
- if ((map->underlay_prefix && map->underlay_prefix != 16) ||
|
|
- (map->overlay_prefix && map->overlay_prefix != 8))
|
|
- return -EINVAL;
|
|
+ fan_map = ipip_fan_find_map(t, overlay);
|
|
+ if (!fan_map)
|
|
+ return -ENOENT;
|
|
+
|
|
+ list_del_rcu(&fan_map->list);
|
|
+ kfree_rcu(fan_map, rcu);
|
|
|
|
- overlay = ntohl(map->overlay);
|
|
- overlay_mask = ntohl(inet_make_mask(map->overlay_prefix));
|
|
+ return 0;
|
|
+}
|
|
|
|
- underlay = ntohl(map->underlay);
|
|
- underlay_mask = ntohl(inet_make_mask(map->underlay_prefix));
|
|
+/*
|
|
+ * Apply one netlink-supplied map entry to tunnel @t:
|
|
+ * - overlay 0 and underlay 0: flush all mappings;
|
|
+ * - overlay set and underlay 0: delete the mapping for that overlay;
|
|
+ * - otherwise: add a new mapping.
|
|
+ *
|
|
+ * Returns 0 on success, -EINVAL for an inconsistent address/prefix
|
|
+ * combination, -ENOENT when the mapping to delete is not found, -EEXIST
|
|
+ * when the overlay is already mapped, or -ENOMEM on allocation failure.
|
|
+ */
|
|
+static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
|
|
+{
|
|
+ __be32 overlay_mask, underlay_mask;
|
|
+ struct ip_fan_map *fan_map;
|
|
|
|
- if ((overlay & ~overlay_mask) || (underlay & ~underlay_mask))
|
|
- return -EINVAL;
|
|
+ /* Prefix lengths above 32 cannot be turned into an IPv4 mask. */
|
|
+ if (map->overlay_prefix > 32 || map->underlay_prefix > 32)
|
|
+ return -EINVAL;
|
|
+
|
|
+ overlay_mask = inet_make_mask(map->overlay_prefix);
|
|
+ underlay_mask = inet_make_mask(map->underlay_prefix);
|
|
|
|
- if (!(overlay & overlay_mask) && (underlay & underlay_mask))
|
|
+ if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
|
|
 return -EINVAL;
|
|
|
|
- t->parms.i_flags |= TUNNEL_FAN;
|
|
+ if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
|
|
+ return -EINVAL;
|
|
|
|
- /* Special case: overlay 0 and underlay 0 clears all mappings */
|
|
- if (!overlay && !underlay) {
|
|
- ipip_fan_free_map(t);
|
|
+ /* Special case: overlay 0 and underlay 0: flush all mappings */
|
|
+ if (!map->overlay && !map->underlay) {
|
|
+ ipip_fan_flush_map(t);
|
|
 return 0;
|
|
 }
|
|
+
|
|
+ /* Special case: overlay set and underlay 0: clear map for overlay */
|
|
+ if (!map->underlay)
|
|
+ return ipip_fan_del_map(t, map->overlay);
|
|
+
|
|
+ if (ipip_fan_find_map(t, map->overlay))
|
|
+ return -EEXIST;
|
|
+
|
|
+ /*
|
|
+ * ipip_build_fan_iphdr() shifts by underlay_prefix - overlay_prefix;
|
|
+ * refuse a map that would make that shift count negative.
|
|
+ */
|
|
+ if (map->underlay_prefix < map->overlay_prefix)
|
|
+ return -EINVAL;
|
|
+
|
|
+ fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
|
|
+ if (!fan_map)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ fan_map->underlay = map->underlay;
|
|
+ fan_map->overlay = map->overlay;
|
|
+ fan_map->underlay_prefix = map->underlay_prefix;
|
|
+ fan_map->overlay_mask = ntohl(overlay_mask);
|
|
+ fan_map->overlay_prefix = map->overlay_prefix;
|
|
|
|
- overlay >>= (32 - map->overlay_prefix);
|
|
- t->fan.map[overlay] = underlay;
|
|
+ list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
|
|
|
|
 return 0;
|
|
 }
|
|
@@ -462,7 +597,7 @@ static int ipip_fan_set_map(struct ip_tunnel *t, struct ip_tunnel_fan_map *map)
|
|
static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
|
|
struct ip_tunnel_parm *parms)
|
|
{
|
|
- struct ip_tunnel_fan_map *map;
|
|
+ struct ifla_fan_map *map;
|
|
struct nlattr *attr;
|
|
int rem, rv;
|
|
|
|
@@ -474,7 +609,7 @@ static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
|
|
|
|
nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
|
|
map = nla_data(attr);
|
|
- rv = ipip_fan_set_map(t, map);
|
|
+ rv = ipip_fan_add_map(t, map);
|
|
if (rv)
|
|
return rv;
|
|
}
|
|
@@ -555,7 +690,7 @@ static size_t ipip_get_size(const struct net_device *dev)
|
|
/* IFLA_IPTUN_ENCAP_DPORT */
|
|
nla_total_size(2) +
|
|
/* IFLA_IPTUN_FAN_MAP */
|
|
- nla_total_size(sizeof(struct ip_tunnel_fan_map)) * 256 +
|
|
+ nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
|
|
0;
|
|
}
|
|
|
|
@@ -583,25 +718,22 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
|
|
tunnel->encap.flags))
|
|
goto nla_put_failure;
|
|
|
|
- if (tunnel->parms.i_flags & TUNNEL_FAN) {
|
|
+ if (fan_has_map(&tunnel->fan)) {
|
|
struct nlattr *fan_nest;
|
|
- int i;
|
|
+ struct ip_fan_map *fan_map;
|
|
|
|
fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
|
|
if (!fan_nest)
|
|
goto nla_put_failure;
|
|
- for (i = 0; i < 256; i++) {
|
|
- if (tunnel->fan.map[i]) {
|
|
- struct ip_tunnel_fan_map map;
|
|
-
|
|
- map.underlay = htonl(tunnel->fan.map[i]);
|
|
- map.underlay_prefix = 16;
|
|
- map.overlay = htonl(i << 24);
|
|
- map.overlay_prefix = 8;
|
|
- if (nla_put(skb, IFLA_FAN_MAPPING,
|
|
- sizeof(map), &map))
|
|
- goto nla_put_failure;
|
|
- }
|
|
+ list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
|
|
+ struct ifla_fan_map map;
|
|
+
|
|
+ map.underlay = fan_map->underlay;
|
|
+ map.underlay_prefix = fan_map->underlay_prefix;
|
|
+ map.overlay = fan_map->overlay;
|
|
+ map.overlay_prefix = fan_map->overlay_prefix;
|
|
+ if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
|
|
+ goto nla_put_failure;
|
|
}
|
|
nla_nest_end(skb, fan_nest);
|
|
}
|
|
--
|
|
2.7.4
|
|
|