linux/drivers/net/vxlan.c
<<
>>
Prefs
   1/*
   2 * VXLAN: Virtual eXtensible Local Area Network
   3 *
   4 * Copyright (c) 2012 Vyatta Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 *
  10 * TODO
  11 *  - use IANA UDP port number (when defined)
  12 *  - IPv6 (not in RFC)
  13 */
  14
  15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  16
  17#include <linux/kernel.h>
  18#include <linux/types.h>
  19#include <linux/module.h>
  20#include <linux/errno.h>
  21#include <linux/slab.h>
  22#include <linux/skbuff.h>
  23#include <linux/rculist.h>
  24#include <linux/netdevice.h>
  25#include <linux/in.h>
  26#include <linux/ip.h>
  27#include <linux/udp.h>
  28#include <linux/igmp.h>
  29#include <linux/etherdevice.h>
  30#include <linux/if_ether.h>
  31#include <linux/hash.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/udp.h>
  35#include <net/rtnetlink.h>
  36#include <net/route.h>
  37#include <net/dsfield.h>
  38#include <net/inet_ecn.h>
  39#include <net/net_namespace.h>
  40#include <net/netns/generic.h>
  41
  42#define VXLAN_VERSION   "0.1"
  43
  44#define VNI_HASH_BITS   10
  45#define VNI_HASH_SIZE   (1<<VNI_HASH_BITS)
  46#define FDB_HASH_BITS   8
  47#define FDB_HASH_SIZE   (1<<FDB_HASH_BITS)
  48#define FDB_AGE_DEFAULT 300 /* 5 min */
  49#define FDB_AGE_INTERVAL (10 * HZ)      /* rescan interval */
  50
  51#define VXLAN_N_VID     (1u << 24)
  52#define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
  53/* IP header + UDP + VXLAN + Ethernet header */
  54#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
  55
  56#define VXLAN_FLAGS 0x08000000  /* struct vxlanhdr.vx_flags required value. */
  57
  58/* VXLAN protocol header */
  59struct vxlanhdr {
  60        __be32 vx_flags;
  61        __be32 vx_vni;
  62};
  63
  64/* UDP port for VXLAN traffic. */
  65static unsigned int vxlan_port __read_mostly = 8472;
  66module_param_named(udp_port, vxlan_port, uint, 0444);
  67MODULE_PARM_DESC(udp_port, "Destination UDP port");
  68
  69static bool log_ecn_error = true;
  70module_param(log_ecn_error, bool, 0644);
  71MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  72
  73/* per-net private data for this module */
  74static unsigned int vxlan_net_id;
  75struct vxlan_net {
  76        struct socket     *sock;        /* UDP encap socket */
  77        struct hlist_head vni_list[VNI_HASH_SIZE];
  78};
  79
  80/* Forwarding table entry */
  81struct vxlan_fdb {
  82        struct hlist_node hlist;        /* linked list of entries */
  83        struct rcu_head   rcu;
  84        unsigned long     updated;      /* jiffies */
  85        unsigned long     used;
  86        __be32            remote_ip;
  87        u16               state;        /* see ndm_state */
  88        u8                eth_addr[ETH_ALEN];
  89};
  90
  91/* Per-cpu network traffic stats */
  92struct vxlan_stats {
  93        u64                     rx_packets;
  94        u64                     rx_bytes;
  95        u64                     tx_packets;
  96        u64                     tx_bytes;
  97        struct u64_stats_sync   syncp;
  98};
  99
 100/* Pseudo network device */
 101struct vxlan_dev {
 102        struct hlist_node hlist;
 103        struct net_device *dev;
 104        struct vxlan_stats __percpu *stats;
 105        __u32             vni;          /* virtual network id */
 106        __be32            gaddr;        /* multicast group */
 107        __be32            saddr;        /* source address */
 108        unsigned int      link;         /* link to multicast over */
 109        __u16             port_min;     /* source port range */
 110        __u16             port_max;
 111        __u8              tos;          /* TOS override */
 112        __u8              ttl;
 113        bool              learn;
 114
 115        unsigned long     age_interval;
 116        struct timer_list age_timer;
 117        spinlock_t        hash_lock;
 118        unsigned int      addrcnt;
 119        unsigned int      addrmax;
 120        unsigned int      addrexceeded;
 121
 122        struct hlist_head fdb_head[FDB_HASH_SIZE];
 123};
 124
 125/* salt for hash table */
 126static u32 vxlan_salt __read_mostly;
 127
 128static inline struct hlist_head *vni_head(struct net *net, u32 id)
 129{
 130        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 131
 132        return &vn->vni_list[hash_32(id, VNI_HASH_BITS)];
 133}
 134
 135/* Look up VNI in a per net namespace table */
 136static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
 137{
 138        struct vxlan_dev *vxlan;
 139        struct hlist_node *node;
 140
 141        hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) {
 142                if (vxlan->vni == id)
 143                        return vxlan;
 144        }
 145
 146        return NULL;
 147}
 148
 149/* Fill in neighbour message in skbuff. */
 150static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 151                           const struct vxlan_fdb *fdb,
 152                           u32 portid, u32 seq, int type, unsigned int flags)
 153{
 154        unsigned long now = jiffies;
 155        struct nda_cacheinfo ci;
 156        struct nlmsghdr *nlh;
 157        struct ndmsg *ndm;
 158
 159        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
 160        if (nlh == NULL)
 161                return -EMSGSIZE;
 162
 163        ndm = nlmsg_data(nlh);
 164        memset(ndm, 0, sizeof(*ndm));
 165        ndm->ndm_family = AF_BRIDGE;
 166        ndm->ndm_state = fdb->state;
 167        ndm->ndm_ifindex = vxlan->dev->ifindex;
 168        ndm->ndm_flags = NTF_SELF;
 169        ndm->ndm_type = NDA_DST;
 170
 171        if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 172                goto nla_put_failure;
 173
 174        if (nla_put_be32(skb, NDA_DST, fdb->remote_ip))
 175                goto nla_put_failure;
 176
 177        ci.ndm_used      = jiffies_to_clock_t(now - fdb->used);
 178        ci.ndm_confirmed = 0;
 179        ci.ndm_updated   = jiffies_to_clock_t(now - fdb->updated);
 180        ci.ndm_refcnt    = 0;
 181
 182        if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 183                goto nla_put_failure;
 184
 185        return nlmsg_end(skb, nlh);
 186
 187nla_put_failure:
 188        nlmsg_cancel(skb, nlh);
 189        return -EMSGSIZE;
 190}
 191
 192static inline size_t vxlan_nlmsg_size(void)
 193{
 194        return NLMSG_ALIGN(sizeof(struct ndmsg))
 195                + nla_total_size(ETH_ALEN) /* NDA_LLADDR */
 196                + nla_total_size(sizeof(__be32)) /* NDA_DST */
 197                + nla_total_size(sizeof(struct nda_cacheinfo));
 198}
 199
 200static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
 201                             const struct vxlan_fdb *fdb, int type)
 202{
 203        struct net *net = dev_net(vxlan->dev);
 204        struct sk_buff *skb;
 205        int err = -ENOBUFS;
 206
 207        skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
 208        if (skb == NULL)
 209                goto errout;
 210
 211        err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0);
 212        if (err < 0) {
 213                /* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
 214                WARN_ON(err == -EMSGSIZE);
 215                kfree_skb(skb);
 216                goto errout;
 217        }
 218
 219        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
 220        return;
 221errout:
 222        if (err < 0)
 223                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 224}
 225
 226/* Hash Ethernet address */
 227static u32 eth_hash(const unsigned char *addr)
 228{
 229        u64 value = get_unaligned((u64 *)addr);
 230
 231        /* only want 6 bytes */
 232#ifdef __BIG_ENDIAN
 233        value >>= 16;
 234#else
 235        value <<= 16;
 236#endif
 237        return hash_64(value, FDB_HASH_BITS);
 238}
 239
 240/* Hash chain to use given mac address */
 241static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
 242                                                const u8 *mac)
 243{
 244        return &vxlan->fdb_head[eth_hash(mac)];
 245}
 246
 247/* Look up Ethernet address in forwarding table */
 248static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 249                                        const u8 *mac)
 250
 251{
 252        struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
 253        struct vxlan_fdb *f;
 254        struct hlist_node *node;
 255
 256        hlist_for_each_entry_rcu(f, node, head, hlist) {
 257                if (compare_ether_addr(mac, f->eth_addr) == 0)
 258                        return f;
 259        }
 260
 261        return NULL;
 262}
 263
 264/* Add new entry to forwarding table -- assumes lock held */
 265static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 266                            const u8 *mac, __be32 ip,
 267                            __u16 state, __u16 flags)
 268{
 269        struct vxlan_fdb *f;
 270        int notify = 0;
 271
 272        f = vxlan_find_mac(vxlan, mac);
 273        if (f) {
 274                if (flags & NLM_F_EXCL) {
 275                        netdev_dbg(vxlan->dev,
 276                                   "lost race to create %pM\n", mac);
 277                        return -EEXIST;
 278                }
 279                if (f->state != state) {
 280                        f->state = state;
 281                        f->updated = jiffies;
 282                        notify = 1;
 283                }
 284        } else {
 285                if (!(flags & NLM_F_CREATE))
 286                        return -ENOENT;
 287
 288                if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax)
 289                        return -ENOSPC;
 290
 291                netdev_dbg(vxlan->dev, "add %pM -> %pI4\n", mac, &ip);
 292                f = kmalloc(sizeof(*f), GFP_ATOMIC);
 293                if (!f)
 294                        return -ENOMEM;
 295
 296                notify = 1;
 297                f->remote_ip = ip;
 298                f->state = state;
 299                f->updated = f->used = jiffies;
 300                memcpy(f->eth_addr, mac, ETH_ALEN);
 301
 302                ++vxlan->addrcnt;
 303                hlist_add_head_rcu(&f->hlist,
 304                                   vxlan_fdb_head(vxlan, mac));
 305        }
 306
 307        if (notify)
 308                vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH);
 309
 310        return 0;
 311}
 312
 313static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
 314{
 315        netdev_dbg(vxlan->dev,
 316                    "delete %pM\n", f->eth_addr);
 317
 318        --vxlan->addrcnt;
 319        vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH);
 320
 321        hlist_del_rcu(&f->hlist);
 322        kfree_rcu(f, rcu);
 323}
 324
 325/* Add static entry (via netlink) */
 326static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 327                         struct net_device *dev,
 328                         const unsigned char *addr, u16 flags)
 329{
 330        struct vxlan_dev *vxlan = netdev_priv(dev);
 331        __be32 ip;
 332        int err;
 333
 334        if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
 335                pr_info("RTM_NEWNEIGH with invalid state %#x\n",
 336                        ndm->ndm_state);
 337                return -EINVAL;
 338        }
 339
 340        if (tb[NDA_DST] == NULL)
 341                return -EINVAL;
 342
 343        if (nla_len(tb[NDA_DST]) != sizeof(__be32))
 344                return -EAFNOSUPPORT;
 345
 346        ip = nla_get_be32(tb[NDA_DST]);
 347
 348        spin_lock_bh(&vxlan->hash_lock);
 349        err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags);
 350        spin_unlock_bh(&vxlan->hash_lock);
 351
 352        return err;
 353}
 354
 355/* Delete entry (via netlink) */
 356static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
 357                            const unsigned char *addr)
 358{
 359        struct vxlan_dev *vxlan = netdev_priv(dev);
 360        struct vxlan_fdb *f;
 361        int err = -ENOENT;
 362
 363        spin_lock_bh(&vxlan->hash_lock);
 364        f = vxlan_find_mac(vxlan, addr);
 365        if (f) {
 366                vxlan_fdb_destroy(vxlan, f);
 367                err = 0;
 368        }
 369        spin_unlock_bh(&vxlan->hash_lock);
 370
 371        return err;
 372}
 373
 374/* Dump forwarding table */
 375static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 376                          struct net_device *dev, int idx)
 377{
 378        struct vxlan_dev *vxlan = netdev_priv(dev);
 379        unsigned int h;
 380
 381        for (h = 0; h < FDB_HASH_SIZE; ++h) {
 382                struct vxlan_fdb *f;
 383                struct hlist_node *n;
 384                int err;
 385
 386                hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) {
 387                        if (idx < cb->args[0])
 388                                goto skip;
 389
 390                        err = vxlan_fdb_info(skb, vxlan, f,
 391                                             NETLINK_CB(cb->skb).portid,
 392                                             cb->nlh->nlmsg_seq,
 393                                             RTM_NEWNEIGH,
 394                                             NLM_F_MULTI);
 395                        if (err < 0)
 396                                break;
 397skip:
 398                        ++idx;
 399                }
 400        }
 401
 402        return idx;
 403}
 404
 405/* Watch incoming packets to learn mapping between Ethernet address
 406 * and Tunnel endpoint.
 407 */
 408static void vxlan_snoop(struct net_device *dev,
 409                        __be32 src_ip, const u8 *src_mac)
 410{
 411        struct vxlan_dev *vxlan = netdev_priv(dev);
 412        struct vxlan_fdb *f;
 413        int err;
 414
 415        f = vxlan_find_mac(vxlan, src_mac);
 416        if (likely(f)) {
 417                f->used = jiffies;
 418                if (likely(f->remote_ip == src_ip))
 419                        return;
 420
 421                if (net_ratelimit())
 422                        netdev_info(dev,
 423                                    "%pM migrated from %pI4 to %pI4\n",
 424                                    src_mac, &f->remote_ip, &src_ip);
 425
 426                f->remote_ip = src_ip;
 427                f->updated = jiffies;
 428        } else {
 429                /* learned new entry */
 430                spin_lock(&vxlan->hash_lock);
 431                err = vxlan_fdb_create(vxlan, src_mac, src_ip,
 432                                       NUD_REACHABLE,
 433                                       NLM_F_EXCL|NLM_F_CREATE);
 434                spin_unlock(&vxlan->hash_lock);
 435        }
 436}
 437
 438
 439/* See if multicast group is already in use by other ID */
 440static bool vxlan_group_used(struct vxlan_net *vn,
 441                             const struct vxlan_dev *this)
 442{
 443        const struct vxlan_dev *vxlan;
 444        struct hlist_node *node;
 445        unsigned h;
 446
 447        for (h = 0; h < VNI_HASH_SIZE; ++h)
 448                hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) {
 449                        if (vxlan == this)
 450                                continue;
 451
 452                        if (!netif_running(vxlan->dev))
 453                                continue;
 454
 455                        if (vxlan->gaddr == this->gaddr)
 456                                return true;
 457                }
 458
 459        return false;
 460}
 461
 462/* kernel equivalent to IP_ADD_MEMBERSHIP */
 463static int vxlan_join_group(struct net_device *dev)
 464{
 465        struct vxlan_dev *vxlan = netdev_priv(dev);
 466        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 467        struct sock *sk = vn->sock->sk;
 468        struct ip_mreqn mreq = {
 469                .imr_multiaddr.s_addr = vxlan->gaddr,
 470        };
 471        int err;
 472
 473        /* Already a member of group */
 474        if (vxlan_group_used(vn, vxlan))
 475                return 0;
 476
 477        /* Need to drop RTNL to call multicast join */
 478        rtnl_unlock();
 479        lock_sock(sk);
 480        err = ip_mc_join_group(sk, &mreq);
 481        release_sock(sk);
 482        rtnl_lock();
 483
 484        return err;
 485}
 486
 487
 488/* kernel equivalent to IP_DROP_MEMBERSHIP */
 489static int vxlan_leave_group(struct net_device *dev)
 490{
 491        struct vxlan_dev *vxlan = netdev_priv(dev);
 492        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 493        int err = 0;
 494        struct sock *sk = vn->sock->sk;
 495        struct ip_mreqn mreq = {
 496                .imr_multiaddr.s_addr = vxlan->gaddr,
 497        };
 498
 499        /* Only leave group when last vxlan is done. */
 500        if (vxlan_group_used(vn, vxlan))
 501                return 0;
 502
 503        /* Need to drop RTNL to call multicast leave */
 504        rtnl_unlock();
 505        lock_sock(sk);
 506        err = ip_mc_leave_group(sk, &mreq);
 507        release_sock(sk);
 508        rtnl_lock();
 509
 510        return err;
 511}
 512
 513/* Callback from net/ipv4/udp.c to receive packets */
 514static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 515{
 516        struct iphdr *oip;
 517        struct vxlanhdr *vxh;
 518        struct vxlan_dev *vxlan;
 519        struct vxlan_stats *stats;
 520        __u32 vni;
 521        int err;
 522
 523        /* pop off outer UDP header */
 524        __skb_pull(skb, sizeof(struct udphdr));
 525
 526        /* Need Vxlan and inner Ethernet header to be present */
 527        if (!pskb_may_pull(skb, sizeof(struct vxlanhdr)))
 528                goto error;
 529
 530        /* Drop packets with reserved bits set */
 531        vxh = (struct vxlanhdr *) skb->data;
 532        if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
 533            (vxh->vx_vni & htonl(0xff))) {
 534                netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
 535                           ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
 536                goto error;
 537        }
 538
 539        __skb_pull(skb, sizeof(struct vxlanhdr));
 540
 541        /* Is this VNI defined? */
 542        vni = ntohl(vxh->vx_vni) >> 8;
 543        vxlan = vxlan_find_vni(sock_net(sk), vni);
 544        if (!vxlan) {
 545                netdev_dbg(skb->dev, "unknown vni %d\n", vni);
 546                goto drop;
 547        }
 548
 549        if (!pskb_may_pull(skb, ETH_HLEN)) {
 550                vxlan->dev->stats.rx_length_errors++;
 551                vxlan->dev->stats.rx_errors++;
 552                goto drop;
 553        }
 554
 555        /* Re-examine inner Ethernet packet */
 556        oip = ip_hdr(skb);
 557        skb->protocol = eth_type_trans(skb, vxlan->dev);
 558
 559        /* Ignore packet loops (and multicast echo) */
 560        if (compare_ether_addr(eth_hdr(skb)->h_source,
 561                               vxlan->dev->dev_addr) == 0)
 562                goto drop;
 563
 564        if (vxlan->learn)
 565                vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source);
 566
 567        __skb_tunnel_rx(skb, vxlan->dev);
 568        skb_reset_network_header(skb);
 569        skb->ip_summed = CHECKSUM_NONE;
 570
 571        err = IP_ECN_decapsulate(oip, skb);
 572        if (unlikely(err)) {
 573                if (log_ecn_error)
 574                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 575                                             &oip->saddr, oip->tos);
 576                if (err > 1) {
 577                        ++vxlan->dev->stats.rx_frame_errors;
 578                        ++vxlan->dev->stats.rx_errors;
 579                        goto drop;
 580                }
 581        }
 582
 583        stats = this_cpu_ptr(vxlan->stats);
 584        u64_stats_update_begin(&stats->syncp);
 585        stats->rx_packets++;
 586        stats->rx_bytes += skb->len;
 587        u64_stats_update_end(&stats->syncp);
 588
 589        netif_rx(skb);
 590
 591        return 0;
 592error:
 593        /* Put UDP header back */
 594        __skb_push(skb, sizeof(struct udphdr));
 595
 596        return 1;
 597drop:
 598        /* Consume bad packet */
 599        kfree_skb(skb);
 600        return 0;
 601}
 602
 603/* Extract dsfield from inner protocol */
 604static inline u8 vxlan_get_dsfield(const struct iphdr *iph,
 605                                   const struct sk_buff *skb)
 606{
 607        if (skb->protocol == htons(ETH_P_IP))
 608                return iph->tos;
 609        else if (skb->protocol == htons(ETH_P_IPV6))
 610                return ipv6_get_dsfield((const struct ipv6hdr *)iph);
 611        else
 612                return 0;
 613}
 614
 615/* Propogate ECN bits out */
 616static inline u8 vxlan_ecn_encap(u8 tos,
 617                                 const struct iphdr *iph,
 618                                 const struct sk_buff *skb)
 619{
 620        u8 inner = vxlan_get_dsfield(iph, skb);
 621
 622        return INET_ECN_encapsulate(tos, inner);
 623}
 624
 625static __be32 vxlan_find_dst(struct vxlan_dev *vxlan, struct sk_buff *skb)
 626{
 627        const struct ethhdr *eth = (struct ethhdr *) skb->data;
 628        const struct vxlan_fdb *f;
 629
 630        if (is_multicast_ether_addr(eth->h_dest))
 631                return vxlan->gaddr;
 632
 633        f = vxlan_find_mac(vxlan, eth->h_dest);
 634        if (f)
 635                return f->remote_ip;
 636        else
 637                return vxlan->gaddr;
 638
 639}
 640
 641static void vxlan_sock_free(struct sk_buff *skb)
 642{
 643        sock_put(skb->sk);
 644}
 645
 646/* On transmit, associate with the tunnel socket */
 647static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
 648{
 649        struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 650        struct sock *sk = vn->sock->sk;
 651
 652        skb_orphan(skb);
 653        sock_hold(sk);
 654        skb->sk = sk;
 655        skb->destructor = vxlan_sock_free;
 656}
 657
 658/* Compute source port for outgoing packet
 659 *   first choice to use L4 flow hash since it will spread
 660 *     better and maybe available from hardware
 661 *   secondary choice is to use jhash on the Ethernet header
 662 */
 663static u16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
 664{
 665        unsigned int range = (vxlan->port_max - vxlan->port_min) + 1;
 666        u32 hash;
 667
 668        hash = skb_get_rxhash(skb);
 669        if (!hash)
 670                hash = jhash(skb->data, 2 * ETH_ALEN,
 671                             (__force u32) skb->protocol);
 672
 673        return (((u64) hash * range) >> 32) + vxlan->port_min;
 674}
 675
 676/* Transmit local packets over Vxlan
 677 *
 678 * Outer IP header inherits ECN and DF from inner header.
 679 * Outer UDP destination is the VXLAN assigned port.
 680 *           source port is based on hash of flow
 681 */
 682static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 683{
 684        struct vxlan_dev *vxlan = netdev_priv(dev);
 685        struct rtable *rt;
 686        const struct iphdr *old_iph;
 687        struct iphdr *iph;
 688        struct vxlanhdr *vxh;
 689        struct udphdr *uh;
 690        struct flowi4 fl4;
 691        unsigned int pkt_len = skb->len;
 692        __be32 dst;
 693        __u16 src_port;
 694        __be16 df = 0;
 695        __u8 tos, ttl;
 696        int err;
 697
 698        dst = vxlan_find_dst(vxlan, skb);
 699        if (!dst)
 700                goto drop;
 701
 702        /* Need space for new headers (invalidates iph ptr) */
 703        if (skb_cow_head(skb, VXLAN_HEADROOM))
 704                goto drop;
 705
 706        old_iph = ip_hdr(skb);
 707
 708        ttl = vxlan->ttl;
 709        if (!ttl && IN_MULTICAST(ntohl(dst)))
 710                ttl = 1;
 711
 712        tos = vxlan->tos;
 713        if (tos == 1)
 714                tos = vxlan_get_dsfield(old_iph, skb);
 715
 716        src_port = vxlan_src_port(vxlan, skb);
 717
 718        memset(&fl4, 0, sizeof(fl4));
 719        fl4.flowi4_oif = vxlan->link;
 720        fl4.flowi4_tos = RT_TOS(tos);
 721        fl4.daddr = dst;
 722        fl4.saddr = vxlan->saddr;
 723
 724        rt = ip_route_output_key(dev_net(dev), &fl4);
 725        if (IS_ERR(rt)) {
 726                netdev_dbg(dev, "no route to %pI4\n", &dst);
 727                dev->stats.tx_carrier_errors++;
 728                goto tx_error;
 729        }
 730
 731        if (rt->dst.dev == dev) {
 732                netdev_dbg(dev, "circular route to %pI4\n", &dst);
 733                ip_rt_put(rt);
 734                dev->stats.collisions++;
 735                goto tx_error;
 736        }
 737
 738        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 739        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 740                              IPSKB_REROUTED);
 741        skb_dst_drop(skb);
 742        skb_dst_set(skb, &rt->dst);
 743
 744        vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 745        vxh->vx_flags = htonl(VXLAN_FLAGS);
 746        vxh->vx_vni = htonl(vxlan->vni << 8);
 747
 748        __skb_push(skb, sizeof(*uh));
 749        skb_reset_transport_header(skb);
 750        uh = udp_hdr(skb);
 751
 752        uh->dest = htons(vxlan_port);
 753        uh->source = htons(src_port);
 754
 755        uh->len = htons(skb->len);
 756        uh->check = 0;
 757
 758        __skb_push(skb, sizeof(*iph));
 759        skb_reset_network_header(skb);
 760        iph             = ip_hdr(skb);
 761        iph->version    = 4;
 762        iph->ihl        = sizeof(struct iphdr) >> 2;
 763        iph->frag_off   = df;
 764        iph->protocol   = IPPROTO_UDP;
 765        iph->tos        = vxlan_ecn_encap(tos, old_iph, skb);
 766        iph->daddr      = dst;
 767        iph->saddr      = fl4.saddr;
 768        iph->ttl        = ttl ? : ip4_dst_hoplimit(&rt->dst);
 769
 770        vxlan_set_owner(dev, skb);
 771
 772        /* See __IPTUNNEL_XMIT */
 773        skb->ip_summed = CHECKSUM_NONE;
 774        ip_select_ident(iph, &rt->dst, NULL);
 775
 776        err = ip_local_out(skb);
 777        if (likely(net_xmit_eval(err) == 0)) {
 778                struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats);
 779
 780                u64_stats_update_begin(&stats->syncp);
 781                stats->tx_packets++;
 782                stats->tx_bytes += pkt_len;
 783                u64_stats_update_end(&stats->syncp);
 784        } else {
 785                dev->stats.tx_errors++;
 786                dev->stats.tx_aborted_errors++;
 787        }
 788        return NETDEV_TX_OK;
 789
 790drop:
 791        dev->stats.tx_dropped++;
 792        goto tx_free;
 793
 794tx_error:
 795        dev->stats.tx_errors++;
 796tx_free:
 797        dev_kfree_skb(skb);
 798        return NETDEV_TX_OK;
 799}
 800
 801/* Walk the forwarding table and purge stale entries */
 802static void vxlan_cleanup(unsigned long arg)
 803{
 804        struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
 805        unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
 806        unsigned int h;
 807
 808        if (!netif_running(vxlan->dev))
 809                return;
 810
 811        spin_lock_bh(&vxlan->hash_lock);
 812        for (h = 0; h < FDB_HASH_SIZE; ++h) {
 813                struct hlist_node *p, *n;
 814                hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
 815                        struct vxlan_fdb *f
 816                                = container_of(p, struct vxlan_fdb, hlist);
 817                        unsigned long timeout;
 818
 819                        if (f->state & NUD_PERMANENT)
 820                                continue;
 821
 822                        timeout = f->used + vxlan->age_interval * HZ;
 823                        if (time_before_eq(timeout, jiffies)) {
 824                                netdev_dbg(vxlan->dev,
 825                                           "garbage collect %pM\n",
 826                                           f->eth_addr);
 827                                f->state = NUD_STALE;
 828                                vxlan_fdb_destroy(vxlan, f);
 829                        } else if (time_before(timeout, next_timer))
 830                                next_timer = timeout;
 831                }
 832        }
 833        spin_unlock_bh(&vxlan->hash_lock);
 834
 835        mod_timer(&vxlan->age_timer, next_timer);
 836}
 837
 838/* Setup stats when device is created */
 839static int vxlan_init(struct net_device *dev)
 840{
 841        struct vxlan_dev *vxlan = netdev_priv(dev);
 842
 843        vxlan->stats = alloc_percpu(struct vxlan_stats);
 844        if (!vxlan->stats)
 845                return -ENOMEM;
 846
 847        return 0;
 848}
 849
 850/* Start ageing timer and join group when device is brought up */
 851static int vxlan_open(struct net_device *dev)
 852{
 853        struct vxlan_dev *vxlan = netdev_priv(dev);
 854        int err;
 855
 856        if (vxlan->gaddr) {
 857                err = vxlan_join_group(dev);
 858                if (err)
 859                        return err;
 860        }
 861
 862        if (vxlan->age_interval)
 863                mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
 864
 865        return 0;
 866}
 867
 868/* Purge the forwarding table */
 869static void vxlan_flush(struct vxlan_dev *vxlan)
 870{
 871        unsigned h;
 872
 873        spin_lock_bh(&vxlan->hash_lock);
 874        for (h = 0; h < FDB_HASH_SIZE; ++h) {
 875                struct hlist_node *p, *n;
 876                hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
 877                        struct vxlan_fdb *f
 878                                = container_of(p, struct vxlan_fdb, hlist);
 879                        vxlan_fdb_destroy(vxlan, f);
 880                }
 881        }
 882        spin_unlock_bh(&vxlan->hash_lock);
 883}
 884
 885/* Cleanup timer and forwarding table on shutdown */
 886static int vxlan_stop(struct net_device *dev)
 887{
 888        struct vxlan_dev *vxlan = netdev_priv(dev);
 889
 890        if (vxlan->gaddr)
 891                vxlan_leave_group(dev);
 892
 893        del_timer_sync(&vxlan->age_timer);
 894
 895        vxlan_flush(vxlan);
 896
 897        return 0;
 898}
 899
 900/* Merge per-cpu statistics */
 901static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev,
 902                                               struct rtnl_link_stats64 *stats)
 903{
 904        struct vxlan_dev *vxlan = netdev_priv(dev);
 905        struct vxlan_stats tmp, sum = { 0 };
 906        unsigned int cpu;
 907
 908        for_each_possible_cpu(cpu) {
 909                unsigned int start;
 910                const struct vxlan_stats *stats
 911                        = per_cpu_ptr(vxlan->stats, cpu);
 912
 913                do {
 914                        start = u64_stats_fetch_begin_bh(&stats->syncp);
 915                        memcpy(&tmp, stats, sizeof(tmp));
 916                } while (u64_stats_fetch_retry_bh(&stats->syncp, start));
 917
 918                sum.tx_bytes   += tmp.tx_bytes;
 919                sum.tx_packets += tmp.tx_packets;
 920                sum.rx_bytes   += tmp.rx_bytes;
 921                sum.rx_packets += tmp.rx_packets;
 922        }
 923
 924        stats->tx_bytes   = sum.tx_bytes;
 925        stats->tx_packets = sum.tx_packets;
 926        stats->rx_bytes   = sum.rx_bytes;
 927        stats->rx_packets = sum.rx_packets;
 928
 929        stats->multicast = dev->stats.multicast;
 930        stats->rx_length_errors = dev->stats.rx_length_errors;
 931        stats->rx_frame_errors = dev->stats.rx_frame_errors;
 932        stats->rx_errors = dev->stats.rx_errors;
 933
 934        stats->tx_dropped = dev->stats.tx_dropped;
 935        stats->tx_carrier_errors  = dev->stats.tx_carrier_errors;
 936        stats->tx_aborted_errors  = dev->stats.tx_aborted_errors;
 937        stats->collisions  = dev->stats.collisions;
 938        stats->tx_errors = dev->stats.tx_errors;
 939
 940        return stats;
 941}
 942
 943/* Stub, nothing needs to be done. */
 944static void vxlan_set_multicast_list(struct net_device *dev)
 945{
 946}
 947
 948static const struct net_device_ops vxlan_netdev_ops = {
 949        .ndo_init               = vxlan_init,
 950        .ndo_open               = vxlan_open,
 951        .ndo_stop               = vxlan_stop,
 952        .ndo_start_xmit         = vxlan_xmit,
 953        .ndo_get_stats64        = vxlan_stats64,
 954        .ndo_set_rx_mode        = vxlan_set_multicast_list,
 955        .ndo_change_mtu         = eth_change_mtu,
 956        .ndo_validate_addr      = eth_validate_addr,
 957        .ndo_set_mac_address    = eth_mac_addr,
 958        .ndo_fdb_add            = vxlan_fdb_add,
 959        .ndo_fdb_del            = vxlan_fdb_delete,
 960        .ndo_fdb_dump           = vxlan_fdb_dump,
 961};
 962
 963/* Info for udev, that this is a virtual tunnel endpoint */
 964static struct device_type vxlan_type = {
 965        .name = "vxlan",
 966};
 967
 968static void vxlan_free(struct net_device *dev)
 969{
 970        struct vxlan_dev *vxlan = netdev_priv(dev);
 971
 972        free_percpu(vxlan->stats);
 973        free_netdev(dev);
 974}
 975
 976/* Initialize the device structure. */
 977static void vxlan_setup(struct net_device *dev)
 978{
 979        struct vxlan_dev *vxlan = netdev_priv(dev);
 980        unsigned h;
 981        int low, high;
 982
 983        eth_hw_addr_random(dev);
 984        ether_setup(dev);
 985        dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM;
 986
 987        dev->netdev_ops = &vxlan_netdev_ops;
 988        dev->destructor = vxlan_free;
 989        SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 990
 991        dev->tx_queue_len = 0;
 992        dev->features   |= NETIF_F_LLTX;
 993        dev->features   |= NETIF_F_NETNS_LOCAL;
 994        dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 995
 996        spin_lock_init(&vxlan->hash_lock);
 997
 998        init_timer_deferrable(&vxlan->age_timer);
 999        vxlan->age_timer.function = vxlan_cleanup;
1000        vxlan->age_timer.data = (unsigned long) vxlan;
1001
1002        inet_get_local_port_range(&low, &high);
1003        vxlan->port_min = low;
1004        vxlan->port_max = high;
1005
1006        vxlan->dev = dev;
1007
1008        for (h = 0; h < FDB_HASH_SIZE; ++h)
1009                INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
1010}
1011
1012static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
1013        [IFLA_VXLAN_ID]         = { .type = NLA_U32 },
1014        [IFLA_VXLAN_GROUP]      = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1015        [IFLA_VXLAN_LINK]       = { .type = NLA_U32 },
1016        [IFLA_VXLAN_LOCAL]      = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1017        [IFLA_VXLAN_TOS]        = { .type = NLA_U8 },
1018        [IFLA_VXLAN_TTL]        = { .type = NLA_U8 },
1019        [IFLA_VXLAN_LEARNING]   = { .type = NLA_U8 },
1020        [IFLA_VXLAN_AGEING]     = { .type = NLA_U32 },
1021        [IFLA_VXLAN_LIMIT]      = { .type = NLA_U32 },
1022        [IFLA_VXLAN_PORT_RANGE] = { .len  = sizeof(struct ifla_vxlan_port_range) },
1023};
1024
1025static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
1026{
1027        if (tb[IFLA_ADDRESS]) {
1028                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
1029                        pr_debug("invalid link address (not ethernet)\n");
1030                        return -EINVAL;
1031                }
1032
1033                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
1034                        pr_debug("invalid all zero ethernet address\n");
1035                        return -EADDRNOTAVAIL;
1036                }
1037        }
1038
1039        if (!data)
1040                return -EINVAL;
1041
1042        if (data[IFLA_VXLAN_ID]) {
1043                __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
1044                if (id >= VXLAN_VID_MASK)
1045                        return -ERANGE;
1046        }
1047
1048        if (data[IFLA_VXLAN_GROUP]) {
1049                __be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
1050                if (!IN_MULTICAST(ntohl(gaddr))) {
1051                        pr_debug("group address is not IPv4 multicast\n");
1052                        return -EADDRNOTAVAIL;
1053                }
1054        }
1055
1056        if (data[IFLA_VXLAN_PORT_RANGE]) {
1057                const struct ifla_vxlan_port_range *p
1058                        = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
1059
1060                if (ntohs(p->high) < ntohs(p->low)) {
1061                        pr_debug("port range %u .. %u not valid\n",
1062                                 ntohs(p->low), ntohs(p->high));
1063                        return -EINVAL;
1064                }
1065        }
1066
1067        return 0;
1068}
1069
1070static int vxlan_newlink(struct net *net, struct net_device *dev,
1071                         struct nlattr *tb[], struct nlattr *data[])
1072{
1073        struct vxlan_dev *vxlan = netdev_priv(dev);
1074        __u32 vni;
1075        int err;
1076
1077        if (!data[IFLA_VXLAN_ID])
1078                return -EINVAL;
1079
1080        vni = nla_get_u32(data[IFLA_VXLAN_ID]);
1081        if (vxlan_find_vni(net, vni)) {
1082                pr_info("duplicate VNI %u\n", vni);
1083                return -EEXIST;
1084        }
1085        vxlan->vni = vni;
1086
1087        if (data[IFLA_VXLAN_GROUP])
1088                vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
1089
1090        if (data[IFLA_VXLAN_LOCAL])
1091                vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
1092
1093        if (data[IFLA_VXLAN_LINK] &&
1094            (vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
1095                struct net_device *lowerdev
1096                         = __dev_get_by_index(net, vxlan->link);
1097
1098                if (!lowerdev) {
1099                        pr_info("ifindex %d does not exist\n", vxlan->link);
1100                        return -ENODEV;
1101                }
1102
1103                if (!tb[IFLA_MTU])
1104                        dev->mtu = lowerdev->mtu - VXLAN_HEADROOM;
1105
1106                /* update header length based on lower device */
1107                dev->hard_header_len = lowerdev->hard_header_len +
1108                                       VXLAN_HEADROOM;
1109        }
1110
1111        if (data[IFLA_VXLAN_TOS])
1112                vxlan->tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
1113
1114        if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
1115                vxlan->learn = true;
1116
1117        if (data[IFLA_VXLAN_AGEING])
1118                vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
1119        else
1120                vxlan->age_interval = FDB_AGE_DEFAULT;
1121
1122        if (data[IFLA_VXLAN_LIMIT])
1123                vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
1124
1125        if (data[IFLA_VXLAN_PORT_RANGE]) {
1126                const struct ifla_vxlan_port_range *p
1127                        = nla_data(data[IFLA_VXLAN_PORT_RANGE]);
1128                vxlan->port_min = ntohs(p->low);
1129                vxlan->port_max = ntohs(p->high);
1130        }
1131
1132        err = register_netdevice(dev);
1133        if (!err)
1134                hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni));
1135
1136        return err;
1137}
1138
1139static void vxlan_dellink(struct net_device *dev, struct list_head *head)
1140{
1141        struct vxlan_dev *vxlan = netdev_priv(dev);
1142
1143        hlist_del_rcu(&vxlan->hlist);
1144
1145        unregister_netdevice_queue(dev, head);
1146}
1147
1148static size_t vxlan_get_size(const struct net_device *dev)
1149{
1150
1151        return nla_total_size(sizeof(__u32)) +  /* IFLA_VXLAN_ID */
1152                nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */
1153                nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */
1154                nla_total_size(sizeof(__be32))+ /* IFLA_VXLAN_LOCAL */
1155                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TTL */
1156                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_TOS */
1157                nla_total_size(sizeof(__u8)) +  /* IFLA_VXLAN_LEARNING */
1158                nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */
1159                nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */
1160                nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
1161                0;
1162}
1163
1164static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
1165{
1166        const struct vxlan_dev *vxlan = netdev_priv(dev);
1167        struct ifla_vxlan_port_range ports = {
1168                .low =  htons(vxlan->port_min),
1169                .high = htons(vxlan->port_max),
1170        };
1171
1172        if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni))
1173                goto nla_put_failure;
1174
1175        if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr))
1176                goto nla_put_failure;
1177
1178        if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link))
1179                goto nla_put_failure;
1180
1181        if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr))
1182                goto nla_put_failure;
1183
1184        if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) ||
1185            nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) ||
1186            nla_put_u8(skb, IFLA_VXLAN_LEARNING, vxlan->learn) ||
1187            nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
1188            nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax))
1189                goto nla_put_failure;
1190
1191        if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
1192                goto nla_put_failure;
1193
1194        return 0;
1195
1196nla_put_failure:
1197        return -EMSGSIZE;
1198}
1199
1200static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
1201        .kind           = "vxlan",
1202        .maxtype        = IFLA_VXLAN_MAX,
1203        .policy         = vxlan_policy,
1204        .priv_size      = sizeof(struct vxlan_dev),
1205        .setup          = vxlan_setup,
1206        .validate       = vxlan_validate,
1207        .newlink        = vxlan_newlink,
1208        .dellink        = vxlan_dellink,
1209        .get_size       = vxlan_get_size,
1210        .fill_info      = vxlan_fill_info,
1211};
1212
1213static __net_init int vxlan_init_net(struct net *net)
1214{
1215        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1216        struct sock *sk;
1217        struct sockaddr_in vxlan_addr = {
1218                .sin_family = AF_INET,
1219                .sin_addr.s_addr = htonl(INADDR_ANY),
1220        };
1221        int rc;
1222        unsigned h;
1223
1224        /* Create UDP socket for encapsulation receive. */
1225        rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
1226        if (rc < 0) {
1227                pr_debug("UDP socket create failed\n");
1228                return rc;
1229        }
1230        /* Put in proper namespace */
1231        sk = vn->sock->sk;
1232        sk_change_net(sk, net);
1233
1234        vxlan_addr.sin_port = htons(vxlan_port);
1235
1236        rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
1237                         sizeof(vxlan_addr));
1238        if (rc < 0) {
1239                pr_debug("bind for UDP socket %pI4:%u (%d)\n",
1240                         &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
1241                sk_release_kernel(sk);
1242                vn->sock = NULL;
1243                return rc;
1244        }
1245
1246        /* Disable multicast loopback */
1247        inet_sk(sk)->mc_loop = 0;
1248
1249        /* Mark socket as an encapsulation socket. */
1250        udp_sk(sk)->encap_type = 1;
1251        udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
1252        udp_encap_enable();
1253
1254        for (h = 0; h < VNI_HASH_SIZE; ++h)
1255                INIT_HLIST_HEAD(&vn->vni_list[h]);
1256
1257        return 0;
1258}
1259
1260static __net_exit void vxlan_exit_net(struct net *net)
1261{
1262        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
1263
1264        if (vn->sock) {
1265                sk_release_kernel(vn->sock->sk);
1266                vn->sock = NULL;
1267        }
1268}
1269
1270static struct pernet_operations vxlan_net_ops = {
1271        .init = vxlan_init_net,
1272        .exit = vxlan_exit_net,
1273        .id   = &vxlan_net_id,
1274        .size = sizeof(struct vxlan_net),
1275};
1276
1277static int __init vxlan_init_module(void)
1278{
1279        int rc;
1280
1281        get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
1282
1283        rc = register_pernet_device(&vxlan_net_ops);
1284        if (rc)
1285                goto out1;
1286
1287        rc = rtnl_link_register(&vxlan_link_ops);
1288        if (rc)
1289                goto out2;
1290
1291        return 0;
1292
1293out2:
1294        unregister_pernet_device(&vxlan_net_ops);
1295out1:
1296        return rc;
1297}
1298module_init(vxlan_init_module);
1299
1300static void __exit vxlan_cleanup_module(void)
1301{
1302        rtnl_link_unregister(&vxlan_link_ops);
1303        unregister_pernet_device(&vxlan_net_ops);
1304}
1305module_exit(vxlan_cleanup_module);
1306
1307MODULE_LICENSE("GPL");
1308MODULE_VERSION(VXLAN_VERSION);
1309MODULE_AUTHOR("Stephen Hemminger <shemminger@vyatta.com>");
1310MODULE_ALIAS_RTNL_LINK("vxlan");
1311
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.