linux/net/ipv4/ip_vti.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3: IP/IP protocol decoder modified to support
   3 *                  virtual tunnel interface
   4 *
   5 *      Authors:
   6 *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 */
  14
  15/*
  16   This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c
  17
  18   For comments look at net/ipv4/ip_gre.c --ANK
  19 */
  20
  21
  22#include <linux/capability.h>
  23#include <linux/module.h>
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/mroute.h>
  34#include <linux/init.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/if_ether.h>
  37
  38#include <net/sock.h>
  39#include <net/ip.h>
  40#include <net/icmp.h>
  41#include <net/ipip.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46
#define HASH_SIZE  16
/* Fold a __be32 address into a HASH_SIZE-bucket index (XOR of nibbles). */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))

static struct rtnl_link_ops vti_link_ops __read_mostly;

static int vti_net_id __read_mostly;
/* Per-netns state: four tunnel hash tables, selected by which endpoints
 * are configured (remote+local, remote only, local only, wildcard), plus
 * the per-netns fallback device.
 */
struct vti_net {
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];	/* indexed by prio: wc, l, r, r_l */

	struct net_device *fb_tunnel_dev;
};
  62
static int vti_fb_tunnel_init(struct net_device *dev);
static int vti_tunnel_init(struct net_device *dev);
static void vti_tunnel_setup(struct net_device *dev);
static void vti_dev_free(struct net_device *dev);
static int vti_tunnel_bind_dev(struct net_device *dev);

/* Locking : hash tables are protected by RCU and RTNL */

/* Walk one RCU hash chain; relies on a 'struct ip_tunnel *t' being
 * declared in the enclosing scope and the caller holding rcu_read_lock().
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
  73
  74/* often modified stats are per cpu, other are shared (netdev->stats) */
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	struct	u64_stats_sync	syncp;	/* guards 64-bit reads on 32-bit CPUs */
};

/* Hand the enclosing scope's 'skb' to dst_output() and account the
 * result: on success bump the per-cpu tx counters in stats1, otherwise
 * the shared error counters in stats2.  skb->len is sampled before
 * dst_output() consumes the skb.
 */
#define VTI_XMIT(stats1, stats2) do {				\
	int err;						\
	int pkt_len = skb->len;					\
	err = dst_output(skb);					\
	if (net_xmit_eval(err) == 0) {				\
		u64_stats_update_begin(&(stats1)->syncp);	\
		(stats1)->tx_bytes += pkt_len;			\
		(stats1)->tx_packets++;				\
		u64_stats_update_end(&(stats1)->syncp);		\
	} else {						\
		(stats2)->tx_errors++;				\
		(stats2)->tx_aborted_errors++;			\
	}							\
} while (0)
  97
  98
/* ndo_get_stats64: sum the per-cpu rx/tx counters into @tot, reading each
 * CPU's set consistently via the u64_stats seqcount, then copy the shared
 * error counters from netdev->stats.
 * NOTE(review): only adds deltas for rx/tx — assumes the caller
 * (dev_get_stats) pre-zeroed *tot; confirm if called from elsewhere.
 */
static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* retry if a writer updated the counters mid-read */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;
	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_errors = dev->stats.rx_errors;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	return tot;
}
 136
/* Find an IFF_UP tunnel matching (remote, local), most specific first:
 * remote+local, remote only, local only, then the wildcard chain.
 * Caller must hold rcu_read_lock(); returns NULL if nothing matches.
 */
static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
					   __be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;	/* implicit iterator of for_each_ip_tunnel_rcu */
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
		if (t && (t->dev->flags&IFF_UP))
			return t;
	return NULL;
}
 162
 163static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
 164                                             struct ip_tunnel_parm *parms)
 165{
 166        __be32 remote = parms->iph.daddr;
 167        __be32 local = parms->iph.saddr;
 168        unsigned h = 0;
 169        int prio = 0;
 170
 171        if (remote) {
 172                prio |= 2;
 173                h ^= HASH(remote);
 174        }
 175        if (local) {
 176                prio |= 1;
 177                h ^= HASH(local);
 178        }
 179        return &ipn->tunnels[prio][h];
 180}
 181
/* Hash bucket for an existing tunnel, derived from its current parms. */
static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
						  struct ip_tunnel *t)
{
	return __vti_bucket(ipn, &t->parms);
}
 187
/* Remove @t from its hash chain (RCU-safe publish of the new next
 * pointer); no-op if it is not linked.  Caller must hold RTNL.
 */
static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = vti_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
 202
/* Insert @t at the head of its hash chain.  Caller must hold RTNL;
 * rcu_assign_pointer orders the writes for concurrent RCU readers.
 */
static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
 210
/* Find the tunnel whose endpoints match @parms; when @create is set and
 * no match exists, allocate, bind and register a new device for it.
 * Returns the tunnel, or NULL on lookup miss / allocation failure.
 * Caller must hold RTNL.
 */
static struct ip_tunnel *vti_tunnel_locate(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for (tp = __vti_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* "vti%d" lets the core pick the next free interface index */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "vti%d");

	dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &vti_link_ops;

	vti_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* reference is dropped in vti_tunnel_uninit() */
	dev_hold(dev);
	vti_tunnel_link(ipn, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
 260
/* ndo_uninit: unhash the tunnel and release the reference taken when it
 * was linked (dev_hold in vti_tunnel_locate/vti_newlink/vti_fb_tunnel_init).
 */
static void vti_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);

	vti_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}
 269
/* ICMP error handler: relay DEST_UNREACH (treated as host-unreach) and
 * TTL-exceeded errors to the matching tunnel; FRAG_NEEDED additionally
 * updates the cached PMTU.  Returns 0 when handled (or deliberately
 * ignored), -ENOENT when no tunnel matches the inner addresses.
 */
static int vti_err(struct sk_buff *skb, u32 info)
{

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means, that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH. */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	rcu_read_lock();
	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	err = 0;
	/* ttl == 0 means "inherit"; TTL-exceeded is then expected noise */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* soft error state consumed by the xmit path to throttle
	 * dst_link_failure() reporting
	 */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return err;
}
 332
 333/* We dont digest the packet therefore let the packet pass */
 334static int vti_rcv(struct sk_buff *skb)
 335{
 336        struct ip_tunnel *tunnel;
 337        const struct iphdr *iph = ip_hdr(skb);
 338
 339        rcu_read_lock();
 340        tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 341        if (tunnel != NULL) {
 342                struct pcpu_tstats *tstats;
 343
 344                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 345                        return -1;
 346
 347                tstats = this_cpu_ptr(tunnel->dev->tstats);
 348                u64_stats_update_begin(&tstats->syncp);
 349                tstats->rx_packets++;
 350                tstats->rx_bytes += skb->len;
 351                u64_stats_update_end(&tstats->syncp);
 352
 353                skb->mark = 0;
 354                secpath_reset(skb);
 355                skb->dev = tunnel->dev;
 356                rcu_read_unlock();
 357                return 1;
 358        }
 359        rcu_read_unlock();
 360
 361        return -1;
 362}
 363
 364/* This function assumes it is being called from dev_queue_xmit()
 365 * and that skb is filled properly by that function.
 366 */
 367
 368static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 369{
 370        struct ip_tunnel *tunnel = netdev_priv(dev);
 371        struct pcpu_tstats *tstats;
 372        struct iphdr  *tiph = &tunnel->parms.iph;
 373        u8     tos;
 374        struct rtable *rt;              /* Route to the other host */
 375        struct net_device *tdev;        /* Device to other host */
 376        struct iphdr  *old_iph = ip_hdr(skb);
 377        __be32 dst = tiph->daddr;
 378        struct flowi4 fl4;
 379
 380        if (skb->protocol != htons(ETH_P_IP))
 381                goto tx_error;
 382
 383        tos = old_iph->tos;
 384
 385        memset(&fl4, 0, sizeof(fl4));
 386        flowi4_init_output(&fl4, tunnel->parms.link,
 387                           htonl(tunnel->parms.i_key), RT_TOS(tos),
 388                           RT_SCOPE_UNIVERSE,
 389                           IPPROTO_IPIP, 0,
 390                           dst, tiph->saddr, 0, 0);
 391        rt = ip_route_output_key(dev_net(dev), &fl4);
 392        if (IS_ERR(rt)) {
 393                dev->stats.tx_carrier_errors++;
 394                goto tx_error_icmp;
 395        }
 396        /* if there is no transform then this tunnel is not functional.
 397         * Or if the xfrm is not mode tunnel.
 398         */
 399        if (!rt->dst.xfrm ||
 400            rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
 401                dev->stats.tx_carrier_errors++;
 402                goto tx_error_icmp;
 403        }
 404        tdev = rt->dst.dev;
 405
 406        if (tdev == dev) {
 407                ip_rt_put(rt);
 408                dev->stats.collisions++;
 409                goto tx_error;
 410        }
 411
 412        if (tunnel->err_count > 0) {
 413                if (time_before(jiffies,
 414                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 415                        tunnel->err_count--;
 416                        dst_link_failure(skb);
 417                } else
 418                        tunnel->err_count = 0;
 419        }
 420
 421        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 422                              IPSKB_REROUTED);
 423        skb_dst_drop(skb);
 424        skb_dst_set(skb, &rt->dst);
 425        nf_reset(skb);
 426        skb->dev = skb_dst(skb)->dev;
 427
 428        tstats = this_cpu_ptr(dev->tstats);
 429        VTI_XMIT(tstats, &dev->stats);
 430        return NETDEV_TX_OK;
 431
 432tx_error_icmp:
 433        dst_link_failure(skb);
 434tx_error:
 435        dev->stats.tx_errors++;
 436        dev_kfree_skb(skb);
 437        return NETDEV_TX_OK;
 438}
 439
/* Resolve the underlying device for this tunnel — via a route to the
 * remote endpoint, falling back to parms.link — and derive the tunnel
 * device's hard_header_len and MTU from it.  Returns the resulting
 * dev->mtu.  Caller must hold RTNL (__dev_get_by_index).
 */
static int vti_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;
		memset(&fl4, 0, sizeof(fl4));
		flowi4_init_output(&fl4, tunnel->parms.link,
				   htonl(tunnel->parms.i_key),
				   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
				   IPPROTO_IPIP, 0,
				   iph->daddr, iph->saddr, 0, 0);
		rt = ip_route_output_key(dev_net(dev), &fl4);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		/* leave room for the outer IP header */
		dev->hard_header_len = tdev->hard_header_len +
				       sizeof(struct iphdr);
		dev->mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;
	return dev->mtu;
}
 477
 478static int
 479vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 480{
 481        int err = 0;
 482        struct ip_tunnel_parm p;
 483        struct ip_tunnel *t;
 484        struct net *net = dev_net(dev);
 485        struct vti_net *ipn = net_generic(net, vti_net_id);
 486
 487        switch (cmd) {
 488        case SIOCGETTUNNEL:
 489                t = NULL;
 490                if (dev == ipn->fb_tunnel_dev) {
 491                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 492                                           sizeof(p))) {
 493                                err = -EFAULT;
 494                                break;
 495                        }
 496                        t = vti_tunnel_locate(net, &p, 0);
 497                }
 498                if (t == NULL)
 499                        t = netdev_priv(dev);
 500                memcpy(&p, &t->parms, sizeof(p));
 501                p.i_flags |= GRE_KEY | VTI_ISVTI;
 502                p.o_flags |= GRE_KEY;
 503                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 504                        err = -EFAULT;
 505                break;
 506
 507        case SIOCADDTUNNEL:
 508        case SIOCCHGTUNNEL:
 509                err = -EPERM;
 510                if (!capable(CAP_NET_ADMIN))
 511                        goto done;
 512
 513                err = -EFAULT;
 514                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 515                        goto done;
 516
 517                err = -EINVAL;
 518                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 519                    p.iph.ihl != 5)
 520                        goto done;
 521
 522                t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 523
 524                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 525                        if (t != NULL) {
 526                                if (t->dev != dev) {
 527                                        err = -EEXIST;
 528                                        break;
 529                                }
 530                        } else {
 531                                if (((dev->flags&IFF_POINTOPOINT) &&
 532                                    !p.iph.daddr) ||
 533                                    (!(dev->flags&IFF_POINTOPOINT) &&
 534                                    p.iph.daddr)) {
 535                                        err = -EINVAL;
 536                                        break;
 537                                }
 538                                t = netdev_priv(dev);
 539                                vti_tunnel_unlink(ipn, t);
 540                                synchronize_net();
 541                                t->parms.iph.saddr = p.iph.saddr;
 542                                t->parms.iph.daddr = p.iph.daddr;
 543                                t->parms.i_key = p.i_key;
 544                                t->parms.o_key = p.o_key;
 545                                t->parms.iph.protocol = IPPROTO_IPIP;
 546                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 547                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 548                                vti_tunnel_link(ipn, t);
 549                                netdev_state_change(dev);
 550                        }
 551                }
 552
 553                if (t) {
 554                        err = 0;
 555                        if (cmd == SIOCCHGTUNNEL) {
 556                                t->parms.i_key = p.i_key;
 557                                t->parms.o_key = p.o_key;
 558                                if (t->parms.link != p.link) {
 559                                        t->parms.link = p.link;
 560                                        vti_tunnel_bind_dev(dev);
 561                                        netdev_state_change(dev);
 562                                }
 563                        }
 564                        p.i_flags |= GRE_KEY | VTI_ISVTI;
 565                        p.o_flags |= GRE_KEY;
 566                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
 567                                         sizeof(p)))
 568                                err = -EFAULT;
 569                } else
 570                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 571                break;
 572
 573        case SIOCDELTUNNEL:
 574                err = -EPERM;
 575                if (!capable(CAP_NET_ADMIN))
 576                        goto done;
 577
 578                if (dev == ipn->fb_tunnel_dev) {
 579                        err = -EFAULT;
 580                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 581                                           sizeof(p)))
 582                                goto done;
 583                        err = -ENOENT;
 584
 585                        t = vti_tunnel_locate(net, &p, 0);
 586                        if (t == NULL)
 587                                goto done;
 588                        err = -EPERM;
 589                        if (t->dev == ipn->fb_tunnel_dev)
 590                                goto done;
 591                        dev = t->dev;
 592                }
 593                unregister_netdevice(dev);
 594                err = 0;
 595                break;
 596
 597        default:
 598                err = -EINVAL;
 599        }
 600
 601done:
 602        return err;
 603}
 604
 605static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 606{
 607        if (new_mtu < 68 || new_mtu > 0xFFF8)
 608                return -EINVAL;
 609        dev->mtu = new_mtu;
 610        return 0;
 611}
 612
/* netdev callbacks shared by the fallback and regular vti devices
 * (ndo_init differs only in implementation, see vti_fb_tunnel_init).
 */
static const struct net_device_ops vti_netdev_ops = {
	.ndo_init	= vti_tunnel_init,
	.ndo_uninit	= vti_tunnel_uninit,
	.ndo_start_xmit	= vti_tunnel_xmit,
	.ndo_do_ioctl	= vti_tunnel_ioctl,
	.ndo_change_mtu	= vti_tunnel_change_mtu,
	.ndo_get_stats64 = vti_get_stats64,
};
 621
/* netdev destructor: release the per-cpu stats allocated in ndo_init
 * before freeing the device itself.
 */
static void vti_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
 627
/* Common setup for vti devices; used as the alloc_netdev()/rtnl_link_ops
 * setup callback.
 */
static void vti_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &vti_netdev_ops;
	dev->destructor		= vti_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	/* worst-case link-layer header plus the outer IPv4 header */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are IPv4 endpoints */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
 643
/* ndo_init for regular vti devices: seed the link-layer addresses from
 * the tunnel endpoints and allocate the per-cpu stats.
 */
static int vti_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
 660
/* Init for the per-netns fallback device "ip_vti0": set up an IPIP
 * header template, allocate per-cpu stats and anchor the device on the
 * wildcard hash chain.  The dev_hold() pairs with dev_put() in
 * vti_tunnel_uninit().
 */
static int __net_init vti_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
 682
/* Registered with xfrm4 mode-tunnel input (priority 1) in vti_init(). */
static struct xfrm_tunnel vti_handler __read_mostly = {
	.handler	=	vti_rcv,
	.err_handler	=	vti_err,
	.priority	=	1,
};
 688
/* Queue every tunnel device for unregistration.  Only hash prios 1-3 are
 * walked: the wildcard chain (prio 0) holds nothing but the fallback
 * device, which is torn down separately.  Caller must hold RTNL.
 */
static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ipn->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}
 706
/* Per-netns init: wire up the prio -> hash-table map and create/register
 * the fallback device "ip_vti0".  Returns 0 or a negative errno,
 * releasing the fallback device on failure.
 */
static int __net_init vti_init_net(struct net *net)
{
	int err;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "ip_vti0",
					  vti_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;

	err = register_netdev(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	return 0;

err_reg_dev:
	/* frees per-cpu stats and the netdev */
	vti_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}
 742
 743static void __net_exit vti_exit_net(struct net *net)
 744{
 745        struct vti_net *ipn = net_generic(net, vti_net_id);
 746        LIST_HEAD(list);
 747
 748        rtnl_lock();
 749        vti_destroy_tunnels(ipn, &list);
 750        unregister_netdevice_many(&list);
 751        rtnl_unlock();
 752}
 753
/* Per-netns registration: the core allocates sizeof(struct vti_net) per
 * namespace, reachable via net_generic(net, vti_net_id).
 */
static struct pernet_operations vti_net_ops = {
	.init = vti_init_net,
	.exit = vti_exit_net,
	.id   = &vti_net_id,
	.size = sizeof(struct vti_net),
};
 760
/* rtnl_link_ops validate hook: all attribute checking is delegated to
 * the netlink policy, so everything that parses is accepted.
 */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}
 765
/* Translate IFLA_VTI_* netlink attributes into tunnel parameters.
 * @parms is zeroed first; absent attributes therefore default to 0.
 */
static void vti_netlink_parms(struct nlattr *data[],
			      struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_IPIP;

	if (!data)
		return;

	if (data[IFLA_VTI_LINK])
		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);

	if (data[IFLA_VTI_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);

	if (data[IFLA_VTI_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);

	if (data[IFLA_VTI_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);

	if (data[IFLA_VTI_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);

}
 792
/* rtnl newlink hook: parse attributes into the device's private parms,
 * refuse duplicates of an existing tunnel, bind to the lower device and
 * register.  The dev_hold() pairs with dev_put() in vti_tunnel_uninit().
 */
static int vti_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &nt->parms);

	if (vti_tunnel_locate(net, &nt->parms, 0))
		return -EEXIST;

	mtu = vti_tunnel_bind_dev(dev);
	/* an explicit IFLA_MTU overrides the MTU derived from the lower dev */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	vti_tunnel_link(ipn, nt);

out:
	return err;
}
 822
/* rtnl changelink hook: apply new attributes to an existing device.
 * Refuses to touch the fallback device and to steal another tunnel's
 * endpoints; otherwise re-hashes the device under its new parameters
 * and rebinds/updates the MTU when the lower link changed.
 */
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
			  struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ipn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &p);

	t = vti_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		/* endpoints changed: move the tunnel to its new bucket */
		vti_tunnel_unlink(ipn, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		t->parms.o_key = p.o_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		vti_tunnel_link(ipn, t);
		netdev_state_change(dev);
	}

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = vti_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
 869
 870static size_t vti_get_size(const struct net_device *dev)
 871{
 872        return
 873                /* IFLA_VTI_LINK */
 874                nla_total_size(4) +
 875                /* IFLA_VTI_IKEY */
 876                nla_total_size(4) +
 877                /* IFLA_VTI_OKEY */
 878                nla_total_size(4) +
 879                /* IFLA_VTI_LOCAL */
 880                nla_total_size(4) +
 881                /* IFLA_VTI_REMOTE */
 882                nla_total_size(4) +
 883                0;
 884}
 885
 886static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 887{
 888        struct ip_tunnel *t = netdev_priv(dev);
 889        struct ip_tunnel_parm *p = &t->parms;
 890
 891        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 892        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 893        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
 894        nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
 895        nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 896
 897        return 0;
 898}
 899
/* Netlink attribute policy for IFLA_VTI_*; enforced by the core before
 * vti_tunnel_validate()/vti_netlink_parms() run.
 */
static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
};
 907
/* rtnetlink glue for "ip link ... type vti". */
static struct rtnl_link_ops vti_link_ops __read_mostly = {
	.kind		= "vti",
	.maxtype	= IFLA_VTI_MAX,
	.policy		= vti_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= vti_tunnel_setup,
	.validate	= vti_tunnel_validate,
	.newlink	= vti_newlink,
	.changelink	= vti_changelink,
	.get_size	= vti_get_size,
	.fill_info	= vti_fill_info,
};
 920
 921static int __init vti_init(void)
 922{
 923        int err;
 924
 925        pr_info("IPv4 over IPSec tunneling driver\n");
 926
 927        err = register_pernet_device(&vti_net_ops);
 928        if (err < 0)
 929                return err;
 930        err = xfrm4_mode_tunnel_input_register(&vti_handler);
 931        if (err < 0) {
 932                unregister_pernet_device(&vti_net_ops);
 933                pr_info(KERN_INFO "vti init: can't register tunnel\n");
 934        }
 935
 936        err = rtnl_link_register(&vti_link_ops);
 937        if (err < 0)
 938                goto rtnl_link_failed;
 939
 940        return err;
 941
 942rtnl_link_failed:
 943        xfrm4_mode_tunnel_input_deregister(&vti_handler);
 944        unregister_pernet_device(&vti_net_ops);
 945        return err;
 946}
 947
/* Module exit: tear down in reverse of vti_init() registration order. */
static void __exit vti_fini(void)
{
	rtnl_link_unregister(&vti_link_ops);
	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
		pr_info("vti close: can't deregister tunnel\n");

	unregister_pernet_device(&vti_net_ops);
}
 956
/* Module boilerplate; the netdev alias makes modprobe load us for ip_vti0. */
module_init(vti_init);
module_exit(vti_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");
 962
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.