linux/net/ipv4/ipip.c
   1/*
   2 *      Linux NET3:     IP/IP protocol decoder.
   3 *
   4 *      Authors:
   5 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6 *
   7 *      Fixes:
   8 *              Alan Cox        :       Merged and made usable non-modular (it's so tiny it's silly as
   9 *                                      a module taking up 2 pages).
  10 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11 *                                      to keep ip_forward happy.
  12 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14 *              David Woodhouse :       Perform some basic ICMP handling.
  15 *                                      IPIP Routing without decapsulation.
  16 *              Carlos Picoto   :       GRE over IP support
  17 *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
  18 *                                      I do not want to merge them together.
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 *
  25 */
  26
  27/* tunnel.c: an IP tunnel driver
  28
  29        The purpose of this driver is to provide an IP tunnel through
  30        which you can tunnel network traffic transparently across subnets.
  31
  32        This was written by looking at Nick Holloway's dummy driver
  33        Thanks for the great code!
  34
  35                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37        Minor tweaks:
  38                Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                dev->hard_header/hard_header_len changed to use no headers.
  40                Comments/bracketing tweaked.
  41                Made the tunnels use dev->name instead of "tunnel:" when reporting errors.
  42                Added tx_dropped stat
  43
  44                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46        Reworked:
  47                Changed to tunnel to destination gateway in addition to the
  48                        tunnel's pointopoint address
  49                Almost completely rewritten
  50                Note:  There is currently no firewall or ICMP handling done.
  51
  52                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54*/
  55
  56/* Things I wish I had known when writing the tunnel driver:
  57
  58        When the tunnel_xmit() function is called, the skb contains the
  59        packet to be sent (plus a great deal of extra info), and dev
  60        contains the tunnel device that _we_ are.
  61
  62        When we are passed a packet, we are expected to fill in the
  63        source address with our source IP address.
  64
  65        What is the proper way to allocate, copy and free a buffer?
  66        After you allocate it, it is a "0 length" chunk of memory
  67        starting at zero.  If you want to add headers to the buffer
  68        later, you'll have to call "skb_reserve(skb, amount)" with
  69        the amount of memory you want reserved.  Then, you call
  70        "skb_put(skb, amount)" with the amount of space you want in
  71        the buffer.  skb_put() returns a pointer to the top (#0) of
  72        that buffer.  skb->len is set to the amount of space you have
  73        "allocated" with skb_put().  You can then write up to skb->len
  74        bytes to that buffer.  If you need more, you can call skb_put()
  75        again with the additional amount of space you need.  You can
  76        find out how much more space you can allocate by calling
  77        "skb_tailroom(skb)".
  78        Now, to add header space, call "skb_push(skb, header_len)".
  79        This creates space at the beginning of the buffer and returns
  80        a pointer to this new space.  If later you need to strip a
  81        header from a buffer, call "skb_pull(skb, header_len)".
  82        skb_headroom() will return how much space is left at the top
  83        of the buffer (before the main data).  Remember, this headroom
  84        space must be reserved before the skb_put() function is called.
  85        */
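
/* An illustrative sketch of how the calls described above fit together (not
 * part of this driver; "payload" and "payload_len" are made-up names for the
 * example):
 *
 *      struct sk_buff *skb = alloc_skb(LL_MAX_HEADER + payload_len, GFP_ATOMIC);
 *      if (!skb)
 *              return -ENOMEM;
 *      skb_reserve(skb, LL_MAX_HEADER);        // reserve headroom while the skb is still empty
 *      memcpy(skb_put(skb, payload_len),       // skb_put() grows the data area at the tail
 *             payload, payload_len);
 *      skb_push(skb, sizeof(struct iphdr));    // later, prepend a header into the headroom
 *      skb_pull(skb, sizeof(struct iphdr));    // or strip a header from the front again
 *
 * skb_headroom() and skb_tailroom() report how much space remains at either
 * end at any point.
 */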
  86
  87/*
   88   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
  89
  90   For comments look at net/ipv4/ip_gre.c --ANK
  91 */
  92
  93
  94#include <linux/capability.h>
  95#include <linux/module.h>
  96#include <linux/types.h>
  97#include <linux/kernel.h>
  98#include <linux/slab.h>
  99#include <asm/uaccess.h>
 100#include <linux/skbuff.h>
 101#include <linux/netdevice.h>
 102#include <linux/in.h>
 103#include <linux/tcp.h>
 104#include <linux/udp.h>
 105#include <linux/if_arp.h>
 106#include <linux/mroute.h>
 107#include <linux/init.h>
 108#include <linux/netfilter_ipv4.h>
 109#include <linux/if_ether.h>
 110
 111#include <net/sock.h>
 112#include <net/ip.h>
 113#include <net/icmp.h>
 114#include <net/ipip.h>
 115#include <net/inet_ecn.h>
 116#include <net/xfrm.h>
 117#include <net/net_namespace.h>
 118#include <net/netns/generic.h>
 119
 120#define HASH_SIZE  16
 121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
 123static int ipip_net_id __read_mostly;
 124struct ipip_net {
 125        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
 126        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
 127        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
 128        struct ip_tunnel __rcu *tunnels_wc[1];
 129        struct ip_tunnel __rcu **tunnels[4];
 130
 131        struct net_device *fb_tunnel_dev;
 132};
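
/* The four chains index tunnels by which endpoints are configured:
 * tunnels[3] (tunnels_r_l) is hashed by both remote and local address,
 * tunnels[2] (tunnels_r) by remote only, tunnels[1] (tunnels_l) by local
 * only, and tunnels[0] (tunnels_wc) is the wildcard slot, normally just the
 * fallback device tunl0.  ipip_tunnel_lookup() walks them from most to least
 * specific; writers link/unlink entries under RTNL and publish with
 * rcu_assign_pointer(), so lookups can run under rcu_read_lock() alone.
 */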
 133
 134static int ipip_tunnel_init(struct net_device *dev);
 135static void ipip_tunnel_setup(struct net_device *dev);
 136static void ipip_dev_free(struct net_device *dev);
 137
 138/*
 139 * Locking : hash tables are protected by RCU and RTNL
 140 */
 141
 142#define for_each_ip_tunnel_rcu(start) \
 143        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 144
  145/* Often-modified stats are per-CPU; others are shared (netdev->stats). */
 146struct pcpu_tstats {
 147        u64     rx_packets;
 148        u64     rx_bytes;
 149        u64     tx_packets;
 150        u64     tx_bytes;
 151        struct u64_stats_sync   syncp;
 152};
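
/* Writers (see the rx path in ipip_rcv()) bracket their updates with
 * u64_stats_update_begin()/u64_stats_update_end(); ipip_get_stats64() below
 * re-reads a counter pair whenever u64_stats_fetch_retry_bh() reports that a
 * writer ran concurrently, so the 64-bit counters stay consistent even on
 * 32-bit SMP where the loads are not atomic.
 */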
 153
 154static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 155                                                  struct rtnl_link_stats64 *tot)
 156{
 157        int i;
 158
 159        for_each_possible_cpu(i) {
 160                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 161                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 162                unsigned int start;
 163
 164                do {
 165                        start = u64_stats_fetch_begin_bh(&tstats->syncp);
 166                        rx_packets = tstats->rx_packets;
 167                        tx_packets = tstats->tx_packets;
 168                        rx_bytes = tstats->rx_bytes;
 169                        tx_bytes = tstats->tx_bytes;
 170                } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
 171
 172                tot->rx_packets += rx_packets;
 173                tot->tx_packets += tx_packets;
 174                tot->rx_bytes   += rx_bytes;
 175                tot->tx_bytes   += tx_bytes;
 176        }
 177
 178        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 179        tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 180        tot->tx_dropped = dev->stats.tx_dropped;
 181        tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
 182        tot->tx_errors = dev->stats.tx_errors;
 183        tot->collisions = dev->stats.collisions;
 184
 185        return tot;
 186}
 187
 188static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
 189                __be32 remote, __be32 local)
 190{
 191        unsigned int h0 = HASH(remote);
 192        unsigned int h1 = HASH(local);
 193        struct ip_tunnel *t;
 194        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 195
 196        for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 197                if (local == t->parms.iph.saddr &&
 198                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 199                        return t;
 200
 201        for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 202                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 203                        return t;
 204
 205        for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 206                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 207                        return t;
 208
 209        t = rcu_dereference(ipn->tunnels_wc[0]);
 210        if (t && (t->dev->flags&IFF_UP))
 211                return t;
 212        return NULL;
 213}
 214
 215static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 216                struct ip_tunnel_parm *parms)
 217{
 218        __be32 remote = parms->iph.daddr;
 219        __be32 local = parms->iph.saddr;
 220        unsigned int h = 0;
 221        int prio = 0;
 222
 223        if (remote) {
 224                prio |= 2;
 225                h ^= HASH(remote);
 226        }
 227        if (local) {
 228                prio |= 1;
 229                h ^= HASH(local);
 230        }
 231        return &ipn->tunnels[prio][h];
 232}
 233
 234static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 235                struct ip_tunnel *t)
 236{
 237        return __ipip_bucket(ipn, &t->parms);
 238}
 239
 240static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 241{
 242        struct ip_tunnel __rcu **tp;
 243        struct ip_tunnel *iter;
 244
 245        for (tp = ipip_bucket(ipn, t);
 246             (iter = rtnl_dereference(*tp)) != NULL;
 247             tp = &iter->next) {
 248                if (t == iter) {
 249                        rcu_assign_pointer(*tp, t->next);
 250                        break;
 251                }
 252        }
 253}
 254
 255static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 256{
 257        struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 258
 259        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 260        rcu_assign_pointer(*tp, t);
 261}
 262
 263static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
 264                struct ip_tunnel_parm *parms, int create)
 265{
 266        __be32 remote = parms->iph.daddr;
 267        __be32 local = parms->iph.saddr;
 268        struct ip_tunnel *t, *nt;
 269        struct ip_tunnel __rcu **tp;
 270        struct net_device *dev;
 271        char name[IFNAMSIZ];
 272        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 273
 274        for (tp = __ipip_bucket(ipn, parms);
 275                 (t = rtnl_dereference(*tp)) != NULL;
 276                 tp = &t->next) {
 277                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 278                        return t;
 279        }
 280        if (!create)
 281                return NULL;
 282
 283        if (parms->name[0])
 284                strlcpy(name, parms->name, IFNAMSIZ);
 285        else
 286                strcpy(name, "tunl%d");
 287
 288        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 289        if (dev == NULL)
 290                return NULL;
 291
 292        dev_net_set(dev, net);
 293
 294        nt = netdev_priv(dev);
 295        nt->parms = *parms;
 296
 297        if (ipip_tunnel_init(dev) < 0)
 298                goto failed_free;
 299
 300        if (register_netdevice(dev) < 0)
 301                goto failed_free;
 302
 303        strcpy(nt->parms.name, dev->name);
 304
 305        dev_hold(dev);
 306        ipip_tunnel_link(ipn, nt);
 307        return nt;
 308
 309failed_free:
 310        ipip_dev_free(dev);
 311        return NULL;
 312}
 313
 314/* called with RTNL */
 315static void ipip_tunnel_uninit(struct net_device *dev)
 316{
 317        struct net *net = dev_net(dev);
 318        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 319
 320        if (dev == ipn->fb_tunnel_dev)
 321                RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
 322        else
 323                ipip_tunnel_unlink(ipn, netdev_priv(dev));
 324        dev_put(dev);
 325}
 326
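/* ICMP error handler: skb->data points at the outer IPv4 header of the
 * tunnel packet quoted inside the ICMP error, so the originating tunnel is
 * looked up by that header's (daddr, saddr) pair.  FRAG_NEEDED updates the
 * cached path MTU, redirects are handed to the routing layer, and remaining
 * soft errors are only counted (err_count/err_time) so that
 * ipip_tunnel_xmit() can report dst_link_failure() on later transmits.
 */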
 327static int ipip_err(struct sk_buff *skb, u32 info)
 328{
 329
 330/* All the routers (except for Linux) return only
  331   8 bytes of packet payload. This means that precise relaying of
 332   ICMP in the real Internet is absolutely infeasible.
 333 */
 334        const struct iphdr *iph = (const struct iphdr *)skb->data;
 335        const int type = icmp_hdr(skb)->type;
 336        const int code = icmp_hdr(skb)->code;
 337        struct ip_tunnel *t;
 338        int err;
 339
 340        switch (type) {
 341        default:
 342        case ICMP_PARAMETERPROB:
 343                return 0;
 344
 345        case ICMP_DEST_UNREACH:
 346                switch (code) {
 347                case ICMP_SR_FAILED:
 348                case ICMP_PORT_UNREACH:
 349                        /* Impossible event. */
 350                        return 0;
 351                default:
 352                        /* All others are translated to HOST_UNREACH.
 353                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 354                           I believe they are just ether pollution. --ANK
 355                         */
 356                        break;
 357                }
 358                break;
 359        case ICMP_TIME_EXCEEDED:
 360                if (code != ICMP_EXC_TTL)
 361                        return 0;
 362                break;
 363        case ICMP_REDIRECT:
 364                break;
 365        }
 366
 367        err = -ENOENT;
 368
 369        rcu_read_lock();
 370        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 371        if (t == NULL)
 372                goto out;
 373
 374        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 375                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 376                                 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
 377                err = 0;
 378                goto out;
 379        }
 380
 381        if (type == ICMP_REDIRECT) {
 382                ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
 383                              IPPROTO_IPIP, 0);
 384                err = 0;
 385                goto out;
 386        }
 387
 388        if (t->parms.iph.daddr == 0)
 389                goto out;
 390
 391        err = 0;
 392        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 393                goto out;
 394
 395        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 396                t->err_count++;
 397        else
 398                t->err_count = 1;
 399        t->err_time = jiffies;
 400out:
 401        rcu_read_unlock();
 402        return err;
 403}
 404
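/* Propagate ECN on decapsulation: if the outer header carries the Congestion
 * Experienced codepoint, mark the inner header CE as well so the congestion
 * signal is not lost when the outer header is removed.
 */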
 405static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 406                                        struct sk_buff *skb)
 407{
 408        struct iphdr *inner_iph = ip_hdr(skb);
 409
 410        if (INET_ECN_is_ce(outer_iph->tos))
 411                IP_ECN_set_ce(inner_iph);
 412}
 413
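/* Receive path, invoked for IPPROTO_IPIP packets through the xfrm4 tunnel
 * handler (ipip_handler) registered at module init: the tunnel is looked up
 * by the outer source/destination pair, the outer header becomes the MAC
 * header while the network header is reset to the inner packet, per-CPU
 * stats are bumped and the decapsulated packet is re-injected via netif_rx().
 */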
 414static int ipip_rcv(struct sk_buff *skb)
 415{
 416        struct ip_tunnel *tunnel;
 417        const struct iphdr *iph = ip_hdr(skb);
 418
 419        rcu_read_lock();
 420        tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 421        if (tunnel != NULL) {
 422                struct pcpu_tstats *tstats;
 423
 424                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 425                        rcu_read_unlock();
 426                        kfree_skb(skb);
 427                        return 0;
 428                }
 429
 430                secpath_reset(skb);
 431
 432                skb->mac_header = skb->network_header;
 433                skb_reset_network_header(skb);
 434                skb->protocol = htons(ETH_P_IP);
 435                skb->pkt_type = PACKET_HOST;
 436
 437                tstats = this_cpu_ptr(tunnel->dev->tstats);
 438                u64_stats_update_begin(&tstats->syncp);
 439                tstats->rx_packets++;
 440                tstats->rx_bytes += skb->len;
 441                u64_stats_update_end(&tstats->syncp);
 442
 443                __skb_tunnel_rx(skb, tunnel->dev);
 444
 445                ipip_ecn_decapsulate(iph, skb);
 446
 447                netif_rx(skb);
 448
 449                rcu_read_unlock();
 450                return 0;
 451        }
 452        rcu_read_unlock();
 453
 454        return -1;
 455}
 456
 457/*
 458 *      This function assumes it is being called from dev_queue_xmit()
 459 *      and that skb is filled properly by that function.
 460 */
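
/* Outline of the encapsulation path: pick the outer destination (the
 * tunnel's configured daddr, or for an unconfigured/NBMA tunnel the next hop
 * of the route already attached to the skb), look up an outer route, enforce
 * path MTU when DF is set, make sure there is headroom for one more
 * struct iphdr (reallocating the skb if necessary), then push and fill the
 * outer IPv4 header and hand the result to the IP layer via
 * __IPTUNNEL_XMIT().
 */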
 461
 462static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 463{
 464        struct ip_tunnel *tunnel = netdev_priv(dev);
 465        struct pcpu_tstats *tstats;
 466        const struct iphdr  *tiph = &tunnel->parms.iph;
 467        u8     tos = tunnel->parms.iph.tos;
 468        __be16 df = tiph->frag_off;
 469        struct rtable *rt;                      /* Route to the other host */
 470        struct net_device *tdev;                /* Device to other host */
 471        const struct iphdr  *old_iph = ip_hdr(skb);
 472        struct iphdr  *iph;                     /* Our new IP header */
 473        unsigned int max_headroom;              /* The extra header space needed */
 474        __be32 dst = tiph->daddr;
 475        struct flowi4 fl4;
 476        int    mtu;
 477
 478        if (skb->protocol != htons(ETH_P_IP))
 479                goto tx_error;
 480
 481        if (tos & 1)
 482                tos = old_iph->tos;
 483
 484        if (!dst) {
 485                /* NBMA tunnel */
 486                if ((rt = skb_rtable(skb)) == NULL) {
 487                        dev->stats.tx_fifo_errors++;
 488                        goto tx_error;
 489                }
 490                dst = rt_nexthop(rt, old_iph->daddr);
 491        }
 492
 493        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 494                                   dst, tiph->saddr,
 495                                   0, 0,
 496                                   IPPROTO_IPIP, RT_TOS(tos),
 497                                   tunnel->parms.link);
 498        if (IS_ERR(rt)) {
 499                dev->stats.tx_carrier_errors++;
 500                goto tx_error_icmp;
 501        }
 502        tdev = rt->dst.dev;
 503
 504        if (tdev == dev) {
 505                ip_rt_put(rt);
 506                dev->stats.collisions++;
 507                goto tx_error;
 508        }
 509
 510        df |= old_iph->frag_off & htons(IP_DF);
 511
 512        if (df) {
 513                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 514
 515                if (mtu < 68) {
 516                        dev->stats.collisions++;
 517                        ip_rt_put(rt);
 518                        goto tx_error;
 519                }
 520
 521                if (skb_dst(skb))
 522                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 523
 524                if ((old_iph->frag_off & htons(IP_DF)) &&
 525                    mtu < ntohs(old_iph->tot_len)) {
 526                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 527                                  htonl(mtu));
 528                        ip_rt_put(rt);
 529                        goto tx_error;
 530                }
 531        }
 532
 533        if (tunnel->err_count > 0) {
 534                if (time_before(jiffies,
 535                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 536                        tunnel->err_count--;
 537                        dst_link_failure(skb);
 538                } else
 539                        tunnel->err_count = 0;
 540        }
 541
 542        /*
 543         * Okay, now see if we can stuff it in the buffer as-is.
 544         */
 545        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 546
 547        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 548            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 549                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 550                if (!new_skb) {
 551                        ip_rt_put(rt);
 552                        dev->stats.tx_dropped++;
 553                        dev_kfree_skb(skb);
 554                        return NETDEV_TX_OK;
 555                }
 556                if (skb->sk)
 557                        skb_set_owner_w(new_skb, skb->sk);
 558                dev_kfree_skb(skb);
 559                skb = new_skb;
 560                old_iph = ip_hdr(skb);
 561        }
 562
 563        skb->transport_header = skb->network_header;
 564        skb_push(skb, sizeof(struct iphdr));
 565        skb_reset_network_header(skb);
 566        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 567        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 568                              IPSKB_REROUTED);
 569        skb_dst_drop(skb);
 570        skb_dst_set(skb, &rt->dst);
 571
 572        /*
 573         *      Push down and install the IPIP header.
 574         */
 575
 576        iph                     =       ip_hdr(skb);
 577        iph->version            =       4;
 578        iph->ihl                =       sizeof(struct iphdr)>>2;
 579        iph->frag_off           =       df;
 580        iph->protocol           =       IPPROTO_IPIP;
 581        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 582        iph->daddr              =       fl4.daddr;
 583        iph->saddr              =       fl4.saddr;
 584
 585        if ((iph->ttl = tiph->ttl) == 0)
 586                iph->ttl        =       old_iph->ttl;
 587
 588        nf_reset(skb);
 589        tstats = this_cpu_ptr(dev->tstats);
 590        __IPTUNNEL_XMIT(tstats, &dev->stats);
 591        return NETDEV_TX_OK;
 592
 593tx_error_icmp:
 594        dst_link_failure(skb);
 595tx_error:
 596        dev->stats.tx_errors++;
 597        dev_kfree_skb(skb);
 598        return NETDEV_TX_OK;
 599}
 600
 601static void ipip_tunnel_bind_dev(struct net_device *dev)
 602{
 603        struct net_device *tdev = NULL;
 604        struct ip_tunnel *tunnel;
 605        const struct iphdr *iph;
 606
 607        tunnel = netdev_priv(dev);
 608        iph = &tunnel->parms.iph;
 609
 610        if (iph->daddr) {
 611                struct rtable *rt;
 612                struct flowi4 fl4;
 613
 614                rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 615                                           iph->daddr, iph->saddr,
 616                                           0, 0,
 617                                           IPPROTO_IPIP,
 618                                           RT_TOS(iph->tos),
 619                                           tunnel->parms.link);
 620                if (!IS_ERR(rt)) {
 621                        tdev = rt->dst.dev;
 622                        ip_rt_put(rt);
 623                }
 624                dev->flags |= IFF_POINTOPOINT;
 625        }
 626
 627        if (!tdev && tunnel->parms.link)
 628                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 629
 630        if (tdev) {
 631                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 632                dev->mtu = tdev->mtu - sizeof(struct iphdr);
 633        }
 634        dev->iflink = tunnel->parms.link;
 635}
 636
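/* Userspace view of the ioctl interface implemented below (an illustrative
 * sketch, not part of this file; the addresses and the "ipiptest" name are
 * invented).  The usual front end is iproute2, e.g.
 *
 *      ip tunnel add ipiptest mode ipip local 10.0.0.1 remote 10.0.0.2 ttl 64
 *
 * which boils down to a SIOCADDTUNNEL ioctl issued on the fallback device
 * tunl0:
 *
 *      #include <string.h>
 *      #include <sys/ioctl.h>
 *      #include <sys/socket.h>
 *      #include <arpa/inet.h>
 *      #include <linux/if.h>
 *      #include <linux/ip.h>
 *      #include <linux/if_tunnel.h>
 *
 *      struct ip_tunnel_parm p;
 *      struct ifreq ifr;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&p, 0, sizeof(p));
 *      strcpy(p.name, "ipiptest");
 *      p.iph.version  = 4;
 *      p.iph.ihl      = 5;
 *      p.iph.protocol = IPPROTO_IPIP;
 *      p.iph.ttl      = 64;
 *      p.iph.saddr    = inet_addr("10.0.0.1");
 *      p.iph.daddr    = inet_addr("10.0.0.2");
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      strcpy(ifr.ifr_name, "tunl0");          // ADD is issued on the fallback device
 *      ifr.ifr_ifru.ifru_data = (void *)&p;
 *      ioctl(fd, SIOCADDTUNNEL, &ifr);         // requires CAP_NET_ADMIN
 */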
 637static int
 638ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 639{
 640        int err = 0;
 641        struct ip_tunnel_parm p;
 642        struct ip_tunnel *t;
 643        struct net *net = dev_net(dev);
 644        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 645
 646        switch (cmd) {
 647        case SIOCGETTUNNEL:
 648                t = NULL;
 649                if (dev == ipn->fb_tunnel_dev) {
 650                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 651                                err = -EFAULT;
 652                                break;
 653                        }
 654                        t = ipip_tunnel_locate(net, &p, 0);
 655                }
 656                if (t == NULL)
 657                        t = netdev_priv(dev);
 658                memcpy(&p, &t->parms, sizeof(p));
 659                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 660                        err = -EFAULT;
 661                break;
 662
 663        case SIOCADDTUNNEL:
 664        case SIOCCHGTUNNEL:
 665                err = -EPERM;
 666                if (!capable(CAP_NET_ADMIN))
 667                        goto done;
 668
 669                err = -EFAULT;
 670                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 671                        goto done;
 672
 673                err = -EINVAL;
 674                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 675                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 676                        goto done;
 677                if (p.iph.ttl)
 678                        p.iph.frag_off |= htons(IP_DF);
 679
 680                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 681
 682                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 683                        if (t != NULL) {
 684                                if (t->dev != dev) {
 685                                        err = -EEXIST;
 686                                        break;
 687                                }
 688                        } else {
 689                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 690                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 691                                        err = -EINVAL;
 692                                        break;
 693                                }
 694                                t = netdev_priv(dev);
 695                                ipip_tunnel_unlink(ipn, t);
 696                                synchronize_net();
 697                                t->parms.iph.saddr = p.iph.saddr;
 698                                t->parms.iph.daddr = p.iph.daddr;
 699                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 700                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 701                                ipip_tunnel_link(ipn, t);
 702                                netdev_state_change(dev);
 703                        }
 704                }
 705
 706                if (t) {
 707                        err = 0;
 708                        if (cmd == SIOCCHGTUNNEL) {
 709                                t->parms.iph.ttl = p.iph.ttl;
 710                                t->parms.iph.tos = p.iph.tos;
 711                                t->parms.iph.frag_off = p.iph.frag_off;
 712                                if (t->parms.link != p.link) {
 713                                        t->parms.link = p.link;
 714                                        ipip_tunnel_bind_dev(dev);
 715                                        netdev_state_change(dev);
 716                                }
 717                        }
 718                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 719                                err = -EFAULT;
 720                } else
 721                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 722                break;
 723
 724        case SIOCDELTUNNEL:
 725                err = -EPERM;
 726                if (!capable(CAP_NET_ADMIN))
 727                        goto done;
 728
 729                if (dev == ipn->fb_tunnel_dev) {
 730                        err = -EFAULT;
 731                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 732                                goto done;
 733                        err = -ENOENT;
 734                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
 735                                goto done;
 736                        err = -EPERM;
 737                        if (t->dev == ipn->fb_tunnel_dev)
 738                                goto done;
 739                        dev = t->dev;
 740                }
 741                unregister_netdevice(dev);
 742                err = 0;
 743                break;
 744
 745        default:
 746                err = -EINVAL;
 747        }
 748
 749done:
 750        return err;
 751}
 752
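/* 68 is the minimum MTU an IPv4 link is required to support (RFC 791); the
 * upper bound keeps the encapsulated packet within the 64 KB IPv4 total
 * length limit once the outer header is added.
 */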
 753static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 754{
 755        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 756                return -EINVAL;
 757        dev->mtu = new_mtu;
 758        return 0;
 759}
 760
 761static const struct net_device_ops ipip_netdev_ops = {
 762        .ndo_uninit     = ipip_tunnel_uninit,
 763        .ndo_start_xmit = ipip_tunnel_xmit,
 764        .ndo_do_ioctl   = ipip_tunnel_ioctl,
 765        .ndo_change_mtu = ipip_tunnel_change_mtu,
 766        .ndo_get_stats64 = ipip_get_stats64,
 767};
 768
 769static void ipip_dev_free(struct net_device *dev)
 770{
 771        free_percpu(dev->tstats);
 772        free_netdev(dev);
 773}
 774
 775static void ipip_tunnel_setup(struct net_device *dev)
 776{
 777        dev->netdev_ops         = &ipip_netdev_ops;
 778        dev->destructor         = ipip_dev_free;
 779
 780        dev->type               = ARPHRD_TUNNEL;
 781        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 782        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 783        dev->flags              = IFF_NOARP;
 784        dev->iflink             = 0;
 785        dev->addr_len           = 4;
 786        dev->features           |= NETIF_F_NETNS_LOCAL;
 787        dev->features           |= NETIF_F_LLTX;
 788        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 789}
 790
 791static int ipip_tunnel_init(struct net_device *dev)
 792{
 793        struct ip_tunnel *tunnel = netdev_priv(dev);
 794
 795        tunnel->dev = dev;
 796
 797        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 798        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 799
 800        ipip_tunnel_bind_dev(dev);
 801
 802        dev->tstats = alloc_percpu(struct pcpu_tstats);
 803        if (!dev->tstats)
 804                return -ENOMEM;
 805
 806        return 0;
 807}
 808
 809static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 810{
 811        struct ip_tunnel *tunnel = netdev_priv(dev);
 812        struct iphdr *iph = &tunnel->parms.iph;
 813        struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 814
 815        tunnel->dev = dev;
 816        strcpy(tunnel->parms.name, dev->name);
 817
 818        iph->version            = 4;
 819        iph->protocol           = IPPROTO_IPIP;
 820        iph->ihl                = 5;
 821
 822        dev->tstats = alloc_percpu(struct pcpu_tstats);
 823        if (!dev->tstats)
 824                return -ENOMEM;
 825
 826        dev_hold(dev);
 827        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 828        return 0;
 829}
 830
 831static struct xfrm_tunnel ipip_handler __read_mostly = {
 832        .handler        =       ipip_rcv,
 833        .err_handler    =       ipip_err,
 834        .priority       =       1,
 835};
 836
 837static const char banner[] __initconst =
 838        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 839
 840static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 841{
 842        int prio;
 843
 844        for (prio = 1; prio < 4; prio++) {
 845                int h;
 846                for (h = 0; h < HASH_SIZE; h++) {
 847                        struct ip_tunnel *t;
 848
 849                        t = rtnl_dereference(ipn->tunnels[prio][h]);
 850                        while (t != NULL) {
 851                                unregister_netdevice_queue(t->dev, head);
 852                                t = rtnl_dereference(t->next);
 853                        }
 854                }
 855        }
 856}
 857
 858static int __net_init ipip_init_net(struct net *net)
 859{
 860        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 861        struct ip_tunnel *t;
 862        int err;
 863
 864        ipn->tunnels[0] = ipn->tunnels_wc;
 865        ipn->tunnels[1] = ipn->tunnels_l;
 866        ipn->tunnels[2] = ipn->tunnels_r;
 867        ipn->tunnels[3] = ipn->tunnels_r_l;
 868
 869        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 870                                           "tunl0",
 871                                           ipip_tunnel_setup);
 872        if (!ipn->fb_tunnel_dev) {
 873                err = -ENOMEM;
 874                goto err_alloc_dev;
 875        }
 876        dev_net_set(ipn->fb_tunnel_dev, net);
 877
 878        err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 879        if (err)
 880                goto err_reg_dev;
 881
 882        if ((err = register_netdev(ipn->fb_tunnel_dev)))
 883                goto err_reg_dev;
 884
 885        t = netdev_priv(ipn->fb_tunnel_dev);
 886
 887        strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 888        return 0;
 889
 890err_reg_dev:
 891        ipip_dev_free(ipn->fb_tunnel_dev);
 892err_alloc_dev:
 893        /* nothing */
 894        return err;
 895}
 896
 897static void __net_exit ipip_exit_net(struct net *net)
 898{
 899        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 900        LIST_HEAD(list);
 901
 902        rtnl_lock();
 903        ipip_destroy_tunnels(ipn, &list);
 904        unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 905        unregister_netdevice_many(&list);
 906        rtnl_unlock();
 907}
 908
 909static struct pernet_operations ipip_net_ops = {
 910        .init = ipip_init_net,
 911        .exit = ipip_exit_net,
 912        .id   = &ipip_net_id,
 913        .size = sizeof(struct ipip_net),
 914};
 915
 916static int __init ipip_init(void)
 917{
 918        int err;
 919
 920        printk(banner);
 921
 922        err = register_pernet_device(&ipip_net_ops);
 923        if (err < 0)
 924                return err;
 925        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 926        if (err < 0) {
 927                unregister_pernet_device(&ipip_net_ops);
 928                pr_info("%s: can't register tunnel\n", __func__);
 929        }
 930        return err;
 931}
 932
 933static void __exit ipip_fini(void)
 934{
 935        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 936                pr_info("%s: can't deregister tunnel\n", __func__);
 937
 938        unregister_pernet_device(&ipip_net_ops);
 939}
 940
 941module_init(ipip_init);
 942module_exit(ipip_fini);
 943MODULE_LICENSE("GPL");
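
/* The "tunl0" netdev alias lets the module be auto-loaded the first time
 * userspace asks for the fallback device, e.g. when "ip tunnel add ... mode
 * ipip" issues its ioctl on tunl0.
 */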
 944MODULE_ALIAS_NETDEV("tunl0");
 945