linux/net/ipv4/ipip.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     IP/IP protocol decoder.
   3 *
   4 *      Authors:
   5 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6 *
   7 *      Fixes:
    8 *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
    9 *                                      a module taking up 2 pages).
  10 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11 *                                      to keep ip_forward happy.
  12 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14 *              David Woodhouse :       Perform some basic ICMP handling.
  15 *                                      IPIP Routing without decapsulation.
  16 *              Carlos Picoto   :       GRE over IP support
  17 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18 *                                      I do not want to merge them together.
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 *
  25 */
  26
  27/* tunnel.c: an IP tunnel driver
  28
  29        The purpose of this driver is to provide an IP tunnel through
  30        which you can tunnel network traffic transparently across subnets.
  31
  32        This was written by looking at Nick Holloway's dummy driver
  33        Thanks for the great code!
  34
  35                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37        Minor tweaks:
  38                Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                dev->hard_header/hard_header_len changed to use no headers.
  40                Comments/bracketing tweaked.
  41                Made the tunnels use dev->name not tunnel: when error reporting.
  42                Added tx_dropped stat
  43
  44                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46        Reworked:
  47                Changed to tunnel to destination gateway in addition to the
  48                        tunnel's pointopoint address
  49                Almost completely rewritten
  50                Note:  There is currently no firewall or ICMP handling done.
  51
  52                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54*/
  55
  56/* Things I wish I had known when writing the tunnel driver:
  57
  58        When the tunnel_xmit() function is called, the skb contains the
  59        packet to be sent (plus a great deal of extra info), and dev
  60        contains the tunnel device that _we_ are.
  61
  62        When we are passed a packet, we are expected to fill in the
  63        source address with our source IP address.
  64
  65        What is the proper way to allocate, copy and free a buffer?
  66        After you allocate it, it is a "0 length" chunk of memory
  67        starting at zero.  If you want to add headers to the buffer
  68        later, you'll have to call "skb_reserve(skb, amount)" with
  69        the amount of memory you want reserved.  Then, you call
  70        "skb_put(skb, amount)" with the amount of space you want in
  71        the buffer.  skb_put() returns a pointer to the top (#0) of
  72        that buffer.  skb->len is set to the amount of space you have
  73        "allocated" with skb_put().  You can then write up to skb->len
  74        bytes to that buffer.  If you need more, you can call skb_put()
  75        again with the additional amount of space you need.  You can
  76        find out how much more space you can allocate by calling
  77        "skb_tailroom(skb)".
  78        Now, to add header space, call "skb_push(skb, header_len)".
  79        This creates space at the beginning of the buffer and returns
  80        a pointer to this new space.  If later you need to strip a
  81        header from a buffer, call "skb_pull(skb, header_len)".
  82        skb_headroom() will return how much space is left at the top
  83        of the buffer (before the main data).  Remember, this headroom
  84        space must be reserved before the skb_put() function is called.
  85        */
  86
  87/*
   88   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  89
  90   For comments look at net/ipv4/ip_gre.c --ANK
  91 */
  92
  93
  94#include <linux/capability.h>
  95#include <linux/module.h>
  96#include <linux/types.h>
  97#include <linux/kernel.h>
  98#include <linux/slab.h>
  99#include <asm/uaccess.h>
 100#include <linux/skbuff.h>
 101#include <linux/netdevice.h>
 102#include <linux/in.h>
 103#include <linux/tcp.h>
 104#include <linux/udp.h>
 105#include <linux/if_arp.h>
 106#include <linux/mroute.h>
 107#include <linux/init.h>
 108#include <linux/netfilter_ipv4.h>
 109#include <linux/if_ether.h>
 110
 111#include <net/sock.h>
 112#include <net/ip.h>
 113#include <net/icmp.h>
 114#include <net/ipip.h>
 115#include <net/inet_ecn.h>
 116#include <net/xfrm.h>
 117#include <net/net_namespace.h>
 118#include <net/netns/generic.h>
 119
 120#define HASH_SIZE  16
 121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
  /*
   * Module parameter (mode 0644, default true).  ipip_rcv() consults it
   * before logging a packet for which IP_ECN_decapsulate() reported an error.
   */
  123static bool log_ecn_error = true;
  124module_param(log_ecn_error, bool, 0644);
  125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 126
  /* Id passed to net_generic() to obtain this module's struct ipip_net. */
  127static int ipip_net_id __read_mostly;
  /*
   * Per-network-namespace tunnel state.  Tunnels live in one of four tables
   * chosen by which endpoint addresses are configured (see __ipip_bucket());
   * tunnels[] is filled in by ipip_init_net() so that the tables can be
   * indexed by that priority value: 0 = wc, 1 = l, 2 = r, 3 = r_l.
   */
  128struct ipip_net {
  129        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
  130        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
  131        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
  132        struct ip_tunnel __rcu *tunnels_wc[1];
  133        struct ip_tunnel __rcu **tunnels[4];
  134
             /* The "tunl0" device allocated in ipip_init_net(). */
  135        struct net_device *fb_tunnel_dev;
  136};
 137
  /* Forward declarations: these are used before their definitions below. */
  138static int ipip_tunnel_init(struct net_device *dev);
  139static void ipip_tunnel_setup(struct net_device *dev);
  140static void ipip_dev_free(struct net_device *dev);
 141
 142/*
 143 * Locking : hash tables are protected by RCU and RTNL
 144 */
 145
  /*
   * Walk a tunnel chain starting at 'start', following ->next with
   * rcu_dereference().  The loop cursor is a variable named 't' that the
   * caller must already have declared; the macro does not declare it.
   */
  146#define for_each_ip_tunnel_rcu(start) \
  147        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 148
  149/* often modified stats are per cpu, other are shared (netdev->stats) */
  /*
   * One instance per CPU is allocated with alloc_percpu() and stored in
   * dev->tstats.  Writers bracket updates with u64_stats_update_begin/end
   * on syncp; ipip_get_stats64() reads with the fetch_begin/fetch_retry pair.
   */
  150struct pcpu_tstats {
  151        u64     rx_packets;
  152        u64     rx_bytes;
  153        u64     tx_packets;
  154        u64     tx_bytes;
  155        struct u64_stats_sync   syncp;
  156};
 157
 158static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 159                                                  struct rtnl_link_stats64 *tot)
 160{
 161        int i;
 162
 163        for_each_possible_cpu(i) {
 164                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 165                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 166                unsigned int start;
 167
 168                do {
 169                        start = u64_stats_fetch_begin_bh(&tstats->syncp);
 170                        rx_packets = tstats->rx_packets;
 171                        tx_packets = tstats->tx_packets;
 172                        rx_bytes = tstats->rx_bytes;
 173                        tx_bytes = tstats->tx_bytes;
 174                } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
 175
 176                tot->rx_packets += rx_packets;
 177                tot->tx_packets += tx_packets;
 178                tot->rx_bytes   += rx_bytes;
 179                tot->tx_bytes   += tx_bytes;
 180        }
 181
 182        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 183        tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 184        tot->tx_dropped = dev->stats.tx_dropped;
 185        tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
 186        tot->tx_errors = dev->stats.tx_errors;
 187        tot->collisions = dev->stats.collisions;
 188
 189        return tot;
 190}
 191
 192static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
 193                __be32 remote, __be32 local)
 194{
 195        unsigned int h0 = HASH(remote);
 196        unsigned int h1 = HASH(local);
 197        struct ip_tunnel *t;
 198        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 199
 200        for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 201                if (local == t->parms.iph.saddr &&
 202                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 203                        return t;
 204
 205        for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 206                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 207                        return t;
 208
 209        for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 210                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 211                        return t;
 212
 213        t = rcu_dereference(ipn->tunnels_wc[0]);
 214        if (t && (t->dev->flags&IFF_UP))
 215                return t;
 216        return NULL;
 217}
 218
 219static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 220                struct ip_tunnel_parm *parms)
 221{
 222        __be32 remote = parms->iph.daddr;
 223        __be32 local = parms->iph.saddr;
 224        unsigned int h = 0;
 225        int prio = 0;
 226
 227        if (remote) {
 228                prio |= 2;
 229                h ^= HASH(remote);
 230        }
 231        if (local) {
 232                prio |= 1;
 233                h ^= HASH(local);
 234        }
 235        return &ipn->tunnels[prio][h];
 236}
 237
 238static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 239                struct ip_tunnel *t)
 240{
 241        return __ipip_bucket(ipn, &t->parms);
 242}
 243
 244static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 245{
 246        struct ip_tunnel __rcu **tp;
 247        struct ip_tunnel *iter;
 248
 249        for (tp = ipip_bucket(ipn, t);
 250             (iter = rtnl_dereference(*tp)) != NULL;
 251             tp = &iter->next) {
 252                if (t == iter) {
 253                        rcu_assign_pointer(*tp, t->next);
 254                        break;
 255                }
 256        }
 257}
 258
 259static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 260{
 261        struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 262
 263        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 264        rcu_assign_pointer(*tp, t);
 265}
 266
 267static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
 268                struct ip_tunnel_parm *parms, int create)
 269{
 270        __be32 remote = parms->iph.daddr;
 271        __be32 local = parms->iph.saddr;
 272        struct ip_tunnel *t, *nt;
 273        struct ip_tunnel __rcu **tp;
 274        struct net_device *dev;
 275        char name[IFNAMSIZ];
 276        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 277
 278        for (tp = __ipip_bucket(ipn, parms);
 279                 (t = rtnl_dereference(*tp)) != NULL;
 280                 tp = &t->next) {
 281                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 282                        return t;
 283        }
 284        if (!create)
 285                return NULL;
 286
 287        if (parms->name[0])
 288                strlcpy(name, parms->name, IFNAMSIZ);
 289        else
 290                strcpy(name, "tunl%d");
 291
 292        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 293        if (dev == NULL)
 294                return NULL;
 295
 296        dev_net_set(dev, net);
 297
 298        nt = netdev_priv(dev);
 299        nt->parms = *parms;
 300
 301        if (ipip_tunnel_init(dev) < 0)
 302                goto failed_free;
 303
 304        if (register_netdevice(dev) < 0)
 305                goto failed_free;
 306
 307        strcpy(nt->parms.name, dev->name);
 308
 309        dev_hold(dev);
 310        ipip_tunnel_link(ipn, nt);
 311        return nt;
 312
 313failed_free:
 314        ipip_dev_free(dev);
 315        return NULL;
 316}
 317
 318/* called with RTNL */
 319static void ipip_tunnel_uninit(struct net_device *dev)
 320{
 321        struct net *net = dev_net(dev);
 322        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 323
 324        if (dev == ipn->fb_tunnel_dev)
 325                RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
 326        else
 327                ipip_tunnel_unlink(ipn, netdev_priv(dev));
 328        dev_put(dev);
 329}
 330
 331static int ipip_err(struct sk_buff *skb, u32 info)
 332{
 333
 334/* All the routers (except for Linux) return only
 335   8 bytes of packet payload. It means, that precise relaying of
 336   ICMP in the real Internet is absolutely infeasible.
 337 */
 338        const struct iphdr *iph = (const struct iphdr *)skb->data;
 339        const int type = icmp_hdr(skb)->type;
 340        const int code = icmp_hdr(skb)->code;
 341        struct ip_tunnel *t;
 342        int err;
 343
 344        switch (type) {
 345        default:
 346        case ICMP_PARAMETERPROB:
 347                return 0;
 348
 349        case ICMP_DEST_UNREACH:
 350                switch (code) {
 351                case ICMP_SR_FAILED:
 352                case ICMP_PORT_UNREACH:
 353                        /* Impossible event. */
 354                        return 0;
 355                default:
 356                        /* All others are translated to HOST_UNREACH.
 357                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 358                           I believe they are just ether pollution. --ANK
 359                         */
 360                        break;
 361                }
 362                break;
 363        case ICMP_TIME_EXCEEDED:
 364                if (code != ICMP_EXC_TTL)
 365                        return 0;
 366                break;
 367        case ICMP_REDIRECT:
 368                break;
 369        }
 370
 371        err = -ENOENT;
 372        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 373        if (t == NULL)
 374                goto out;
 375
 376        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 377                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 378                                 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
 379                err = 0;
 380                goto out;
 381        }
 382
 383        if (type == ICMP_REDIRECT) {
 384                ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
 385                              IPPROTO_IPIP, 0);
 386                err = 0;
 387                goto out;
 388        }
 389
 390        if (t->parms.iph.daddr == 0)
 391                goto out;
 392
 393        err = 0;
 394        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 395                goto out;
 396
 397        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 398                t->err_count++;
 399        else
 400                t->err_count = 1;
 401        t->err_time = jiffies;
 402out:
 403
 404        return err;
 405}
 406
 407static int ipip_rcv(struct sk_buff *skb)
 408{
 409        struct ip_tunnel *tunnel;
 410        const struct iphdr *iph = ip_hdr(skb);
 411        int err;
 412
 413        tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 414        if (tunnel != NULL) {
 415                struct pcpu_tstats *tstats;
 416
 417                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 418                        goto drop;
 419
 420                secpath_reset(skb);
 421
 422                skb->mac_header = skb->network_header;
 423                skb_reset_network_header(skb);
 424                skb->protocol = htons(ETH_P_IP);
 425                skb->pkt_type = PACKET_HOST;
 426
 427                __skb_tunnel_rx(skb, tunnel->dev);
 428
 429                err = IP_ECN_decapsulate(iph, skb);
 430                if (unlikely(err)) {
 431                        if (log_ecn_error)
 432                                net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 433                                                     &iph->saddr, iph->tos);
 434                        if (err > 1) {
 435                                ++tunnel->dev->stats.rx_frame_errors;
 436                                ++tunnel->dev->stats.rx_errors;
 437                                goto drop;
 438                        }
 439                }
 440
 441                tstats = this_cpu_ptr(tunnel->dev->tstats);
 442                u64_stats_update_begin(&tstats->syncp);
 443                tstats->rx_packets++;
 444                tstats->rx_bytes += skb->len;
 445                u64_stats_update_end(&tstats->syncp);
 446
 447                netif_rx(skb);
 448                return 0;
 449        }
 450
 451        return -1;
 452
 453drop:
 454        kfree_skb(skb);
 455        return 0;
 456}
 457
  458/*
  459 *      This function assumes it is being called from dev_queue_xmit()
  460 *      and that skb is filled properly by that function.
       *
       *      Transmit path (ndo_start_xmit): routes towards the tunnel
       *      endpoint, applies the DF/path-MTU checks, makes sure there is
       *      headroom, pushes a new outer IPv4 header with protocol
       *      IPPROTO_IPIP and hands the packet to __IPTUNNEL_XMIT().
       *
       *      Always returns NETDEV_TX_OK.  On every error path the skb is
       *      freed and a counter in dev->stats is incremented.
  461 */
  462
  463static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  464{
  465        struct ip_tunnel *tunnel = netdev_priv(dev);
  466        struct pcpu_tstats *tstats;
  467        const struct iphdr  *tiph = &tunnel->parms.iph;
  468        u8     tos = tunnel->parms.iph.tos;
  469        __be16 df = tiph->frag_off;
  470        struct rtable *rt;                      /* Route to the other host */
  471        struct net_device *tdev;                /* Device to other host */
  472        const struct iphdr  *old_iph = ip_hdr(skb);
  473        struct iphdr  *iph;                     /* Our new IP header */
  474        unsigned int max_headroom;              /* The extra header space needed */
  475        __be32 dst = tiph->daddr;
  476        struct flowi4 fl4;
  477        int    mtu;
  478
             /* Only IPv4 payloads are encapsulated. */
  479        if (skb->protocol != htons(ETH_P_IP))
  480                goto tx_error;
  481
             /* Low bit set in the configured TOS: use the inner packet's TOS. */
  482        if (tos & 1)
  483                tos = old_iph->tos;
  484
  485        if (!dst) {
  486                /* NBMA tunnel */
                     /* No configured remote: use the next hop of the skb's route. */
  487                if ((rt = skb_rtable(skb)) == NULL) {
  488                        dev->stats.tx_fifo_errors++;
  489                        goto tx_error;
  490                }
  491                dst = rt_nexthop(rt, old_iph->daddr);
  492        }
  493
  494        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
  495                                   dst, tiph->saddr,
  496                                   0, 0,
  497                                   IPPROTO_IPIP, RT_TOS(tos),
  498                                   tunnel->parms.link);
  499        if (IS_ERR(rt)) {
  500                dev->stats.tx_carrier_errors++;
  501                goto tx_error_icmp;
  502        }
  503        tdev = rt->dst.dev;
  504
             /* The route leads back into this tunnel device: drop. */
  505        if (tdev == dev) {
  506                ip_rt_put(rt);
  507                dev->stats.collisions++;
  508                goto tx_error;
  509        }
  510
             /* DF is set if configured on the tunnel or set on the inner packet. */
  511        df |= old_iph->frag_off & htons(IP_DF);
  512
  513        if (df) {
                     /* MTU available to the inner packet after the outer header. */
  514                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
  515
  516                if (mtu < 68) {
  517                        dev->stats.collisions++;
  518                        ip_rt_put(rt);
  519                        goto tx_error;
  520                }
  521
  522                if (skb_dst(skb))
  523                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
  524
                     /* Inner packet has DF and does not fit: report FRAG_NEEDED. */
  525                if ((old_iph->frag_off & htons(IP_DF)) &&
  526                    mtu < ntohs(old_iph->tot_len)) {
  527                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
  528                                  htonl(mtu));
  529                        ip_rt_put(rt);
  530                        goto tx_error;
  531                }
  532        }
  533
             /*
              * err_count/err_time are maintained by ipip_err().  While errors
              * are recent, consume one count per packet and signal link
              * failure; once the window has passed, reset the count.
              */
  534        if (tunnel->err_count > 0) {
  535                if (time_before(jiffies,
  536                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
  537                        tunnel->err_count--;
  538                        dst_link_failure(skb);
  539                } else
  540                        tunnel->err_count = 0;
  541        }
  542
  543        /*
  544         * Okay, now see if we can stuff it in the buffer as-is.
  545         */
  546        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
  547
  548        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
  549            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
  550                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
  551                if (!new_skb) {
  552                        ip_rt_put(rt);
  553                        dev->stats.tx_dropped++;
  554                        dev_kfree_skb(skb);
  555                        return NETDEV_TX_OK;
  556                }
  557                if (skb->sk)
  558                        skb_set_owner_w(new_skb, skb->sk);
  559                dev_kfree_skb(skb);
  560                skb = new_skb;
                     /* The header now lives in the new buffer; refresh the pointer. */
  561                old_iph = ip_hdr(skb);
  562        }
  563
             /* The inner IP header becomes the transport header of the outer one. */
  564        skb->transport_header = skb->network_header;
  565        skb_push(skb, sizeof(struct iphdr));
  566        skb_reset_network_header(skb);
  567        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  568        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  569                              IPSKB_REROUTED);
  570        skb_dst_drop(skb);
  571        skb_dst_set(skb, &rt->dst);
  572
  573        /*
  574         *      Push down and install the IPIP header.
  575         */
  576
  577        iph                     =       ip_hdr(skb);
  578        iph->version            =       4;
  579        iph->ihl                =       sizeof(struct iphdr)>>2;
  580        iph->frag_off           =       df;
  581        iph->protocol           =       IPPROTO_IPIP;
  582        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
  583        iph->daddr              =       fl4.daddr;
  584        iph->saddr              =       fl4.saddr;
  585
             /* A configured TTL of 0 means: copy the inner packet's TTL. */
  586        if ((iph->ttl = tiph->ttl) == 0)
  587                iph->ttl        =       old_iph->ttl;
  588
  589        nf_reset(skb);
  590        tstats = this_cpu_ptr(dev->tstats);
  591        __IPTUNNEL_XMIT(tstats, &dev->stats);
  592        return NETDEV_TX_OK;
  593
  594tx_error_icmp:
  595        dst_link_failure(skb);
  596tx_error:
  597        dev->stats.tx_errors++;
  598        dev_kfree_skb(skb);
  599        return NETDEV_TX_OK;
  600}
 601
 602static void ipip_tunnel_bind_dev(struct net_device *dev)
 603{
 604        struct net_device *tdev = NULL;
 605        struct ip_tunnel *tunnel;
 606        const struct iphdr *iph;
 607
 608        tunnel = netdev_priv(dev);
 609        iph = &tunnel->parms.iph;
 610
 611        if (iph->daddr) {
 612                struct rtable *rt;
 613                struct flowi4 fl4;
 614
 615                rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 616                                           iph->daddr, iph->saddr,
 617                                           0, 0,
 618                                           IPPROTO_IPIP,
 619                                           RT_TOS(iph->tos),
 620                                           tunnel->parms.link);
 621                if (!IS_ERR(rt)) {
 622                        tdev = rt->dst.dev;
 623                        ip_rt_put(rt);
 624                }
 625                dev->flags |= IFF_POINTOPOINT;
 626        }
 627
 628        if (!tdev && tunnel->parms.link)
 629                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 630
 631        if (tdev) {
 632                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 633                dev->mtu = tdev->mtu - sizeof(struct iphdr);
 634        }
 635        dev->iflink = tunnel->parms.link;
 636}
 637
  /*
   * ipip_tunnel_ioctl - ndo_do_ioctl handler for the tunnel ioctls
   * @dev: device the ioctl was issued on
   * @ifr: request; ifr_ifru.ifru_data points at a user struct ip_tunnel_parm
   * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
   *
   * Returns 0 on success or a negative errno: -EFAULT for failed user
   * copies, -EPERM without CAP_NET_ADMIN (or when asked to delete the
   * fallback device via a lookup), -EINVAL for bad parameters or an unknown
   * command, -EEXIST / -ENOENT / -ENOBUFS as set below.
   *
   * When issued on the fallback device, the parameters supplied by the
   * user select which tunnel is operated on.
   */
  638static int
  639ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
  640{
  641        int err = 0;
  642        struct ip_tunnel_parm p;
  643        struct ip_tunnel *t;
  644        struct net *net = dev_net(dev);
  645        struct ipip_net *ipn = net_generic(net, ipip_net_id);
  646
  647        switch (cmd) {
  648        case SIOCGETTUNNEL:
  649                t = NULL;
  650                if (dev == ipn->fb_tunnel_dev) {
  651                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
  652                                err = -EFAULT;
  653                                break;
  654                        }
  655                        t = ipip_tunnel_locate(net, &p, 0);
  656                }
                     /* No lookup done, or nothing found: report this device. */
  657                if (t == NULL)
  658                        t = netdev_priv(dev);
  659                memcpy(&p, &t->parms, sizeof(p));
  660                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  661                        err = -EFAULT;
  662                break;
  663
  664        case SIOCADDTUNNEL:
  665        case SIOCCHGTUNNEL:
  666                err = -EPERM;
  667                if (!capable(CAP_NET_ADMIN))
  668                        goto done;
  669
  670                err = -EFAULT;
  671                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  672                        goto done;
  673
                     /* Header template must be plain IPv4/IPIP; only DF may be set. */
  674                err = -EINVAL;
  675                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  676                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
  677                        goto done;
                     /* A fixed TTL forces DF on the outer header. */
  678                if (p.iph.ttl)
  679                        p.iph.frag_off |= htons(IP_DF);
  680
                     /* Creation is only requested for SIOCADDTUNNEL. */
  681                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
  682
  683                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  684                        if (t != NULL) {
                                     /* Another device already owns these endpoints. */
  685                                if (t->dev != dev) {
  686                                        err = -EEXIST;
  687                                        break;
  688                                }
  689                        } else {
                                     /* Point-to-point-ness cannot change on an existing device. */
  690                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
  691                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
  692                                        err = -EINVAL;
  693                                        break;
  694                                }
                                     /*
                                      * Re-key this tunnel: unlink, wait with
                                      * synchronize_net(), change the addresses,
                                      * then link it under the new key.
                                      */
  695                                t = netdev_priv(dev);
  696                                ipip_tunnel_unlink(ipn, t);
  697                                synchronize_net();
  698                                t->parms.iph.saddr = p.iph.saddr;
  699                                t->parms.iph.daddr = p.iph.daddr;
  700                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
  701                                memcpy(dev->broadcast, &p.iph.daddr, 4);
  702                                ipip_tunnel_link(ipn, t);
  703                                netdev_state_change(dev);
  704                        }
  705                }
  706
  707                if (t) {
  708                        err = 0;
  709                        if (cmd == SIOCCHGTUNNEL) {
  710                                t->parms.iph.ttl = p.iph.ttl;
  711                                t->parms.iph.tos = p.iph.tos;
  712                                t->parms.iph.frag_off = p.iph.frag_off;
  713                                if (t->parms.link != p.link) {
  714                                        t->parms.link = p.link;
                                             /*
                                              * NOTE(review): this rebinds 'dev', which
                                              * may differ from t->dev when the request
                                              * was issued on the fallback device --
                                              * confirm that this is intended.
                                              */
  715                                        ipip_tunnel_bind_dev(dev);
  716                                        netdev_state_change(dev);
  717                                }
  718                        }
  719                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
  720                                err = -EFAULT;
  721                } else
  722                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  723                break;
  724
  725        case SIOCDELTUNNEL:
  726                err = -EPERM;
  727                if (!capable(CAP_NET_ADMIN))
  728                        goto done;
  729
  730                if (dev == ipn->fb_tunnel_dev) {
  731                        err = -EFAULT;
  732                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  733                                goto done;
  734                        err = -ENOENT;
  735                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
  736                                goto done;
                             /* The fallback device itself cannot be deleted this way. */
  737                        err = -EPERM;
  738                        if (t->dev == ipn->fb_tunnel_dev)
  739                                goto done;
  740                        dev = t->dev;
  741                }
  742                unregister_netdevice(dev);
  743                err = 0;
  744                break;
  745
  746        default:
  747                err = -EINVAL;
  748        }
  749
  750done:
  751        return err;
  752}
 753
 754static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 755{
 756        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 757                return -EINVAL;
 758        dev->mtu = new_mtu;
 759        return 0;
 760}
 761
  /*
   * Device operations installed by ipip_tunnel_setup().  No ndo_init is
   * set: ipip_tunnel_init() / ipip_fb_tunnel_init() are called directly
   * by the code that allocates the device.
   */
  762static const struct net_device_ops ipip_netdev_ops = {
  763        .ndo_uninit     = ipip_tunnel_uninit,
  764        .ndo_start_xmit = ipip_tunnel_xmit,
  765        .ndo_do_ioctl   = ipip_tunnel_ioctl,
  766        .ndo_change_mtu = ipip_tunnel_change_mtu,
  767        .ndo_get_stats64 = ipip_get_stats64,
  768};
 769
 770static void ipip_dev_free(struct net_device *dev)
 771{
 772        free_percpu(dev->tstats);
 773        free_netdev(dev);
 774}
 775
 776static void ipip_tunnel_setup(struct net_device *dev)
 777{
 778        dev->netdev_ops         = &ipip_netdev_ops;
 779        dev->destructor         = ipip_dev_free;
 780
 781        dev->type               = ARPHRD_TUNNEL;
 782        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 783        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 784        dev->flags              = IFF_NOARP;
 785        dev->iflink             = 0;
 786        dev->addr_len           = 4;
 787        dev->features           |= NETIF_F_NETNS_LOCAL;
 788        dev->features           |= NETIF_F_LLTX;
 789        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 790}
 791
 792static int ipip_tunnel_init(struct net_device *dev)
 793{
 794        struct ip_tunnel *tunnel = netdev_priv(dev);
 795
 796        tunnel->dev = dev;
 797
 798        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 799        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 800
 801        ipip_tunnel_bind_dev(dev);
 802
 803        dev->tstats = alloc_percpu(struct pcpu_tstats);
 804        if (!dev->tstats)
 805                return -ENOMEM;
 806
 807        return 0;
 808}
 809
 810static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 811{
 812        struct ip_tunnel *tunnel = netdev_priv(dev);
 813        struct iphdr *iph = &tunnel->parms.iph;
 814        struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 815
 816        tunnel->dev = dev;
 817        strcpy(tunnel->parms.name, dev->name);
 818
 819        iph->version            = 4;
 820        iph->protocol           = IPPROTO_IPIP;
 821        iph->ihl                = 5;
 822
 823        dev->tstats = alloc_percpu(struct pcpu_tstats);
 824        if (!dev->tstats)
 825                return -ENOMEM;
 826
 827        dev_hold(dev);
 828        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 829        return 0;
 830}
 831
  /*
   * Receive and ICMP-error callbacks; registered with
   * xfrm4_tunnel_register() in ipip_init() and removed in ipip_fini().
   */
  832static struct xfrm_tunnel ipip_handler __read_mostly = {
  833        .handler        =       ipip_rcv,
  834        .err_handler    =       ipip_err,
  835        .priority       =       1,
  836};
 837
  /* Message printed by ipip_init(); it is passed to printk() as the format string. */
  838static const char banner[] __initconst =
  839        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 840
 841static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 842{
 843        int prio;
 844
 845        for (prio = 1; prio < 4; prio++) {
 846                int h;
 847                for (h = 0; h < HASH_SIZE; h++) {
 848                        struct ip_tunnel *t;
 849
 850                        t = rtnl_dereference(ipn->tunnels[prio][h]);
 851                        while (t != NULL) {
 852                                unregister_netdevice_queue(t->dev, head);
 853                                t = rtnl_dereference(t->next);
 854                        }
 855                }
 856        }
 857}
 858
 859static int __net_init ipip_init_net(struct net *net)
 860{
 861        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 862        struct ip_tunnel *t;
 863        int err;
 864
 865        ipn->tunnels[0] = ipn->tunnels_wc;
 866        ipn->tunnels[1] = ipn->tunnels_l;
 867        ipn->tunnels[2] = ipn->tunnels_r;
 868        ipn->tunnels[3] = ipn->tunnels_r_l;
 869
 870        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 871                                           "tunl0",
 872                                           ipip_tunnel_setup);
 873        if (!ipn->fb_tunnel_dev) {
 874                err = -ENOMEM;
 875                goto err_alloc_dev;
 876        }
 877        dev_net_set(ipn->fb_tunnel_dev, net);
 878
 879        err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 880        if (err)
 881                goto err_reg_dev;
 882
 883        if ((err = register_netdev(ipn->fb_tunnel_dev)))
 884                goto err_reg_dev;
 885
 886        t = netdev_priv(ipn->fb_tunnel_dev);
 887
 888        strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 889        return 0;
 890
 891err_reg_dev:
 892        ipip_dev_free(ipn->fb_tunnel_dev);
 893err_alloc_dev:
 894        /* nothing */
 895        return err;
 896}
 897
 898static void __net_exit ipip_exit_net(struct net *net)
 899{
 900        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 901        LIST_HEAD(list);
 902
 903        rtnl_lock();
 904        ipip_destroy_tunnels(ipn, &list);
 905        unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 906        unregister_netdevice_many(&list);
 907        rtnl_unlock();
 908}
 909
  /*
   * Per-namespace hooks.  .id and .size describe the struct ipip_net that
   * the init/exit callbacks retrieve with net_generic(net, ipip_net_id).
   */
  910static struct pernet_operations ipip_net_ops = {
  911        .init = ipip_init_net,
  912        .exit = ipip_exit_net,
  913        .id   = &ipip_net_id,
  914        .size = sizeof(struct ipip_net),
  915};
 916
 917static int __init ipip_init(void)
 918{
 919        int err;
 920
 921        printk(banner);
 922
 923        err = register_pernet_device(&ipip_net_ops);
 924        if (err < 0)
 925                return err;
 926        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 927        if (err < 0) {
 928                unregister_pernet_device(&ipip_net_ops);
 929                pr_info("%s: can't register tunnel\n", __func__);
 930        }
 931        return err;
 932}
 933
 934static void __exit ipip_fini(void)
 935{
 936        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 937                pr_info("%s: can't deregister tunnel\n", __func__);
 938
 939        unregister_pernet_device(&ipip_net_ops);
 940}
 941
  /* Module entry/exit points, licence, and an alias for the device name "tunl0". */
  942module_init(ipip_init);
  943module_exit(ipip_fini);
  944MODULE_LICENSE("GPL");
  945MODULE_ALIAS_NETDEV("tunl0");
 946
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.