linux/net/ipv4/ip_gre.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     GRE over IP protocol decoder.
   3 *
   4 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#include <linux/capability.h>
  14#include <linux/module.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/slab.h>
  18#include <asm/uaccess.h>
  19#include <linux/skbuff.h>
  20#include <linux/netdevice.h>
  21#include <linux/in.h>
  22#include <linux/tcp.h>
  23#include <linux/udp.h>
  24#include <linux/if_arp.h>
  25#include <linux/mroute.h>
  26#include <linux/init.h>
  27#include <linux/in6.h>
  28#include <linux/inetdevice.h>
  29#include <linux/igmp.h>
  30#include <linux/netfilter_ipv4.h>
  31#include <linux/etherdevice.h>
  32#include <linux/if_ether.h>
  33
  34#include <net/sock.h>
  35#include <net/ip.h>
  36#include <net/icmp.h>
  37#include <net/protocol.h>
  38#include <net/ipip.h>
  39#include <net/arp.h>
  40#include <net/checksum.h>
  41#include <net/dsfield.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46#include <net/rtnetlink.h>
  47
  48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#endif
  53
  54/*
  55   Problems & solutions
  56   --------------------
  57
  58   1. The most important issue is detecting local dead loops.
  59   They would cause complete host lockup in transmit, which
  60   would be "resolved" by stack overflow or, if queueing is enabled,
  61   with infinite looping in net_bh.
  62
  63   We cannot track such dead loops during route installation,
  64   it is infeasible task. The most general solutions would be
  65   to keep skb->encapsulation counter (sort of local ttl),
  66   and silently drop packet when it expires. It is the best
  67   solution, but it supposes maintaining new variable in ALL
  68   skb, even if no tunneling is used.
  69
  70   Current solution: HARD_TX_LOCK lock breaks dead loops.
  71
  72
  73
  74   2. Networking dead loops would not kill routers, but would really
  75   kill network. IP hop limit plays role of "t->recursion" in this case,
  76   if we copy it from packet being encapsulated to upper header.
  77   It is very good solution, but it introduces two problems:
  78
  79   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  80     do not work over tunnels.
  81   - traceroute does not work. I planned to relay ICMP from tunnel,
  82     so that this problem would be solved and traceroute output
  83     would even more informative. This idea appeared to be wrong:
  84     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  85     true router now :-)), all routers (at least, in neighbourhood of mine)
  86     return only 8 bytes of payload. It is the end.
  87
  88   Hence, if we want that OSPF worked or traceroute said something reasonable,
  89   we should search for another solution.
  90
  91   One of them is to parse packet trying to detect inner encapsulation
  92   made by our node. It is difficult or even impossible, especially,
  93   taking into account fragmentation. To be short, it is not a solution at all.
  94
  95   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  96   We force DF flag on tunnels with preconfigured hop limit,
  97   that is ALL. :-) Well, it does not remove the problem completely,
  98   but exponential growth of network traffic is changed to linear
  99   (branches, that exceed pmtu are pruned) and tunnel mtu
 100   fastly degrades to value <68, where looping stops.
 101   Yes, it is not good if there exists a router in the loop,
 102   which does not force DF, even when encapsulating packets have DF set.
 103   But it is not our problem! Nobody could accuse us, we made
 104   all that we could make. Even if it is your gated who injected
 105   fatal route to network, even if it were you who configured
 106   fatal static route: you are innocent. :-)
 107
 108
 109
 110   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
 111   practically identical code. It would be good to glue them
 112   together, but it is not very evident, how to make them modular.
 113   sit is integral part of IPv6, ipip and gre are naturally modular.
 114   We could extract common parts (hash table, ioctl etc)
 115   to a separate module (ip_tunnel.c).
 116
 117   Alexey Kuznetsov.
 118 */
 119
 120static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 121static int ipgre_tunnel_init(struct net_device *dev);
 122static void ipgre_tunnel_setup(struct net_device *dev);
 123static int ipgre_tunnel_bind_dev(struct net_device *dev);
 124
 125/* Fallback tunnel: no source, no destination, no key, no options */
 126
 127#define HASH_SIZE  16
 128
/* Identifier passed to net_generic() to fetch this module's struct ipgre_net. */
 129static int ipgre_net_id __read_mostly;
/* GRE tunnel state kept per network namespace. */
 130struct ipgre_net {
        /* Tunnel hash tables: first index selects the table, second the bucket. */
 131        struct ip_tunnel *tunnels[4][HASH_SIZE];
 132
        /* Device of the fallback tunnel, used when no configured tunnel matches. */
 133        struct net_device *fb_tunnel_dev;
 134};
 135
 136/* Tunnel hash table */
 137
 138/*
 139   4 hash tables:
 140
 141   3: (remote,local)
 142   2: (remote,*)
 143   1: (*,local)
 144   0: (*,*)
 145
 146   We require exact key match i.e. if a key is present in packet
 147   it will match only tunnel with the same key; if it is not present,
 148   it will match only keyless tunnel.
 149
 150   All keyless packets, if not matched by configured keyless tunnels,
 151   will match fallback tunnel.
 152 */
 153
 154#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 155
 156#define tunnels_r_l     tunnels[3]
 157#define tunnels_r       tunnels[2]
 158#define tunnels_l       tunnels[1]
 159#define tunnels_wc      tunnels[0]
 160/*
 161 * Locking : hash tables are protected by RCU and a spinlock
 162 */
 163static DEFINE_SPINLOCK(ipgre_lock);
 164
/*
 * Walk a tunnel hash chain beginning at 'start', loading each link with
 * rcu_dereference().  The cursor is not a macro parameter: a variable
 * 'struct ip_tunnel *t' must be declared in the enclosing scope.
 */
 165#define for_each_ip_tunnel_rcu(start) \
 166        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 167
 168/* Given src, dst and key, find appropriate for input tunnel. */
 169
 170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
 171                                              __be32 remote, __be32 local,
 172                                              __be32 key, __be16 gre_proto)
 173{
 174        struct net *net = dev_net(dev);
 175        int link = dev->ifindex;
 176        unsigned h0 = HASH(remote);
 177        unsigned h1 = HASH(key);
 178        struct ip_tunnel *t, *cand = NULL;
 179        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 180        int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
 181                       ARPHRD_ETHER : ARPHRD_IPGRE;
 182        int score, cand_score = 4;
 183
 184        for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
 185                if (local != t->parms.iph.saddr ||
 186                    remote != t->parms.iph.daddr ||
 187                    key != t->parms.i_key ||
 188                    !(t->dev->flags & IFF_UP))
 189                        continue;
 190
 191                if (t->dev->type != ARPHRD_IPGRE &&
 192                    t->dev->type != dev_type)
 193                        continue;
 194
 195                score = 0;
 196                if (t->parms.link != link)
 197                        score |= 1;
 198                if (t->dev->type != dev_type)
 199                        score |= 2;
 200                if (score == 0)
 201                        return t;
 202
 203                if (score < cand_score) {
 204                        cand = t;
 205                        cand_score = score;
 206                }
 207        }
 208
 209        for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
 210                if (remote != t->parms.iph.daddr ||
 211                    key != t->parms.i_key ||
 212                    !(t->dev->flags & IFF_UP))
 213                        continue;
 214
 215                if (t->dev->type != ARPHRD_IPGRE &&
 216                    t->dev->type != dev_type)
 217                        continue;
 218
 219                score = 0;
 220                if (t->parms.link != link)
 221                        score |= 1;
 222                if (t->dev->type != dev_type)
 223                        score |= 2;
 224                if (score == 0)
 225                        return t;
 226
 227                if (score < cand_score) {
 228                        cand = t;
 229                        cand_score = score;
 230                }
 231        }
 232
 233        for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
 234                if ((local != t->parms.iph.saddr &&
 235                     (local != t->parms.iph.daddr ||
 236                      !ipv4_is_multicast(local))) ||
 237                    key != t->parms.i_key ||
 238                    !(t->dev->flags & IFF_UP))
 239                        continue;
 240
 241                if (t->dev->type != ARPHRD_IPGRE &&
 242                    t->dev->type != dev_type)
 243                        continue;
 244
 245                score = 0;
 246                if (t->parms.link != link)
 247                        score |= 1;
 248                if (t->dev->type != dev_type)
 249                        score |= 2;
 250                if (score == 0)
 251                        return t;
 252
 253                if (score < cand_score) {
 254                        cand = t;
 255                        cand_score = score;
 256                }
 257        }
 258
 259        for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
 260                if (t->parms.i_key != key ||
 261                    !(t->dev->flags & IFF_UP))
 262                        continue;
 263
 264                if (t->dev->type != ARPHRD_IPGRE &&
 265                    t->dev->type != dev_type)
 266                        continue;
 267
 268                score = 0;
 269                if (t->parms.link != link)
 270                        score |= 1;
 271                if (t->dev->type != dev_type)
 272                        score |= 2;
 273                if (score == 0)
 274                        return t;
 275
 276                if (score < cand_score) {
 277                        cand = t;
 278                        cand_score = score;
 279                }
 280        }
 281
 282        if (cand != NULL)
 283                return cand;
 284
 285        dev = ign->fb_tunnel_dev;
 286        if (dev->flags & IFF_UP)
 287                return netdev_priv(dev);
 288
 289        return NULL;
 290}
 291
 292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
 293                struct ip_tunnel_parm *parms)
 294{
 295        __be32 remote = parms->iph.daddr;
 296        __be32 local = parms->iph.saddr;
 297        __be32 key = parms->i_key;
 298        unsigned h = HASH(key);
 299        int prio = 0;
 300
 301        if (local)
 302                prio |= 1;
 303        if (remote && !ipv4_is_multicast(remote)) {
 304                prio |= 2;
 305                h ^= HASH(remote);
 306        }
 307
 308        return &ign->tunnels[prio][h];
 309}
 310
 311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
 312                struct ip_tunnel *t)
 313{
 314        return __ipgre_bucket(ign, &t->parms);
 315}
 316
 317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
 318{
 319        struct ip_tunnel **tp = ipgre_bucket(ign, t);
 320
 321        spin_lock_bh(&ipgre_lock);
 322        t->next = *tp;
 323        rcu_assign_pointer(*tp, t);
 324        spin_unlock_bh(&ipgre_lock);
 325}
 326
 327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
 328{
 329        struct ip_tunnel **tp;
 330
 331        for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
 332                if (t == *tp) {
 333                        spin_lock_bh(&ipgre_lock);
 334                        *tp = t->next;
 335                        spin_unlock_bh(&ipgre_lock);
 336                        break;
 337                }
 338        }
 339}
 340
 341static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
 342                                           struct ip_tunnel_parm *parms,
 343                                           int type)
 344{
 345        __be32 remote = parms->iph.daddr;
 346        __be32 local = parms->iph.saddr;
 347        __be32 key = parms->i_key;
 348        int link = parms->link;
 349        struct ip_tunnel *t, **tp;
 350        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 351
 352        for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
 353                if (local == t->parms.iph.saddr &&
 354                    remote == t->parms.iph.daddr &&
 355                    key == t->parms.i_key &&
 356                    link == t->parms.link &&
 357                    type == t->dev->type)
 358                        break;
 359
 360        return t;
 361}
 362
 363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
 364                struct ip_tunnel_parm *parms, int create)
 365{
 366        struct ip_tunnel *t, *nt;
 367        struct net_device *dev;
 368        char name[IFNAMSIZ];
 369        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 370
 371        t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
 372        if (t || !create)
 373                return t;
 374
 375        if (parms->name[0])
 376                strlcpy(name, parms->name, IFNAMSIZ);
 377        else
 378                sprintf(name, "gre%%d");
 379
 380        dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
 381        if (!dev)
 382          return NULL;
 383
 384        dev_net_set(dev, net);
 385
 386        if (strchr(name, '%')) {
 387                if (dev_alloc_name(dev, name) < 0)
 388                        goto failed_free;
 389        }
 390
 391        nt = netdev_priv(dev);
 392        nt->parms = *parms;
 393        dev->rtnl_link_ops = &ipgre_link_ops;
 394
 395        dev->mtu = ipgre_tunnel_bind_dev(dev);
 396
 397        if (register_netdevice(dev) < 0)
 398                goto failed_free;
 399
 400        dev_hold(dev);
 401        ipgre_tunnel_link(ign, nt);
 402        return nt;
 403
 404failed_free:
 405        free_netdev(dev);
 406        return NULL;
 407}
 408
 409static void ipgre_tunnel_uninit(struct net_device *dev)
 410{
 411        struct net *net = dev_net(dev);
 412        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 413
 414        ipgre_tunnel_unlink(ign, netdev_priv(dev));
 415        dev_put(dev);
 416}
 417
 418
 419static void ipgre_err(struct sk_buff *skb, u32 info)
 420{
 421
 422/* All the routers (except for Linux) return only
 423   8 bytes of packet payload. It means, that precise relaying of
 424   ICMP in the real Internet is absolutely infeasible.
 425
 426   Moreover, Cisco "wise men" put GRE key to the third word
 427   in GRE header. It makes impossible maintaining even soft state for keyed
 428   GRE tunnels with enabled checksum. Tell them "thank you".
 429
 430   Well, I wonder, rfc1812 was written by Cisco employee,
 431   what the hell these idiots break standrads established
 432   by themself???
 433 */
 434
 435        struct iphdr *iph = (struct iphdr *)skb->data;
 436        __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
 437        int grehlen = (iph->ihl<<2) + 4;
 438        const int type = icmp_hdr(skb)->type;
 439        const int code = icmp_hdr(skb)->code;
 440        struct ip_tunnel *t;
 441        __be16 flags;
 442
 443        flags = p[0];
 444        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
 445                if (flags&(GRE_VERSION|GRE_ROUTING))
 446                        return;
 447                if (flags&GRE_KEY) {
 448                        grehlen += 4;
 449                        if (flags&GRE_CSUM)
 450                                grehlen += 4;
 451                }
 452        }
 453
 454        /* If only 8 bytes returned, keyed message will be dropped here */
 455        if (skb_headlen(skb) < grehlen)
 456                return;
 457
 458        switch (type) {
 459        default:
 460        case ICMP_PARAMETERPROB:
 461                return;
 462
 463        case ICMP_DEST_UNREACH:
 464                switch (code) {
 465                case ICMP_SR_FAILED:
 466                case ICMP_PORT_UNREACH:
 467                        /* Impossible event. */
 468                        return;
 469                case ICMP_FRAG_NEEDED:
 470                        /* Soft state for pmtu is maintained by IP core. */
 471                        return;
 472                default:
 473                        /* All others are translated to HOST_UNREACH.
 474                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 475                           I believe they are just ether pollution. --ANK
 476                         */
 477                        break;
 478                }
 479                break;
 480        case ICMP_TIME_EXCEEDED:
 481                if (code != ICMP_EXC_TTL)
 482                        return;
 483                break;
 484        }
 485
 486        rcu_read_lock();
 487        t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
 488                                flags & GRE_KEY ?
 489                                *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
 490                                p[1]);
 491        if (t == NULL || t->parms.iph.daddr == 0 ||
 492            ipv4_is_multicast(t->parms.iph.daddr))
 493                goto out;
 494
 495        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 496                goto out;
 497
 498        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 499                t->err_count++;
 500        else
 501                t->err_count = 1;
 502        t->err_time = jiffies;
 503out:
 504        rcu_read_unlock();
 505}
 506
 507static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 508{
 509        if (INET_ECN_is_ce(iph->tos)) {
 510                if (skb->protocol == htons(ETH_P_IP)) {
 511                        IP_ECN_set_ce(ip_hdr(skb));
 512                } else if (skb->protocol == htons(ETH_P_IPV6)) {
 513                        IP6_ECN_set_ce(ipv6_hdr(skb));
 514                }
 515        }
 516}
 517
 518static inline u8
 519ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
 520{
 521        u8 inner = 0;
 522        if (skb->protocol == htons(ETH_P_IP))
 523                inner = old_iph->tos;
 524        else if (skb->protocol == htons(ETH_P_IPV6))
 525                inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
 526        return INET_ECN_encapsulate(tos, inner);
 527}
 528
 529static int ipgre_rcv(struct sk_buff *skb)
 530{
 531        struct iphdr *iph;
 532        u8     *h;
 533        __be16    flags;
 534        __sum16   csum = 0;
 535        __be32 key = 0;
 536        u32    seqno = 0;
 537        struct ip_tunnel *tunnel;
 538        int    offset = 4;
 539        __be16 gre_proto;
 540
 541        if (!pskb_may_pull(skb, 16))
 542                goto drop_nolock;
 543
 544        iph = ip_hdr(skb);
 545        h = skb->data;
 546        flags = *(__be16*)h;
 547
 548        if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
 549                /* - Version must be 0.
 550                   - We do not support routing headers.
 551                 */
 552                if (flags&(GRE_VERSION|GRE_ROUTING))
 553                        goto drop_nolock;
 554
 555                if (flags&GRE_CSUM) {
 556                        switch (skb->ip_summed) {
 557                        case CHECKSUM_COMPLETE:
 558                                csum = csum_fold(skb->csum);
 559                                if (!csum)
 560                                        break;
 561                                /* fall through */
 562                        case CHECKSUM_NONE:
 563                                skb->csum = 0;
 564                                csum = __skb_checksum_complete(skb);
 565                                skb->ip_summed = CHECKSUM_COMPLETE;
 566                        }
 567                        offset += 4;
 568                }
 569                if (flags&GRE_KEY) {
 570                        key = *(__be32*)(h + offset);
 571                        offset += 4;
 572                }
 573                if (flags&GRE_SEQ) {
 574                        seqno = ntohl(*(__be32*)(h + offset));
 575                        offset += 4;
 576                }
 577        }
 578
 579        gre_proto = *(__be16 *)(h + 2);
 580
 581        rcu_read_lock();
 582        if ((tunnel = ipgre_tunnel_lookup(skb->dev,
 583                                          iph->saddr, iph->daddr, key,
 584                                          gre_proto))) {
 585                struct net_device_stats *stats = &tunnel->dev->stats;
 586
 587                secpath_reset(skb);
 588
 589                skb->protocol = gre_proto;
 590                /* WCCP version 1 and 2 protocol decoding.
 591                 * - Change protocol to IP
 592                 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
 593                 */
 594                if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
 595                        skb->protocol = htons(ETH_P_IP);
 596                        if ((*(h + offset) & 0xF0) != 0x40)
 597                                offset += 4;
 598                }
 599
 600                skb->mac_header = skb->network_header;
 601                __pskb_pull(skb, offset);
 602                skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 603                skb->pkt_type = PACKET_HOST;
 604#ifdef CONFIG_NET_IPGRE_BROADCAST
 605                if (ipv4_is_multicast(iph->daddr)) {
 606                        /* Looped back packet, drop it! */
 607                        if (skb_rtable(skb)->fl.iif == 0)
 608                                goto drop;
 609                        stats->multicast++;
 610                        skb->pkt_type = PACKET_BROADCAST;
 611                }
 612#endif
 613
 614                if (((flags&GRE_CSUM) && csum) ||
 615                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
 616                        stats->rx_crc_errors++;
 617                        stats->rx_errors++;
 618                        goto drop;
 619                }
 620                if (tunnel->parms.i_flags&GRE_SEQ) {
 621                        if (!(flags&GRE_SEQ) ||
 622                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
 623                                stats->rx_fifo_errors++;
 624                                stats->rx_errors++;
 625                                goto drop;
 626                        }
 627                        tunnel->i_seqno = seqno + 1;
 628                }
 629
 630                /* Warning: All skb pointers will be invalidated! */
 631                if (tunnel->dev->type == ARPHRD_ETHER) {
 632                        if (!pskb_may_pull(skb, ETH_HLEN)) {
 633                                stats->rx_length_errors++;
 634                                stats->rx_errors++;
 635                                goto drop;
 636                        }
 637
 638                        iph = ip_hdr(skb);
 639                        skb->protocol = eth_type_trans(skb, tunnel->dev);
 640                        skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 641                }
 642
 643                skb_tunnel_rx(skb, tunnel->dev);
 644
 645                skb_reset_network_header(skb);
 646                ipgre_ecn_decapsulate(iph, skb);
 647
 648                netif_rx(skb);
 649                rcu_read_unlock();
 650                return(0);
 651        }
 652        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 653
 654drop:
 655        rcu_read_unlock();
 656drop_nolock:
 657        kfree_skb(skb);
 658        return(0);
 659}
 660
 661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 662{
 663        struct ip_tunnel *tunnel = netdev_priv(dev);
 664        struct net_device_stats *stats = &dev->stats;
 665        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 666        struct iphdr  *old_iph = ip_hdr(skb);
 667        struct iphdr  *tiph;
 668        u8     tos;
 669        __be16 df;
 670        struct rtable *rt;                      /* Route to the other host */
 671        struct net_device *tdev;                        /* Device to other host */
 672        struct iphdr  *iph;                     /* Our new IP header */
 673        unsigned int max_headroom;              /* The extra header space needed */
 674        int    gre_hlen;
 675        __be32 dst;
 676        int    mtu;
 677
 678        if (dev->type == ARPHRD_ETHER)
 679                IPCB(skb)->flags = 0;
 680
 681        if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 682                gre_hlen = 0;
 683                tiph = (struct iphdr *)skb->data;
 684        } else {
 685                gre_hlen = tunnel->hlen;
 686                tiph = &tunnel->parms.iph;
 687        }
 688
 689        if ((dst = tiph->daddr) == 0) {
 690                /* NBMA tunnel */
 691
 692                if (skb_dst(skb) == NULL) {
 693                        stats->tx_fifo_errors++;
 694                        goto tx_error;
 695                }
 696
 697                if (skb->protocol == htons(ETH_P_IP)) {
 698                        rt = skb_rtable(skb);
 699                        if ((dst = rt->rt_gateway) == 0)
 700                                goto tx_error_icmp;
 701                }
 702#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 703                else if (skb->protocol == htons(ETH_P_IPV6)) {
 704                        struct in6_addr *addr6;
 705                        int addr_type;
 706                        struct neighbour *neigh = skb_dst(skb)->neighbour;
 707
 708                        if (neigh == NULL)
 709                                goto tx_error;
 710
 711                        addr6 = (struct in6_addr *)&neigh->primary_key;
 712                        addr_type = ipv6_addr_type(addr6);
 713
 714                        if (addr_type == IPV6_ADDR_ANY) {
 715                                addr6 = &ipv6_hdr(skb)->daddr;
 716                                addr_type = ipv6_addr_type(addr6);
 717                        }
 718
 719                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 720                                goto tx_error_icmp;
 721
 722                        dst = addr6->s6_addr32[3];
 723                }
 724#endif
 725                else
 726                        goto tx_error;
 727        }
 728
 729        tos = tiph->tos;
 730        if (tos == 1) {
 731                tos = 0;
 732                if (skb->protocol == htons(ETH_P_IP))
 733                        tos = old_iph->tos;
 734                else if (skb->protocol == htons(ETH_P_IPV6))
 735                        tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
 736        }
 737
 738        {
 739                struct flowi fl = { .oif = tunnel->parms.link,
 740                                    .nl_u = { .ip4_u =
 741                                              { .daddr = dst,
 742                                                .saddr = tiph->saddr,
 743                                                .tos = RT_TOS(tos) } },
 744                                    .proto = IPPROTO_GRE };
 745                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
 746                        stats->tx_carrier_errors++;
 747                        goto tx_error;
 748                }
 749        }
 750        tdev = rt->dst.dev;
 751
 752        if (tdev == dev) {
 753                ip_rt_put(rt);
 754                stats->collisions++;
 755                goto tx_error;
 756        }
 757
 758        df = tiph->frag_off;
 759        if (df)
 760                mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 761        else
 762                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 763
 764        if (skb_dst(skb))
 765                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 766
 767        if (skb->protocol == htons(ETH_P_IP)) {
 768                df |= (old_iph->frag_off&htons(IP_DF));
 769
 770                if ((old_iph->frag_off&htons(IP_DF)) &&
 771                    mtu < ntohs(old_iph->tot_len)) {
 772                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 773                        ip_rt_put(rt);
 774                        goto tx_error;
 775                }
 776        }
 777#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 778        else if (skb->protocol == htons(ETH_P_IPV6)) {
 779                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 780
 781                if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
 782                        if ((tunnel->parms.iph.daddr &&
 783                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 784                            rt6->rt6i_dst.plen == 128) {
 785                                rt6->rt6i_flags |= RTF_MODIFIED;
 786                                skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
 787                        }
 788                }
 789
 790                if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
 791                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 792                        ip_rt_put(rt);
 793                        goto tx_error;
 794                }
 795        }
 796#endif
 797
 798        if (tunnel->err_count > 0) {
 799                if (time_before(jiffies,
 800                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 801                        tunnel->err_count--;
 802
 803                        dst_link_failure(skb);
 804                } else
 805                        tunnel->err_count = 0;
 806        }
 807
 808        max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 809
 810        if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 811            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 812                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 813                if (max_headroom > dev->needed_headroom)
 814                        dev->needed_headroom = max_headroom;
 815                if (!new_skb) {
 816                        ip_rt_put(rt);
 817                        txq->tx_dropped++;
 818                        dev_kfree_skb(skb);
 819                        return NETDEV_TX_OK;
 820                }
 821                if (skb->sk)
 822                        skb_set_owner_w(new_skb, skb->sk);
 823                dev_kfree_skb(skb);
 824                skb = new_skb;
 825                old_iph = ip_hdr(skb);
 826        }
 827
 828        skb_reset_transport_header(skb);
 829        skb_push(skb, gre_hlen);
 830        skb_reset_network_header(skb);
 831        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 832        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 833                              IPSKB_REROUTED);
 834        skb_dst_drop(skb);
 835        skb_dst_set(skb, &rt->dst);
 836
 837        /*
 838         *      Push down and install the IPIP header.
 839         */
 840
 841        iph                     =       ip_hdr(skb);
 842        iph->version            =       4;
 843        iph->ihl                =       sizeof(struct iphdr) >> 2;
 844        iph->frag_off           =       df;
 845        iph->protocol           =       IPPROTO_GRE;
 846        iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
 847        iph->daddr              =       rt->rt_dst;
 848        iph->saddr              =       rt->rt_src;
 849
 850        if ((iph->ttl = tiph->ttl) == 0) {
 851                if (skb->protocol == htons(ETH_P_IP))
 852                        iph->ttl = old_iph->ttl;
 853#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 854                else if (skb->protocol == htons(ETH_P_IPV6))
 855                        iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 856#endif
 857                else
 858                        iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
 859        }
 860
 861        ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
 862        ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
 863                                   htons(ETH_P_TEB) : skb->protocol;
 864
 865        if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 866                __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
 867
 868                if (tunnel->parms.o_flags&GRE_SEQ) {
 869                        ++tunnel->o_seqno;
 870                        *ptr = htonl(tunnel->o_seqno);
 871                        ptr--;
 872                }
 873                if (tunnel->parms.o_flags&GRE_KEY) {
 874                        *ptr = tunnel->parms.o_key;
 875                        ptr--;
 876                }
 877                if (tunnel->parms.o_flags&GRE_CSUM) {
 878                        *ptr = 0;
 879                        *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
 880                }
 881        }
 882
 883        nf_reset(skb);
 884
 885        IPTUNNEL_XMIT();
 886        return NETDEV_TX_OK;
 887
 888tx_error_icmp:
 889        dst_link_failure(skb);
 890
 891tx_error:
 892        stats->tx_errors++;
 893        dev_kfree_skb(skb);
 894        return NETDEV_TX_OK;
 895}
 896
 897static int ipgre_tunnel_bind_dev(struct net_device *dev)
 898{
 899        struct net_device *tdev = NULL;
 900        struct ip_tunnel *tunnel;
 901        struct iphdr *iph;
 902        int hlen = LL_MAX_HEADER;
 903        int mtu = ETH_DATA_LEN;
 904        int addend = sizeof(struct iphdr) + 4;
 905
 906        tunnel = netdev_priv(dev);
 907        iph = &tunnel->parms.iph;
 908
 909        /* Guess output device to choose reasonable mtu and needed_headroom */
 910
 911        if (iph->daddr) {
 912                struct flowi fl = { .oif = tunnel->parms.link,
 913                                    .nl_u = { .ip4_u =
 914                                              { .daddr = iph->daddr,
 915                                                .saddr = iph->saddr,
 916                                                .tos = RT_TOS(iph->tos) } },
 917                                    .proto = IPPROTO_GRE };
 918                struct rtable *rt;
 919                if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
 920                        tdev = rt->dst.dev;
 921                        ip_rt_put(rt);
 922                }
 923
 924                if (dev->type != ARPHRD_ETHER)
 925                        dev->flags |= IFF_POINTOPOINT;
 926        }
 927
 928        if (!tdev && tunnel->parms.link)
 929                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 930
 931        if (tdev) {
 932                hlen = tdev->hard_header_len + tdev->needed_headroom;
 933                mtu = tdev->mtu;
 934        }
 935        dev->iflink = tunnel->parms.link;
 936
 937        /* Precalculate GRE options length */
 938        if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
 939                if (tunnel->parms.o_flags&GRE_CSUM)
 940                        addend += 4;
 941                if (tunnel->parms.o_flags&GRE_KEY)
 942                        addend += 4;
 943                if (tunnel->parms.o_flags&GRE_SEQ)
 944                        addend += 4;
 945        }
 946        dev->needed_headroom = addend + hlen;
 947        mtu -= dev->hard_header_len + addend;
 948
 949        if (mtu < 68)
 950                mtu = 68;
 951
 952        tunnel->hlen = addend;
 953
 954        return mtu;
 955}
 956
 957static int
 958ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 959{
 960        int err = 0;
 961        struct ip_tunnel_parm p;
 962        struct ip_tunnel *t;
 963        struct net *net = dev_net(dev);
 964        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 965
 966        switch (cmd) {
 967        case SIOCGETTUNNEL:
 968                t = NULL;
 969                if (dev == ign->fb_tunnel_dev) {
 970                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 971                                err = -EFAULT;
 972                                break;
 973                        }
 974                        t = ipgre_tunnel_locate(net, &p, 0);
 975                }
 976                if (t == NULL)
 977                        t = netdev_priv(dev);
 978                memcpy(&p, &t->parms, sizeof(p));
 979                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 980                        err = -EFAULT;
 981                break;
 982
 983        case SIOCADDTUNNEL:
 984        case SIOCCHGTUNNEL:
 985                err = -EPERM;
 986                if (!capable(CAP_NET_ADMIN))
 987                        goto done;
 988
 989                err = -EFAULT;
 990                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 991                        goto done;
 992
 993                err = -EINVAL;
 994                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 995                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
 996                    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
 997                        goto done;
 998                if (p.iph.ttl)
 999                        p.iph.frag_off |= htons(IP_DF);
1000
1001                if (!(p.i_flags&GRE_KEY))
1002                        p.i_key = 0;
1003                if (!(p.o_flags&GRE_KEY))
1004                        p.o_key = 0;
1005
1006                t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1007
1008                if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1009                        if (t != NULL) {
1010                                if (t->dev != dev) {
1011                                        err = -EEXIST;
1012                                        break;
1013                                }
1014                        } else {
1015                                unsigned nflags = 0;
1016
1017                                t = netdev_priv(dev);
1018
1019                                if (ipv4_is_multicast(p.iph.daddr))
1020                                        nflags = IFF_BROADCAST;
1021                                else if (p.iph.daddr)
1022                                        nflags = IFF_POINTOPOINT;
1023
1024                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1025                                        err = -EINVAL;
1026                                        break;
1027                                }
1028                                ipgre_tunnel_unlink(ign, t);
1029                                t->parms.iph.saddr = p.iph.saddr;
1030                                t->parms.iph.daddr = p.iph.daddr;
1031                                t->parms.i_key = p.i_key;
1032                                t->parms.o_key = p.o_key;
1033                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
1034                                memcpy(dev->broadcast, &p.iph.daddr, 4);
1035                                ipgre_tunnel_link(ign, t);
1036                                netdev_state_change(dev);
1037                        }
1038                }
1039
1040                if (t) {
1041                        err = 0;
1042                        if (cmd == SIOCCHGTUNNEL) {
1043                                t->parms.iph.ttl = p.iph.ttl;
1044                                t->parms.iph.tos = p.iph.tos;
1045                                t->parms.iph.frag_off = p.iph.frag_off;
1046                                if (t->parms.link != p.link) {
1047                                        t->parms.link = p.link;
1048                                        dev->mtu = ipgre_tunnel_bind_dev(dev);
1049                                        netdev_state_change(dev);
1050                                }
1051                        }
1052                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1053                                err = -EFAULT;
1054                } else
1055                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1056                break;
1057
1058        case SIOCDELTUNNEL:
1059                err = -EPERM;
1060                if (!capable(CAP_NET_ADMIN))
1061                        goto done;
1062
1063                if (dev == ign->fb_tunnel_dev) {
1064                        err = -EFAULT;
1065                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1066                                goto done;
1067                        err = -ENOENT;
1068                        if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1069                                goto done;
1070                        err = -EPERM;
1071                        if (t == netdev_priv(ign->fb_tunnel_dev))
1072                                goto done;
1073                        dev = t->dev;
1074                }
1075                unregister_netdevice(dev);
1076                err = 0;
1077                break;
1078
1079        default:
1080                err = -EINVAL;
1081        }
1082
1083done:
1084        return err;
1085}
1086
1087static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1088{
1089        struct ip_tunnel *tunnel = netdev_priv(dev);
1090        if (new_mtu < 68 ||
1091            new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1092                return -EINVAL;
1093        dev->mtu = new_mtu;
1094        return 0;
1095}
1096
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.
1100
1101
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).
1106
1107   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1108   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1109
1110   ping -t 255 224.66.66.66
1111
1112   If nobody answers, mbone does not work.
1113
1114   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1115   ip addr add 10.66.66.<somewhat>/24 dev Universe
1116   ifconfig Universe up
1117   ifconfig Universe add fe80::<Your_real_addr>/10
1118   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1119   ftp 10.66.66.66
1120   ...
1121   ftp fec0:6666:6666::193.233.7.65
1122   ...
1123
1124 */
1125
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127                        unsigned short type,
1128                        const void *daddr, const void *saddr, unsigned len)
1129{
1130        struct ip_tunnel *t = netdev_priv(dev);
1131        struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1132        __be16 *p = (__be16*)(iph+1);
1133
1134        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1135        p[0]            = t->parms.o_flags;
1136        p[1]            = htons(type);
1137
1138        /*
1139         *      Set the source hardware address.
1140         */
1141
1142        if (saddr)
1143                memcpy(&iph->saddr, saddr, 4);
1144        if (daddr)
1145                memcpy(&iph->daddr, daddr, 4);
1146        if (iph->daddr)
1147                return t->hlen;
1148
1149        return -t->hlen;
1150}
1151
1152static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153{
1154        struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1155        memcpy(haddr, &iph->saddr, 4);
1156        return 4;
1157}
1158
1159static const struct header_ops ipgre_header_ops = {
1160        .create = ipgre_header,
1161        .parse  = ipgre_header_parse,
1162};
1163
1164#ifdef CONFIG_NET_IPGRE_BROADCAST
1165static int ipgre_open(struct net_device *dev)
1166{
1167        struct ip_tunnel *t = netdev_priv(dev);
1168
1169        if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170                struct flowi fl = { .oif = t->parms.link,
1171                                    .nl_u = { .ip4_u =
1172                                              { .daddr = t->parms.iph.daddr,
1173                                                .saddr = t->parms.iph.saddr,
1174                                                .tos = RT_TOS(t->parms.iph.tos) } },
1175                                    .proto = IPPROTO_GRE };
1176                struct rtable *rt;
1177                if (ip_route_output_key(dev_net(dev), &rt, &fl))
1178                        return -EADDRNOTAVAIL;
1179                dev = rt->dst.dev;
1180                ip_rt_put(rt);
1181                if (__in_dev_get_rtnl(dev) == NULL)
1182                        return -EADDRNOTAVAIL;
1183                t->mlink = dev->ifindex;
1184                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1185        }
1186        return 0;
1187}
1188
1189static int ipgre_close(struct net_device *dev)
1190{
1191        struct ip_tunnel *t = netdev_priv(dev);
1192
1193        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1194                struct in_device *in_dev;
1195                in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1196                if (in_dev) {
1197                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198                        in_dev_put(in_dev);
1199                }
1200        }
1201        return 0;
1202}
1203
1204#endif
1205
1206static const struct net_device_ops ipgre_netdev_ops = {
1207        .ndo_init               = ipgre_tunnel_init,
1208        .ndo_uninit             = ipgre_tunnel_uninit,
1209#ifdef CONFIG_NET_IPGRE_BROADCAST
1210        .ndo_open               = ipgre_open,
1211        .ndo_stop               = ipgre_close,
1212#endif
1213        .ndo_start_xmit         = ipgre_tunnel_xmit,
1214        .ndo_do_ioctl           = ipgre_tunnel_ioctl,
1215        .ndo_change_mtu         = ipgre_tunnel_change_mtu,
1216};
1217
1218static void ipgre_tunnel_setup(struct net_device *dev)
1219{
1220        dev->netdev_ops         = &ipgre_netdev_ops;
1221        dev->destructor         = free_netdev;
1222
1223        dev->type               = ARPHRD_IPGRE;
1224        dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1225        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1226        dev->flags              = IFF_NOARP;
1227        dev->iflink             = 0;
1228        dev->addr_len           = 4;
1229        dev->features           |= NETIF_F_NETNS_LOCAL;
1230        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
1231}
1232
1233static int ipgre_tunnel_init(struct net_device *dev)
1234{
1235        struct ip_tunnel *tunnel;
1236        struct iphdr *iph;
1237
1238        tunnel = netdev_priv(dev);
1239        iph = &tunnel->parms.iph;
1240
1241        tunnel->dev = dev;
1242        strcpy(tunnel->parms.name, dev->name);
1243
1244        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1245        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1246
1247        if (iph->daddr) {
1248#ifdef CONFIG_NET_IPGRE_BROADCAST
1249                if (ipv4_is_multicast(iph->daddr)) {
1250                        if (!iph->saddr)
1251                                return -EINVAL;
1252                        dev->flags = IFF_BROADCAST;
1253                        dev->header_ops = &ipgre_header_ops;
1254                }
1255#endif
1256        } else
1257                dev->header_ops = &ipgre_header_ops;
1258
1259        return 0;
1260}
1261
1262static void ipgre_fb_tunnel_init(struct net_device *dev)
1263{
1264        struct ip_tunnel *tunnel = netdev_priv(dev);
1265        struct iphdr *iph = &tunnel->parms.iph;
1266        struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1267
1268        tunnel->dev = dev;
1269        strcpy(tunnel->parms.name, dev->name);
1270
1271        iph->version            = 4;
1272        iph->protocol           = IPPROTO_GRE;
1273        iph->ihl                = 5;
1274        tunnel->hlen            = sizeof(struct iphdr) + 4;
1275
1276        dev_hold(dev);
1277        ign->tunnels_wc[0]      = tunnel;
1278}
1279
1280
1281static const struct net_protocol ipgre_protocol = {
1282        .handler        =       ipgre_rcv,
1283        .err_handler    =       ipgre_err,
1284        .netns_ok       =       1,
1285};
1286
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1288{
1289        int prio;
1290
1291        for (prio = 0; prio < 4; prio++) {
1292                int h;
1293                for (h = 0; h < HASH_SIZE; h++) {
1294                        struct ip_tunnel *t = ign->tunnels[prio][h];
1295
1296                        while (t != NULL) {
1297                                unregister_netdevice_queue(t->dev, head);
1298                                t = t->next;
1299                        }
1300                }
1301        }
1302}
1303
1304static int __net_init ipgre_init_net(struct net *net)
1305{
1306        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1307        int err;
1308
1309        ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1310                                           ipgre_tunnel_setup);
1311        if (!ign->fb_tunnel_dev) {
1312                err = -ENOMEM;
1313                goto err_alloc_dev;
1314        }
1315        dev_net_set(ign->fb_tunnel_dev, net);
1316
1317        ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1318        ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1319
1320        if ((err = register_netdev(ign->fb_tunnel_dev)))
1321                goto err_reg_dev;
1322
1323        return 0;
1324
1325err_reg_dev:
1326        free_netdev(ign->fb_tunnel_dev);
1327err_alloc_dev:
1328        return err;
1329}
1330
1331static void __net_exit ipgre_exit_net(struct net *net)
1332{
1333        struct ipgre_net *ign;
1334        LIST_HEAD(list);
1335
1336        ign = net_generic(net, ipgre_net_id);
1337        rtnl_lock();
1338        ipgre_destroy_tunnels(ign, &list);
1339        unregister_netdevice_many(&list);
1340        rtnl_unlock();
1341}
1342
1343static struct pernet_operations ipgre_net_ops = {
1344        .init = ipgre_init_net,
1345        .exit = ipgre_exit_net,
1346        .id   = &ipgre_net_id,
1347        .size = sizeof(struct ipgre_net),
1348};
1349
1350static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1351{
1352        __be16 flags;
1353
1354        if (!data)
1355                return 0;
1356
1357        flags = 0;
1358        if (data[IFLA_GRE_IFLAGS])
1359                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360        if (data[IFLA_GRE_OFLAGS])
1361                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1362        if (flags & (GRE_VERSION|GRE_ROUTING))
1363                return -EINVAL;
1364
1365        return 0;
1366}
1367
1368static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1369{
1370        __be32 daddr;
1371
1372        if (tb[IFLA_ADDRESS]) {
1373                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1374                        return -EINVAL;
1375                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1376                        return -EADDRNOTAVAIL;
1377        }
1378
1379        if (!data)
1380                goto out;
1381
1382        if (data[IFLA_GRE_REMOTE]) {
1383                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1384                if (!daddr)
1385                        return -EINVAL;
1386        }
1387
1388out:
1389        return ipgre_tunnel_validate(tb, data);
1390}
1391
1392static void ipgre_netlink_parms(struct nlattr *data[],
1393                                struct ip_tunnel_parm *parms)
1394{
1395        memset(parms, 0, sizeof(*parms));
1396
1397        parms->iph.protocol = IPPROTO_GRE;
1398
1399        if (!data)
1400                return;
1401
1402        if (data[IFLA_GRE_LINK])
1403                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1404
1405        if (data[IFLA_GRE_IFLAGS])
1406                parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1407
1408        if (data[IFLA_GRE_OFLAGS])
1409                parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1410
1411        if (data[IFLA_GRE_IKEY])
1412                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1413
1414        if (data[IFLA_GRE_OKEY])
1415                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1416
1417        if (data[IFLA_GRE_LOCAL])
1418                parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1419
1420        if (data[IFLA_GRE_REMOTE])
1421                parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1422
1423        if (data[IFLA_GRE_TTL])
1424                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1425
1426        if (data[IFLA_GRE_TOS])
1427                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1428
1429        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1430                parms->iph.frag_off = htons(IP_DF);
1431}
1432
1433static int ipgre_tap_init(struct net_device *dev)
1434{
1435        struct ip_tunnel *tunnel;
1436
1437        tunnel = netdev_priv(dev);
1438
1439        tunnel->dev = dev;
1440        strcpy(tunnel->parms.name, dev->name);
1441
1442        ipgre_tunnel_bind_dev(dev);
1443
1444        return 0;
1445}
1446
1447static const struct net_device_ops ipgre_tap_netdev_ops = {
1448        .ndo_init               = ipgre_tap_init,
1449        .ndo_uninit             = ipgre_tunnel_uninit,
1450        .ndo_start_xmit         = ipgre_tunnel_xmit,
1451        .ndo_set_mac_address    = eth_mac_addr,
1452        .ndo_validate_addr      = eth_validate_addr,
1453        .ndo_change_mtu         = ipgre_tunnel_change_mtu,
1454};
1455
1456static void ipgre_tap_setup(struct net_device *dev)
1457{
1458
1459        ether_setup(dev);
1460
1461        dev->netdev_ops         = &ipgre_tap_netdev_ops;
1462        dev->destructor         = free_netdev;
1463
1464        dev->iflink             = 0;
1465        dev->features           |= NETIF_F_NETNS_LOCAL;
1466}
1467
1468static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1469                         struct nlattr *data[])
1470{
1471        struct ip_tunnel *nt;
1472        struct net *net = dev_net(dev);
1473        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1474        int mtu;
1475        int err;
1476
1477        nt = netdev_priv(dev);
1478        ipgre_netlink_parms(data, &nt->parms);
1479
1480        if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1481                return -EEXIST;
1482
1483        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1484                random_ether_addr(dev->dev_addr);
1485
1486        mtu = ipgre_tunnel_bind_dev(dev);
1487        if (!tb[IFLA_MTU])
1488                dev->mtu = mtu;
1489
1490        err = register_netdevice(dev);
1491        if (err)
1492                goto out;
1493
1494        dev_hold(dev);
1495        ipgre_tunnel_link(ign, nt);
1496
1497out:
1498        return err;
1499}
1500
1501static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1502                            struct nlattr *data[])
1503{
1504        struct ip_tunnel *t, *nt;
1505        struct net *net = dev_net(dev);
1506        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1507        struct ip_tunnel_parm p;
1508        int mtu;
1509
1510        if (dev == ign->fb_tunnel_dev)
1511                return -EINVAL;
1512
1513        nt = netdev_priv(dev);
1514        ipgre_netlink_parms(data, &p);
1515
1516        t = ipgre_tunnel_locate(net, &p, 0);
1517
1518        if (t) {
1519                if (t->dev != dev)
1520                        return -EEXIST;
1521        } else {
1522                t = nt;
1523
1524                if (dev->type != ARPHRD_ETHER) {
1525                        unsigned nflags = 0;
1526
1527                        if (ipv4_is_multicast(p.iph.daddr))
1528                                nflags = IFF_BROADCAST;
1529                        else if (p.iph.daddr)
1530                                nflags = IFF_POINTOPOINT;
1531
1532                        if ((dev->flags ^ nflags) &
1533                            (IFF_POINTOPOINT | IFF_BROADCAST))
1534                                return -EINVAL;
1535                }
1536
1537                ipgre_tunnel_unlink(ign, t);
1538                t->parms.iph.saddr = p.iph.saddr;
1539                t->parms.iph.daddr = p.iph.daddr;
1540                t->parms.i_key = p.i_key;
1541                if (dev->type != ARPHRD_ETHER) {
1542                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
1543                        memcpy(dev->broadcast, &p.iph.daddr, 4);
1544                }
1545                ipgre_tunnel_link(ign, t);
1546                netdev_state_change(dev);
1547        }
1548
1549        t->parms.o_key = p.o_key;
1550        t->parms.iph.ttl = p.iph.ttl;
1551        t->parms.iph.tos = p.iph.tos;
1552        t->parms.iph.frag_off = p.iph.frag_off;
1553
1554        if (t->parms.link != p.link) {
1555                t->parms.link = p.link;
1556                mtu = ipgre_tunnel_bind_dev(dev);
1557                if (!tb[IFLA_MTU])
1558                        dev->mtu = mtu;
1559                netdev_state_change(dev);
1560        }
1561
1562        return 0;
1563}
1564
1565static size_t ipgre_get_size(const struct net_device *dev)
1566{
1567        return
1568                /* IFLA_GRE_LINK */
1569                nla_total_size(4) +
1570                /* IFLA_GRE_IFLAGS */
1571                nla_total_size(2) +
1572                /* IFLA_GRE_OFLAGS */
1573                nla_total_size(2) +
1574                /* IFLA_GRE_IKEY */
1575                nla_total_size(4) +
1576                /* IFLA_GRE_OKEY */
1577                nla_total_size(4) +
1578                /* IFLA_GRE_LOCAL */
1579                nla_total_size(4) +
1580                /* IFLA_GRE_REMOTE */
1581                nla_total_size(4) +
1582                /* IFLA_GRE_TTL */
1583                nla_total_size(1) +
1584                /* IFLA_GRE_TOS */
1585                nla_total_size(1) +
1586                /* IFLA_GRE_PMTUDISC */
1587                nla_total_size(1) +
1588                0;
1589}
1590
1591static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1592{
1593        struct ip_tunnel *t = netdev_priv(dev);
1594        struct ip_tunnel_parm *p = &t->parms;
1595
1596        NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1597        NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1598        NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1599        NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1600        NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1601        NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1602        NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1603        NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1604        NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1605        NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1606
1607        return 0;
1608
1609nla_put_failure:
1610        return -EMSGSIZE;
1611}
1612
1613static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1614        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1615        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1616        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1617        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1618        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1619        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1620        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1621        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1622        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1623        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1624};
1625
1626static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1627        .kind           = "gre",
1628        .maxtype        = IFLA_GRE_MAX,
1629        .policy         = ipgre_policy,
1630        .priv_size      = sizeof(struct ip_tunnel),
1631        .setup          = ipgre_tunnel_setup,
1632        .validate       = ipgre_tunnel_validate,
1633        .newlink        = ipgre_newlink,
1634        .changelink     = ipgre_changelink,
1635        .get_size       = ipgre_get_size,
1636        .fill_info      = ipgre_fill_info,
1637};
1638
1639static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1640        .kind           = "gretap",
1641        .maxtype        = IFLA_GRE_MAX,
1642        .policy         = ipgre_policy,
1643        .priv_size      = sizeof(struct ip_tunnel),
1644        .setup          = ipgre_tap_setup,
1645        .validate       = ipgre_tap_validate,
1646        .newlink        = ipgre_newlink,
1647        .changelink     = ipgre_changelink,
1648        .get_size       = ipgre_get_size,
1649        .fill_info      = ipgre_fill_info,
1650};
1651
1652/*
1653 *      And now the modules code and kernel interface.
1654 */
1655
1656static int __init ipgre_init(void)
1657{
1658        int err;
1659
1660        printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1661
1662        err = register_pernet_device(&ipgre_net_ops);
1663        if (err < 0)
1664                return err;
1665
1666        err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1667        if (err < 0) {
1668                printk(KERN_INFO "ipgre init: can't add protocol\n");
1669                goto add_proto_failed;
1670        }
1671
1672        err = rtnl_link_register(&ipgre_link_ops);
1673        if (err < 0)
1674                goto rtnl_link_failed;
1675
1676        err = rtnl_link_register(&ipgre_tap_ops);
1677        if (err < 0)
1678                goto tap_ops_failed;
1679
1680out:
1681        return err;
1682
1683tap_ops_failed:
1684        rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed:
1686        inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1687add_proto_failed:
1688        unregister_pernet_device(&ipgre_net_ops);
1689        goto out;
1690}
1691
1692static void __exit ipgre_fini(void)
1693{
1694        rtnl_link_unregister(&ipgre_tap_ops);
1695        rtnl_link_unregister(&ipgre_link_ops);
1696        if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1697                printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698        unregister_pernet_device(&ipgre_net_ops);
1699}
1700
1701module_init(ipgre_init);
1702module_exit(ipgre_fini);
1703MODULE_LICENSE("GPL");
1704MODULE_ALIAS_RTNL_LINK("gre");
1705MODULE_ALIAS_RTNL_LINK("gretap");
1706
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.