/* linux/net/ipv4/ip_gre.c */
   1/*
   2 *      Linux NET3:     GRE over IP protocol decoder.
   3 *
   4 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/capability.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18#include <linux/kernel.h>
  19#include <linux/slab.h>
  20#include <asm/uaccess.h>
  21#include <linux/skbuff.h>
  22#include <linux/netdevice.h>
  23#include <linux/in.h>
  24#include <linux/tcp.h>
  25#include <linux/udp.h>
  26#include <linux/if_arp.h>
  27#include <linux/mroute.h>
  28#include <linux/init.h>
  29#include <linux/in6.h>
  30#include <linux/inetdevice.h>
  31#include <linux/igmp.h>
  32#include <linux/netfilter_ipv4.h>
  33#include <linux/etherdevice.h>
  34#include <linux/if_ether.h>
  35
  36#include <net/sock.h>
  37#include <net/ip.h>
  38#include <net/icmp.h>
  39#include <net/protocol.h>
  40#include <net/ipip.h>
  41#include <net/arp.h>
  42#include <net/checksum.h>
  43#include <net/dsfield.h>
  44#include <net/inet_ecn.h>
  45#include <net/xfrm.h>
  46#include <net/net_namespace.h>
  47#include <net/netns/generic.h>
  48#include <net/rtnetlink.h>
  49#include <net/gre.h>
  50
  51#if IS_ENABLED(CONFIG_IPV6)
  52#include <net/ipv6.h>
  53#include <net/ip6_fib.h>
  54#include <net/ip6_route.h>
  55#endif
  56
  57/*
  58   Problems & solutions
  59   --------------------
  60
  61   1. The most important issue is detecting local dead loops.
  62   They would cause complete host lockup in transmit, which
  63   would be "resolved" by stack overflow or, if queueing is enabled,
  64   with infinite looping in net_bh.
  65
  66   We cannot track such dead loops during route installation,
  67   it is infeasible task. The most general solutions would be
  68   to keep skb->encapsulation counter (sort of local ttl),
  69   and silently drop packet when it expires. It is a good
  70   solution, but it supposes maintaining new variable in ALL
  71   skb, even if no tunneling is used.
  72
  73   Current solution: xmit_recursion breaks dead loops. This is a percpu
  74   counter, since when we enter the first ndo_xmit(), cpu migration is
  75   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
  76
  77   2. Networking dead loops would not kill routers, but would really
  78   kill network. IP hop limit plays role of "t->recursion" in this case,
  79   if we copy it from packet being encapsulated to upper header.
  80   It is very good solution, but it introduces two problems:
  81
  82   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  83     do not work over tunnels.
  84   - traceroute does not work. I planned to relay ICMP from tunnel,
  85     so that this problem would be solved and traceroute output
  86     would even more informative. This idea appeared to be wrong:
  87     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  88     true router now :-)), all routers (at least, in neighbourhood of mine)
  89     return only 8 bytes of payload. It is the end.
  90
  91   Hence, if we want that OSPF worked or traceroute said something reasonable,
  92   we should search for another solution.
  93
  94   One of them is to parse packet trying to detect inner encapsulation
  95   made by our node. It is difficult or even impossible, especially,
  96   taking into account fragmentation. TO be short, ttl is not solution at all.
  97
  98   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  99   We force DF flag on tunnels with preconfigured hop limit,
 100   that is ALL. :-) Well, it does not remove the problem completely,
 101   but exponential growth of network traffic is changed to linear
 102   (branches, that exceed pmtu are pruned) and tunnel mtu
 103   rapidly degrades to value <68, where looping stops.
 104   Yes, it is not good if there exists a router in the loop,
 105   which does not force DF, even when encapsulating packets have DF set.
 106   But it is not our problem! Nobody could accuse us, we made
 107   all that we could make. Even if it is your gated who injected
 108   fatal route to network, even if it were you who configured
 109   fatal static route: you are innocent. :-)
 110
 111
 112
 113   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
 114   practically identical code. It would be good to glue them
 115   together, but it is not very evident, how to make them modular.
 116   sit is integral part of IPv6, ipip and gre are naturally modular.
 117   We could extract common parts (hash table, ioctl etc)
 118   to a separate module (ip_tunnel.c).
 119
 120   Alexey Kuznetsov.
 121 */
 122
/* Forward declarations; all are defined later in this file. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);
 127
/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

/* Per-network-namespace id, assigned at pernet registration. */
static int ipgre_net_id __read_mostly;
struct ipgre_net {
	/* Four chained hash tables, indexed [prio][bucket]; the prio
	 * encodes which addresses are specified — see the table layout
	 * comment below. Writes are serialized by RTNL, lookups run
	 * under RCU. */
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

	/* catch-all device that receives otherwise-unmatched keyless GRE */
	struct net_device *fb_tunnel_dev;
};
 138
 139/* Tunnel hash table */
 140
 141/*
 142   4 hash tables:
 143
 144   3: (remote,local)
 145   2: (remote,*)
 146   1: (*,local)
 147   0: (*,*)
 148
 149   We require exact key match i.e. if a key is present in packet
 150   it will match only tunnel with the same key; if it is not present,
 151   it will match only keyless tunnel.
 152
 153   All keysless packets, if not matched configured keyless tunnels
 154   will match fallback tunnel.
 155 */
 156
 157#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 158
#define tunnels_r_l     tunnels[3]	/* (remote, local)  */
#define tunnels_r       tunnels[2]	/* (remote, *)      */
#define tunnels_l       tunnels[1]	/* (*, local)       */
#define tunnels_wc      tunnels[0]	/* (*, *) wildcard  */
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/* Walk one RCU-protected hash chain. NOTE: deliberately expands an
 * implicit iterator variable "t" that every caller must declare. */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 169
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64     rx_packets;
	u64     rx_bytes;
	u64     tx_packets;
	u64     tx_bytes;
	/* makes the u64 counters readable without tearing on 32-bit hosts */
	struct u64_stats_sync   syncp;
};
 178
 179static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
 180                                                   struct rtnl_link_stats64 *tot)
 181{
 182        int i;
 183
 184        for_each_possible_cpu(i) {
 185                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 186                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 187                unsigned int start;
 188
 189                do {
 190                        start = u64_stats_fetch_begin_bh(&tstats->syncp);
 191                        rx_packets = tstats->rx_packets;
 192                        tx_packets = tstats->tx_packets;
 193                        rx_bytes = tstats->rx_bytes;
 194                        tx_bytes = tstats->tx_bytes;
 195                } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
 196
 197                tot->rx_packets += rx_packets;
 198                tot->tx_packets += tx_packets;
 199                tot->rx_bytes   += rx_bytes;
 200                tot->tx_bytes   += tx_bytes;
 201        }
 202
 203        tot->multicast = dev->stats.multicast;
 204        tot->rx_crc_errors = dev->stats.rx_crc_errors;
 205        tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
 206        tot->rx_length_errors = dev->stats.rx_length_errors;
 207        tot->rx_errors = dev->stats.rx_errors;
 208        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 209        tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 210        tot->tx_dropped = dev->stats.tx_dropped;
 211        tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
 212        tot->tx_errors = dev->stats.tx_errors;
 213
 214        return tot;
 215}
 216
/* Given src, dst and key, find appropriate for input tunnel.
 *
 * Runs under rcu_read_lock(). The four hash tables are tried from most
 * to least specific: (remote,local), (remote,*), (*,local), (*,*).
 * An exact key match is always required. Within each table, a tunnel
 * that also matches the incoming link and device type wins outright;
 * otherwise it is remembered as a candidate (lower score = better
 * match) and used only if no exact match is found anywhere. As a last
 * resort the per-netns fallback device catches the packet.
 */

static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
                                             __be32 remote, __be32 local,
                                             __be32 key, __be16 gre_proto)
{
        struct net *net = dev_net(dev);
        int link = dev->ifindex;
        unsigned int h0 = HASH(remote);
        unsigned int h1 = HASH(key);
        struct ip_tunnel *t, *cand = NULL;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        /* ETH_P_TEB payload means an Ethernet (gretap) tunnel device */
        int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
                       ARPHRD_ETHER : ARPHRD_IPGRE;
        int score, cand_score = 4;

        /* table 3: fully specified (remote, local) tunnels */
        for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                /* plain ARPHRD_IPGRE devices accept either payload type */
                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                /* score: bit 0 = wrong link, bit 1 = wrong device type */
                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        /* table 2: (remote, *) tunnels — any local address */
        for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
                if (remote != t->parms.iph.daddr ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        /* table 1: (*, local) tunnels; a multicast destination may also
         * match a tunnel whose *remote* is that multicast group */
        for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
                if ((local != t->parms.iph.saddr &&
                     (local != t->parms.iph.daddr ||
                      !ipv4_is_multicast(local))) ||
                    key != t->parms.i_key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        /* table 0: (*, *) wildcard tunnels — key match only */
        for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
                if (t->parms.i_key != key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->dev->type != ARPHRD_IPGRE &&
                    t->dev->type != dev_type)
                        continue;

                score = 0;
                if (t->parms.link != link)
                        score |= 1;
                if (t->dev->type != dev_type)
                        score |= 2;
                if (score == 0)
                        return t;

                if (score < cand_score) {
                        cand = t;
                        cand_score = score;
                }
        }

        if (cand != NULL)
                return cand;

        /* last resort: the per-netns fallback device */
        dev = ign->fb_tunnel_dev;
        if (dev->flags & IFF_UP)
                return netdev_priv(dev);

        return NULL;
}
 340
 341static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
 342                struct ip_tunnel_parm *parms)
 343{
 344        __be32 remote = parms->iph.daddr;
 345        __be32 local = parms->iph.saddr;
 346        __be32 key = parms->i_key;
 347        unsigned int h = HASH(key);
 348        int prio = 0;
 349
 350        if (local)
 351                prio |= 1;
 352        if (remote && !ipv4_is_multicast(remote)) {
 353                prio |= 2;
 354                h ^= HASH(remote);
 355        }
 356
 357        return &ign->tunnels[prio][h];
 358}
 359
/* Convenience wrapper: hash-chain head for an existing tunnel. */
static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
 365
/* Insert @t at the head of its hash chain. Caller holds RTNL. The
 * rcu_assign_pointer() on *tp publishes the node to concurrent RCU
 * lookups only after its ->next has been set, so readers never see a
 * half-linked entry.
 */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
 373
/* Remove @t from its hash chain (no-op if it is not linked). Caller
 * holds RTNL; concurrent RCU readers that already reached @t still see
 * a valid ->next, the caller is responsible for delaying the actual
 * free past a grace period.
 */
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
 388
 389static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
 390                                           struct ip_tunnel_parm *parms,
 391                                           int type)
 392{
 393        __be32 remote = parms->iph.daddr;
 394        __be32 local = parms->iph.saddr;
 395        __be32 key = parms->i_key;
 396        int link = parms->link;
 397        struct ip_tunnel *t;
 398        struct ip_tunnel __rcu **tp;
 399        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
 400
 401        for (tp = __ipgre_bucket(ign, parms);
 402             (t = rtnl_dereference(*tp)) != NULL;
 403             tp = &t->next)
 404                if (local == t->parms.iph.saddr &&
 405                    remote == t->parms.iph.daddr &&
 406                    key == t->parms.i_key &&
 407                    link == t->parms.link &&
 408                    type == t->dev->type)
 409                        break;
 410
 411        return t;
 412}
 413
/* Find the tunnel matching @parms or, when @create is set, allocate
 * and register a new ARPHRD_IPGRE device for it. Returns the tunnel,
 * or NULL on allocation/registration failure. Caller holds RTNL.
 */
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "gre%d");	/* the core picks a free index */

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	/* resolve the underlay and derive the usable MTU before register */
	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	/* reference dropped in ipgre_tunnel_uninit() */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
 458
/* ndo_uninit: unlink the tunnel from its hash chain and drop the
 * reference taken by dev_hold() when the device was created.
 */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
 467
 468
/* ICMP error handler: an incoming ICMP error quotes one of our own
 * outgoing GRE packets. Find the tunnel it belonged to, apply PMTU /
 * redirect updates, and record soft-state error info that the transmit
 * path consumes.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	/* skb->data points at the quoted (our original outgoing) packet */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	__be16       *p = (__be16 *)(skb->data+(iph->ihl<<2));	/* quoted GRE hdr */
	int grehlen = (iph->ihl<<2) + 4;	/* outer IP hdr + base GRE hdr */
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* only need to reach the key word, which follows the
		 * optional checksum word when both are present */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	rcu_read_lock();
	/* The quoted packet was sent by us, so its destination is our
	 * remote and its source our local; key (if any) is the last
	 * 32-bit word of the grehlen span computed above. */
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		goto out;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		goto out;
	}
	/* NBMA and multicast tunnels have no single peer to blame */
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* with inherited TTL, TTL-exceeded is expected (traceroute) */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* rate-limited soft error state consumed by the transmit path */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
}
 569
 570static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
 571{
 572        if (INET_ECN_is_ce(iph->tos)) {
 573                if (skb->protocol == htons(ETH_P_IP)) {
 574                        IP_ECN_set_ce(ip_hdr(skb));
 575                } else if (skb->protocol == htons(ETH_P_IPV6)) {
 576                        IP6_ECN_set_ce(ipv6_hdr(skb));
 577                }
 578        }
 579}
 580
 581static inline u8
 582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
 583{
 584        u8 inner = 0;
 585        if (skb->protocol == htons(ETH_P_IP))
 586                inner = old_iph->tos;
 587        else if (skb->protocol == htons(ETH_P_IPV6))
 588                inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
 589        return INET_ECN_encapsulate(tos, inner);
 590}
 591
/* GRE receive handler (registered for GRE version 0).
 *
 * Parses and validates the GRE header, looks up the matching tunnel,
 * enforces the tunnel's checksum/sequence configuration, strips the
 * encapsulation and re-injects the inner packet on the tunnel device.
 * Always returns 0; packets for which no tunnel exists trigger an
 * ICMP port-unreachable back to the sender.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* base GRE header: flags + protocol */
	__be16 gre_proto;

	/* 16 bytes covers the largest header we parse: base header plus
	 * optional checksum, key and sequence words */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				/* hardware sum folds to 0 when valid */
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	rcu_read_lock();
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct pcpu_tstats *tstats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* WCCPv2 inserts a redirect word before the IP
			 * header; detect it by the missing version nibble */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		/* strip the GRE encapsulation */
		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (rt_is_output_route(skb_rtable(skb)))
				goto drop;
			tunnel->dev->stats.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* checksum must be present and valid exactly when the
		 * tunnel is configured with GRE_CSUM */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/* drop replayed/out-of-order packets; the signed
			 * difference handles sequence-number wraparound */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			/* reload header pointers after pskb_may_pull() */
			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		/* rescope the skb to the tunnel device's namespace */
		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);

		rcu_read_unlock();
		return 0;
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	rcu_read_unlock();
drop_nolock:
	kfree_skb(skb);
	return 0;
}
 730
 731static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 732{
 733        struct ip_tunnel *tunnel = netdev_priv(dev);
 734        struct pcpu_tstats *tstats;
 735        const struct iphdr  *old_iph = ip_hdr(skb);
 736        const struct iphdr  *tiph;
 737        struct flowi4 fl4;
 738        u8     tos;
 739        __be16 df;
 740        struct rtable *rt;                      /* Route to the other host */
 741        struct net_device *tdev;                /* Device to other host */
 742        struct iphdr  *iph;                     /* Our new IP header */
 743        unsigned int max_headroom;              /* The extra header space needed */
 744        int    gre_hlen;
 745        __be32 dst;
 746        int    mtu;
 747
 748        if (dev->type == ARPHRD_ETHER)
 749                IPCB(skb)->flags = 0;
 750
 751        if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
 752                gre_hlen = 0;
 753                tiph = (const struct iphdr *)skb->data;
 754        } else {
 755                gre_hlen = tunnel->hlen;
 756                tiph = &tunnel->parms.iph;
 757        }
 758
 759        if ((dst = tiph->daddr) == 0) {
 760                /* NBMA tunnel */
 761
 762                if (skb_dst(skb) == NULL) {
 763                        dev->stats.tx_fifo_errors++;
 764                        goto tx_error;
 765                }
 766
 767                if (skb->protocol == htons(ETH_P_IP)) {
 768                        rt = skb_rtable(skb);
 769                        dst = rt_nexthop(rt, old_iph->daddr);
 770                }
 771#if IS_ENABLED(CONFIG_IPV6)
 772                else if (skb->protocol == htons(ETH_P_IPV6)) {
 773                        const struct in6_addr *addr6;
 774                        struct neighbour *neigh;
 775                        bool do_tx_error_icmp;
 776                        int addr_type;
 777
 778                        neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
 779                        if (neigh == NULL)
 780                                goto tx_error;
 781
 782                        addr6 = (const struct in6_addr *)&neigh->primary_key;
 783                        addr_type = ipv6_addr_type(addr6);
 784
 785                        if (addr_type == IPV6_ADDR_ANY) {
 786                                addr6 = &ipv6_hdr(skb)->daddr;
 787                                addr_type = ipv6_addr_type(addr6);
 788                        }
 789
 790                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 791                                do_tx_error_icmp = true;
 792                        else {
 793                                do_tx_error_icmp = false;
 794                                dst = addr6->s6_addr32[3];
 795                        }
 796                        neigh_release(neigh);
 797                        if (do_tx_error_icmp)
 798                                goto tx_error_icmp;
 799                }
 800#endif
 801                else
 802                        goto tx_error;
 803        }
 804
 805        tos = tiph->tos;
 806        if (tos == 1) {
 807                tos = 0;
 808                if (skb->protocol == htons(ETH_P_IP))
 809                        tos = old_iph->tos;
 810                else if (skb->protocol == htons(ETH_P_IPV6))
 811                        tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
 812        }
 813
 814        rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
 815                                 tunnel->parms.o_key, RT_TOS(tos),
 816                                 tunnel->parms.link);
 817        if (IS_ERR(rt)) {
 818                dev->stats.tx_carrier_errors++;
 819                goto tx_error;
 820        }
 821        tdev = rt->dst.dev;
 822
 823        if (tdev == dev) {
 824                ip_rt_put(rt);
 825                dev->stats.collisions++;
 826                goto tx_error;
 827        }
 828
 829        df = tiph->frag_off;
 830        if (df)
 831                mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 832        else
 833                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 834
 835        if (skb_dst(skb))
 836                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 837
 838        if (skb->protocol == htons(ETH_P_IP)) {
 839                df |= (old_iph->frag_off&htons(IP_DF));
 840
 841                if ((old_iph->frag_off&htons(IP_DF)) &&
 842                    mtu < ntohs(old_iph->tot_len)) {
 843                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 844                        ip_rt_put(rt);
 845                        goto tx_error;
 846                }
 847        }
 848#if IS_ENABLED(CONFIG_IPV6)
 849        else if (skb->protocol == htons(ETH_P_IPV6)) {
 850                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 851
 852                if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
 853                        if ((tunnel->parms.iph.daddr &&
 854                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 855                            rt6->rt6i_dst.plen == 128) {
 856                                rt6->rt6i_flags |= RTF_MODIFIED;
 857                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 858                        }
 859                }
 860
 861                if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
 862                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 863                        ip_rt_put(rt);
 864                        goto tx_error;
 865                }
 866        }
 867#endif
 868
 869        if (tunnel->err_count > 0) {
 870                if (time_before(jiffies,
 871                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 872                        tunnel->err_count--;
 873
 874                        dst_link_failure(skb);
 875                } else
 876                        tunnel->err_count = 0;
 877        }
 878
 879        max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 880
 881        if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 882            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 883                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 884                if (max_headroom > dev->needed_headroom)
 885                        dev->needed_headroom = max_headroom;
 886                if (!new_skb) {
 887                        ip_rt_put(rt);
 888                        dev->stats.tx_dropped++;
 889                        dev_kfree_skb(skb);
 890                        return NETDEV_TX_OK;
 891                }
 892                if (skb->sk)
 893                        skb_set_owner_w(new_skb, skb->sk);
 894                dev_kfree_skb(skb);
 895                skb = new_skb;
 896                old_iph = ip_hdr(skb);
 897        }
 898
 899        skb_reset_transport_header(skb);
 900        skb_push(skb, gre_hlen);
 901        skb_reset_network_header(skb);
 902        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 903        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 904                              IPSKB_REROUTED);
 905        skb_dst_drop(skb);
 906        skb_dst_set(skb, &rt->dst);
 907
 908        /*
 909         *      Push down and install the IPIP header.
 910         */
 911
 912        iph                     =       ip_hdr(skb);
 913        iph->version            =       4;
 914        iph->ihl                =       sizeof(struct iphdr) >> 2;
 915        iph->frag_off           =       df;
 916        iph->protocol           =       IPPROTO_GRE;
 917        iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
 918        iph->daddr              =       fl4.daddr;
 919        iph->saddr              =       fl4.saddr;
 920
 921        if ((iph->ttl = tiph->ttl) == 0) {
 922                if (skb->protocol == htons(ETH_P_IP))
 923                        iph->ttl = old_iph->ttl;
 924#if IS_ENABLED(CONFIG_IPV6)
 925                else if (skb->protocol == htons(ETH_P_IPV6))
 926                        iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
 927#endif
 928                else
 929                        iph->ttl = ip4_dst_hoplimit(&rt->dst);
 930        }
 931
 932        ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
 933        ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
 934                                   htons(ETH_P_TEB) : skb->protocol;
 935
 936        if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 937                __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
 938
 939                if (tunnel->parms.o_flags&GRE_SEQ) {
 940                        ++tunnel->o_seqno;
 941                        *ptr = htonl(tunnel->o_seqno);
 942                        ptr--;
 943                }
 944                if (tunnel->parms.o_flags&GRE_KEY) {
 945                        *ptr = tunnel->parms.o_key;
 946                        ptr--;
 947                }
 948                if (tunnel->parms.o_flags&GRE_CSUM) {
 949                        *ptr = 0;
 950                        *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
 951                }
 952        }
 953
 954        nf_reset(skb);
 955        tstats = this_cpu_ptr(dev->tstats);
 956        __IPTUNNEL_XMIT(tstats, &dev->stats);
 957        return NETDEV_TX_OK;
 958
 959#if IS_ENABLED(CONFIG_IPV6)
 960tx_error_icmp:
 961        dst_link_failure(skb);
 962#endif
 963tx_error:
 964        dev->stats.tx_errors++;
 965        dev_kfree_skb(skb);
 966        return NETDEV_TX_OK;
 967}
 968
/*
 * Bind the tunnel to its underlying output device and compute a suitable
 * MTU.  If a remote endpoint is configured, a route lookup guesses which
 * physical device traffic will leave through; needed_headroom and the
 * precomputed GRE header length (tunnel->hlen) are derived from the
 * configured output flags.
 *
 * Returns the MTU the tunnel device should use (never below 68, the
 * minimum IPv4 MTU).  Runs under RTNL.
 */
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;	/* outer IP + basic GRE header */

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No usable route: fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;	/* minimum IPv4 MTU */

	tunnel->hlen = addend;

	return mtu;
}
1029
/*
 * Legacy ioctl interface for GRE tunnel management:
 * SIOCGETTUNNEL / SIOCADDTUNNEL / SIOCCHGTUNNEL / SIOCDELTUNNEL.
 *
 * When issued on the fallback device (gre0) the request may address any
 * tunnel in the namespace (looked up from the user-supplied parms);
 * when issued on a specific tunnel device it operates on that device.
 * Runs under RTNL.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			/* On gre0, look up the tunnel named by the parms. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		/* Fall back to this device's own configuration. */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check the requested outer header and GRE flags:
		 * plain IPv4 (ihl 5, only DF allowed), no version bits,
		 * no source routing.
		 */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* A fixed TTL implies PMTU discovery (DF set). */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Ignore key values when the corresponding flag is off. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		/* Only SIOCADDTUNNEL may create a new tunnel. */
		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parms match another live tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Cannot switch a live device between
				 * broadcast and point-to-point modes.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash under the new addresses/keys;
				 * wait out concurrent receive-path readers
				 * before mutating the match fields.
				 */
				ipgre_tunnel_unlink(ign, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					/* New underlying link: recompute MTU. */
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Return the effective configuration to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback device itself may not be deleted. */
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1160
1161static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1162{
1163        struct ip_tunnel *tunnel = netdev_priv(dev);
1164        if (new_mtu < 68 ||
1165            new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1166                return -EINVAL;
1167        dev->mtu = new_mtu;
1168        return 0;
1169}
1170
1171/* Nice toy. Unfortunately, useless in real life :-)
1172   It allows to construct virtual multiprotocol broadcast "LAN"
1173   over the Internet, provided multicast routing is tuned.
1174
1175
   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
1178   I have an impression, that Cisco could make something similar,
1179   but this feature is apparently missing in IOS<=11.2(8).
1180
1181   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1182   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1183
1184   ping -t 255 224.66.66.66
1185
1186   If nobody answers, mbone does not work.
1187
1188   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1189   ip addr add 10.66.66.<somewhat>/24 dev Universe
1190   ifconfig Universe up
1191   ifconfig Universe add fe80::<Your_real_addr>/10
1192   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1193   ftp 10.66.66.66
1194   ...
1195   ftp fec0:6666:6666::193.233.7.65
1196   ...
1197
1198 */
1199
1200static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1201                        unsigned short type,
1202                        const void *daddr, const void *saddr, unsigned int len)
1203{
1204        struct ip_tunnel *t = netdev_priv(dev);
1205        struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1206        __be16 *p = (__be16 *)(iph+1);
1207
1208        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1209        p[0]            = t->parms.o_flags;
1210        p[1]            = htons(type);
1211
1212        /*
1213         *      Set the source hardware address.
1214         */
1215
1216        if (saddr)
1217                memcpy(&iph->saddr, saddr, 4);
1218        if (daddr)
1219                memcpy(&iph->daddr, daddr, 4);
1220        if (iph->daddr)
1221                return t->hlen;
1222
1223        return -t->hlen;
1224}
1225
1226static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1227{
1228        const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1229        memcpy(haddr, &iph->saddr, 4);
1230        return 4;
1231}
1232
/* Link-layer header operations for NBMA and broadcast-mode tunnels. */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
1237
1238#ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * ndo_open for broadcast-mode tunnels: if the remote endpoint is a
 * multicast group, resolve the underlying output device via routing and
 * join the group on it.  Runs under RTNL.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		/* From here on "dev" is the underlying output device. */
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		/* Remember which device we joined on for ipgre_close(). */
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1264
/*
 * ndo_stop for broadcast-mode tunnels: leave the multicast group joined
 * in ipgre_open(), if the device we joined on still exists.
 */
static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev)
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
	}
	return 0;
}
1277
1278#endif
1279
/* Netdev callbacks for regular (ARPHRD_IPGRE) tunnel devices. */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
1292
/*
 * netdev destructor: release the per-cpu stats allocated at init time
 * (free_percpu(NULL) is a no-op if init failed early), then the device.
 */
static void ipgre_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
1298
/*
 * Common netdev setup for ARPHRD_IPGRE devices (also the rtnl_link_ops
 * setup hook).  Defaults assume the basic 4-byte GRE header on top of
 * the outer IPv4 header; ipgre_tunnel_bind_dev() refines them later.
 */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* "hardware" address is an outer IPv4 address */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	/* Keep skb_dst() on transmit: the xmit path uses it for PMTU. */
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
1313
/*
 * ndo_init for a regular GRE tunnel device.  Copies the outer addresses
 * into dev_addr/broadcast, selects header_ops (NBMA mode when no remote
 * is configured, broadcast mode for multicast remotes) and allocates
 * per-cpu stats.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* "Hardware" addresses are the outer IPv4 endpoints. */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Multicast "LAN" mode needs a definite local
			 * address to join the group with.
			 */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		/* NBMA mode: destination is supplied per-packet. */
		dev->header_ops = &ipgre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
1346
/*
 * Initialize the per-namespace fallback device ("gre0"): a tunnel with
 * wildcard endpoints whose outer header template is filled in here.
 */
static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;	/* outer IP + basic GRE */

	dev_hold(dev);
}
1362
1363
/* Receive and ICMP-error hooks registered with the GRE demultiplexer. */
static const struct gre_protocol ipgre_protocol = {
	.handler     = ipgre_rcv,
	.err_handler = ipgre_err,
};
1368
1369static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1370{
1371        int prio;
1372
1373        for (prio = 0; prio < 4; prio++) {
1374                int h;
1375                for (h = 0; h < HASH_SIZE; h++) {
1376                        struct ip_tunnel *t;
1377
1378                        t = rtnl_dereference(ign->tunnels[prio][h]);
1379
1380                        while (t != NULL) {
1381                                unregister_netdevice_queue(t->dev, head);
1382                                t = rtnl_dereference(t->next);
1383                        }
1384                }
1385        }
1386}
1387
/*
 * Per-namespace init: create and register the fallback device "gre0"
 * and link it into the wildcard hash chain so it catches GRE packets
 * matching no specific tunnel.
 */
static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	/* Registration failed, so the destructor was never armed;
	 * free stats and the device directly.
	 */
	ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}
1416
1417static void __net_exit ipgre_exit_net(struct net *net)
1418{
1419        struct ipgre_net *ign;
1420        LIST_HEAD(list);
1421
1422        ign = net_generic(net, ipgre_net_id);
1423        rtnl_lock();
1424        ipgre_destroy_tunnels(ign, &list);
1425        unregister_netdevice_many(&list);
1426        rtnl_unlock();
1427}
1428
/* Per-network-namespace lifecycle hooks and private-state sizing. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};
1435
1436static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1437{
1438        __be16 flags;
1439
1440        if (!data)
1441                return 0;
1442
1443        flags = 0;
1444        if (data[IFLA_GRE_IFLAGS])
1445                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1446        if (data[IFLA_GRE_OFLAGS])
1447                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1448        if (flags & (GRE_VERSION|GRE_ROUTING))
1449                return -EINVAL;
1450
1451        return 0;
1452}
1453
1454static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1455{
1456        __be32 daddr;
1457
1458        if (tb[IFLA_ADDRESS]) {
1459                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1460                        return -EINVAL;
1461                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1462                        return -EADDRNOTAVAIL;
1463        }
1464
1465        if (!data)
1466                goto out;
1467
1468        if (data[IFLA_GRE_REMOTE]) {
1469                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1470                if (!daddr)
1471                        return -EINVAL;
1472        }
1473
1474out:
1475        return ipgre_tunnel_validate(tb, data);
1476}
1477
/*
 * Translate IFLA_GRE_* netlink attributes into an ip_tunnel_parm.
 * Fields for unspecified attributes are left zeroed; the outer protocol
 * is always GRE.
 */
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	/* Path MTU discovery defaults to on unless explicitly disabled. */
	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
1518
/*
 * ndo_init for "gretap" devices: bind to the underlying device and
 * allocate per-cpu stats.  Unlike ipgre_tunnel_init() there is no
 * header_ops selection — the device behaves like Ethernet.
 */
static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
1536
/* Netdev callbacks for Ethernet-style "gretap" tunnel devices. */
static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
1546
/*
 * netdev setup for "gretap": an Ethernet-looking device whose frames
 * are carried inside GRE.
 */
static void ipgre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1558
/*
 * rtnl_link_ops->newlink: create a new "gre"/"gretap" device from
 * netlink attributes, register it and link it into the tunnel hash.
 * Runs under RTNL.
 */
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	/* Refuse to duplicate an existing tunnel's parameters. */
	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	/* User-supplied MTU takes precedence over the computed one. */
	mtu = ipgre_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Hold a reference for the hash-table link. */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
1595
1596static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1597                            struct nlattr *data[])
1598{
1599        struct ip_tunnel *t, *nt;
1600        struct net *net = dev_net(dev);
1601        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1602        struct ip_tunnel_parm p;
1603        int mtu;
1604
1605        if (dev == ign->fb_tunnel_dev)
1606                return -EINVAL;
1607
1608        nt = netdev_priv(dev);
1609        ipgre_netlink_parms(data, &p);
1610
1611        t = ipgre_tunnel_locate(net, &p, 0);
1612
1613        if (t) {
1614                if (t->dev != dev)
1615                        return -EEXIST;
1616        } else {
1617                t = nt;
1618
1619                if (dev->type != ARPHRD_ETHER) {
1620                        unsigned int nflags = 0;
1621
1622                        if (ipv4_is_multicast(p.iph.daddr))
1623                                nflags = IFF_BROADCAST;
1624                        else if (p.iph.daddr)
1625                                nflags = IFF_POINTOPOINT;
1626
1627                        if ((dev->flags ^ nflags) &
1628                            (IFF_POINTOPOINT | IFF_BROADCAST))
1629                                return -EINVAL;
1630                }
1631
1632                ipgre_tunnel_unlink(ign, t);
1633                t->parms.iph.saddr = p.iph.saddr;
1634                t->parms.iph.daddr = p.iph.daddr;
1635                t->parms.i_key = p.i_key;
1636                if (dev->type != ARPHRD_ETHER) {
1637                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
1638                        memcpy(dev->broadcast, &p.iph.daddr, 4);
1639                }
1640                ipgre_tunnel_link(ign, t);
1641                netdev_state_change(dev);
1642        }
1643
1644        t->parms.o_key = p.o_key;
1645        t->parms.iph.ttl = p.iph.ttl;
1646        t->parms.iph.tos = p.iph.tos;
1647        t->parms.iph.frag_off = p.iph.frag_off;
1648
1649        if (t->parms.link != p.link) {
1650                t->parms.link = p.link;
1651                mtu = ipgre_tunnel_bind_dev(dev);
1652                if (!tb[IFLA_MTU])
1653                        dev->mtu = mtu;
1654                netdev_state_change(dev);
1655        }
1656
1657        return 0;
1658}
1659
1660static size_t ipgre_get_size(const struct net_device *dev)
1661{
1662        return
1663                /* IFLA_GRE_LINK */
1664                nla_total_size(4) +
1665                /* IFLA_GRE_IFLAGS */
1666                nla_total_size(2) +
1667                /* IFLA_GRE_OFLAGS */
1668                nla_total_size(2) +
1669                /* IFLA_GRE_IKEY */
1670                nla_total_size(4) +
1671                /* IFLA_GRE_OKEY */
1672                nla_total_size(4) +
1673                /* IFLA_GRE_LOCAL */
1674                nla_total_size(4) +
1675                /* IFLA_GRE_REMOTE */
1676                nla_total_size(4) +
1677                /* IFLA_GRE_TTL */
1678                nla_total_size(1) +
1679                /* IFLA_GRE_TOS */
1680                nla_total_size(1) +
1681                /* IFLA_GRE_PMTUDISC */
1682                nla_total_size(1) +
1683                0;
1684}
1685
/*
 * rtnl_link_ops->fill_info: dump the tunnel's configuration as
 * IFLA_GRE_* attributes.  Returns -EMSGSIZE if the skb runs out of
 * room (sized by ipgre_get_size()).
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1708
/* Netlink attribute validation policy for "gre"/"gretap" links.
 * LOCAL/REMOTE are validated by exact length (an IPv4 address) rather
 * than by type; the remaining attributes are fixed-width integers.
 */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
1721
/* rtnetlink ops for plain layer-3 GRE tunnels ("ip link add ... type gre"). */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1734
/* rtnetlink ops for Ethernet-over-GRE ("gretap") devices; shares the
 * policy, newlink/changelink and dump helpers with the plain GRE kind,
 * differing only in setup/validate.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1747
1748/*
1749 *      And now the modules code and kernel interface.
1750 */
1751
1752static int __init ipgre_init(void)
1753{
1754        int err;
1755
1756        pr_info("GRE over IPv4 tunneling driver\n");
1757
1758        err = register_pernet_device(&ipgre_net_ops);
1759        if (err < 0)
1760                return err;
1761
1762        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1763        if (err < 0) {
1764                pr_info("%s: can't add protocol\n", __func__);
1765                goto add_proto_failed;
1766        }
1767
1768        err = rtnl_link_register(&ipgre_link_ops);
1769        if (err < 0)
1770                goto rtnl_link_failed;
1771
1772        err = rtnl_link_register(&ipgre_tap_ops);
1773        if (err < 0)
1774                goto tap_ops_failed;
1775
1776out:
1777        return err;
1778
1779tap_ops_failed:
1780        rtnl_link_unregister(&ipgre_link_ops);
1781rtnl_link_failed:
1782        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1783add_proto_failed:
1784        unregister_pernet_device(&ipgre_net_ops);
1785        goto out;
1786}
1787
1788static void __exit ipgre_fini(void)
1789{
1790        rtnl_link_unregister(&ipgre_tap_ops);
1791        rtnl_link_unregister(&ipgre_link_ops);
1792        if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1793                pr_info("%s: can't remove protocol\n", __func__);
1794        unregister_pernet_device(&ipgre_net_ops);
1795}
1796
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Let "ip link add type gre/gretap" auto-load this module. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
/* Alias for the per-net fallback device created by the pernet ops. */
MODULE_ALIAS_NETDEV("gre0");
1803
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.