linux-old/net/ipv4/ip_gre.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     GRE over IP protocol decoder. 
   3 *
   4 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#include <linux/config.h>
  14#include <linux/module.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/kernel.h>
  18#include <asm/uaccess.h>
  19#include <linux/skbuff.h>
  20#include <linux/netdevice.h>
  21#include <linux/in.h>
  22#include <linux/tcp.h>
  23#include <linux/udp.h>
  24#include <linux/if_arp.h>
  25#include <linux/mroute.h>
  26#include <linux/init.h>
  27#include <linux/in6.h>
  28#include <linux/inetdevice.h>
  29#include <linux/igmp.h>
  30
  31#include <net/sock.h>
  32#include <net/ip.h>
  33#include <net/icmp.h>
  34#include <net/protocol.h>
  35#include <net/ipip.h>
  36#include <net/arp.h>
  37#include <net/checksum.h>
  38
  39#ifdef CONFIG_IPV6
  40#include <net/ipv6.h>
  41#include <net/ip6_fib.h>
  42#include <net/ip6_route.h>
  43#endif
  44
  45/*
  46   Problems & solutions
  47   --------------------
  48
  49   1. The most important issue is detecting local dead loops.
  50   They would cause complete host lockup in transmit, which
  51   would be "resolved" by stack overflow or, if queueing is enabled,
  52   with infinite looping in net_bh.
  53
  54   We cannot track such dead loops during route installation,
  55   it is infeasible task. The most general solutions would be
  56   to keep skb->encapsulation counter (sort of local ttl),
  57   and silently drop packet when it expires. It is the best
  58   solution, but it supposes maintaining a new variable in ALL
  59   skb, even if no tunneling is used.
  60
  61   Current solution: t->recursion lock breaks dead loops. It looks 
  62   like dev->tbusy flag, but I preferred new variable, because
  63   the semantics is different. One day, when hard_start_xmit
  64   will be multithreaded we will have to use skb->encapsulation.
  65
  66
  67
  68   2. Networking dead loops would not kill routers, but would really
  69   kill network. IP hop limit plays role of "t->recursion" in this case,
  70   if we copy it from packet being encapsulated to upper header.
  71   It is very good solution, but it introduces two problems:
  72
  73   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  74     do not work over tunnels.
  75   - traceroute does not work. I planned to relay ICMP from tunnel,
  76     so that this problem would be solved and traceroute output
  77     would be even more informative. This idea appeared to be wrong:
  78     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  79     true router now :-)), all routers (at least, in neighbourhood of mine)
  80     return only 8 bytes of payload. It is the end.
  81
  82   Hence, if we want that OSPF worked or traceroute said something reasonable,
  83   we should search for another solution.
  84
  85   One of them is to parse packet trying to detect inner encapsulation
  86   made by our node. It is difficult or even impossible, especially,
  87   taking into account fragmentation. To be short, it is not a solution at all.
  88
  89   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  90   We force DF flag on tunnels with preconfigured hop limit,
  91   that is ALL. :-) Well, it does not remove the problem completely,
  92   but exponential growth of network traffic is changed to linear
  93   (branches, that exceed pmtu are pruned) and tunnel mtu
  94   quickly degrades to value <68, where looping stops.
  95   Yes, it is not good if there exists a router in the loop,
  96   which does not force DF, even when encapsulating packets have DF set.
  97   But it is not our problem! Nobody could accuse us, we made
  98   all that we could make. Even if it is your gated who injected
  99   fatal route to network, even if it were you who configured
 100   fatal static route: you are innocent. :-)
 101
 102
 103
 104   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
 105   practically identical code. It would be good to glue them
 106   together, but it is not very evident, how to make them modular.
 107   sit is integral part of IPv6, ipip and gre are naturally modular.
 108   We could extract common parts (hash table, ioctl etc)
 109   to a separate module (ip_tunnel.c).
 110
 111   Alexey Kuznetsov.
 112 */
 113
 114static int ipgre_tunnel_init(struct device *dev);
    /* Stored in dev->init by ipgre_tunnel_locate() for every tunnel device
       it allocates. */
 115
 116/* Fallback tunnel: no source, no destination, no key, no options */
 117
 118static int ipgre_fb_tunnel_init(struct device *dev);
 119
    /* Statically allocated device for the fallback tunnel.  The initializer is
       positional; ipgre_fb_tunnel_init presumably lands in the init member --
       confirm against struct device.  ipgre_tunnel_destroy() never kfree()s
       this object. */
 120static struct device ipgre_fb_tunnel_dev = {
 121        NULL, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, ipgre_fb_tunnel_init,
 122};
 123
    /* Tunnel state that ipgre_tunnel_lookup() returns when no configured tunnel
       matches and ipgre_fb_tunnel_dev is IFF_UP.  Positional initializer: it
       points back at ipgre_fb_tunnel_dev and carries the name "gre0". */
 124static struct ip_tunnel ipgre_fb_tunnel = {
 125        NULL, &ipgre_fb_tunnel_dev, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
 126};
 127
 128/* Tunnel hash table */
 129
 130/*
 131   4 hash tables:
 132
 133   3: (remote,local)
 134   2: (remote,*)
 135   1: (*,local)
 136   0: (*,*)
 137
 138   We require exact key match i.e. if a key is present in packet
 139   it will match only tunnel with the same key; if it is not present,
 140   it will match only keyless tunnel.
 141
 142   All keyless packets that do not match a configured keyless tunnel
 143   will match the fallback tunnel.
 144 */
 145
 146#define HASH_SIZE  16
    /* Fold an address or key into a 4-bit bucket index (0..15, i.e. below
       HASH_SIZE).  The argument is not parenthesized in the expansion; every
       use in this file passes a plain identifier. */
 147#define HASH(addr) ((addr^(addr>>4))&0xF)
 148
    /* First index is the "prio" computed in ipgre_bucket(): bit 0 is set when
       the tunnel has a local address, bit 1 when it has a non-multicast remote
       address.  Second index is the hash bucket. */
 149static struct ip_tunnel *tunnels[4][HASH_SIZE];
 150
    /* Named views of the four tables: r = remote configured, l = local
       configured, wc = wildcard (neither). */
 151#define tunnels_r_l     (tunnels[3])
 152#define tunnels_r       (tunnels[2])
 153#define tunnels_l       (tunnels[1])
 154#define tunnels_wc      (tunnels[0])
 155
 156/* Given remote address, local address and key, find the appropriate tunnel for input. */
 157
 158static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key)
 159{
            /* Search order, most specific table first:
                 (remote,local) -> (remote,*) -> (*,local) -> (*,*) -> fallback.
               A configured tunnel matches only if its i_key equals key and its
               device has IFF_UP set.  Returns NULL when nothing matches and the
               fallback device is not up.

               Bucket indices mirror ipgre_bucket(): tables that key on a remote
               address use HASH(remote)^HASH(key), the other two use HASH(key).
               key is compared as passed in; no byte-order conversion is done here.
             */
 160        unsigned h0 = HASH(remote);
 161        unsigned h1 = HASH(key);
 162        struct ip_tunnel *t;
 163
 164        for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
 165                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 166                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 167                                return t;
 168                }
 169        }
 170        for (t = tunnels_r[h0^h1]; t; t = t->next) {
 171                if (remote == t->parms.iph.daddr) {
 172                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 173                                return t;
 174                }
 175        }
 176        for (t = tunnels_l[h1]; t; t = t->next) {
                    /* Besides a plain local-address match, accept a tunnel whose
                       configured remote is the multicast address the packet was
                       addressed to. */
 177                if (local == t->parms.iph.saddr ||
 178                     (local == t->parms.iph.daddr && MULTICAST(local))) {
 179                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 180                                return t;
 181                }
 182        }
 183        for (t = tunnels_wc[h1]; t; t = t->next) {
 184                if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
 185                        return t;
 186        }
            /* Last resort: the fallback tunnel.  Note the key is not compared here. */
 187        if (ipgre_fb_tunnel_dev.flags&IFF_UP)
 188                return &ipgre_fb_tunnel;
 189        return NULL;
 190}
 191
 192static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
 193{
            /* Return the address of the hash-chain head that tunnel t belongs to,
               derived from its configured addresses and input key.
               ipgre_tunnel_locate() repeats the same computation inline.
             */
 194        u32 remote = t->parms.iph.daddr;
 195        u32 local = t->parms.iph.saddr;
 196        u32 key = t->parms.i_key;
 197        unsigned h = HASH(key);
 198        int prio = 0;
 199
 200        if (local)
 201                prio |= 1;
            /* A multicast remote is treated like "no remote": it neither selects
               a remote-keyed table nor contributes to the hash. */
 202        if (remote && !MULTICAST(remote)) {
 203                prio |= 2;
 204                h ^= HASH(remote);
 205        }
 206
 207        return &tunnels[prio][h];
 208}
 209
 210static void ipgre_tunnel_link(struct ip_tunnel *t)
 211{
            /* Insert t at the head of its hash chain. */
 212        struct ip_tunnel **tp = ipgre_bucket(t);
 213
 214        t->next = *tp;
            /* Write barrier between setting t->next and publishing t in the
               bucket head -- presumably so a concurrent chain walker never
               follows an unset next pointer. */
 215        wmb();
 216        *tp = t;
 217}
 218
 219static void ipgre_tunnel_unlink(struct ip_tunnel *t)
 220{
            /* Remove t from its hash chain; does nothing if t is not on the
               chain that ipgre_bucket() computes for it. */
 221        struct ip_tunnel **tp;
 222
 223        for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) {
 224                if (t == *tp) {
 225                        *tp = t->next;
                            /* Called after t is off the chain and before returning;
                               presumably waits for bottom-half code that may still
                               hold t -- confirm against synchronize_bh(). */
 226                        synchronize_bh();
 227                        break;
 228                }
 229        }
 230}
 231
 232static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create)
 233{
            /* Find the tunnel whose saddr, daddr and i_key all equal those in
               parms.  If none exists and create is non-zero, allocate, register
               and link a new tunnel device configured from parms.

               Returns the tunnel, or NULL when it was not found and create is
               zero, or when allocation, name selection or registration failed.
               When a name is generated it is copied back into parms->name.
             */
 234        u32 remote = parms->iph.daddr;
 235        u32 local = parms->iph.saddr;
 236        u32 key = parms->i_key;
 237        struct ip_tunnel *t, **tp, *nt;
 238        struct device *dev;
 239        unsigned h = HASH(key);
 240        int prio = 0;
 241
            /* Same table/bucket selection as ipgre_bucket(). */
 242        if (local)
 243                prio |= 1;
 244        if (remote && !MULTICAST(remote)) {
 245                prio |= 2;
 246                h ^= HASH(remote);
 247        }
 248        for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
 249                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 250                        if (key == t->parms.i_key)
 251                                return t;
 252                }
 253        }
 254        if (!create)
 255                return NULL;
 256
            /* The use count taken here is kept for the lifetime of the tunnel;
               ipgre_tunnel_destroy() drops it. */
 257        MOD_INC_USE_COUNT;
            /* One allocation holds the struct device immediately followed by its
               struct ip_tunnel; dev->priv points at the latter. */
 258        dev = kmalloc(sizeof(*dev) + sizeof(*t), GFP_KERNEL);
 259        if (dev == NULL) {
 260                MOD_DEC_USE_COUNT;
 261                return NULL;
 262        }
 263        memset(dev, 0, sizeof(*dev) + sizeof(*t));
 264        dev->priv = (void*)(dev+1);
 265        nt = (struct ip_tunnel*)dev->priv;
 266        nt->dev = dev;
            /* dev->name aliases nt->parms.name, so the memcpy below also sets
               the device name.
               NOTE(review): nothing in this function checks that parms->name
               is NUL-terminated -- confirm the callers guarantee it. */
 267        dev->name = nt->parms.name;
 268        dev->init = ipgre_tunnel_init;
 269        memcpy(&nt->parms, parms, sizeof(*parms));
 270        if (dev->name[0] == 0) {
                    /* No name supplied: take the first of gre1..gre99 for which
                       dev_get() finds no existing device. */
 271                int i;
 272                for (i=1; i<100; i++) {
 273                        sprintf(dev->name, "gre%d", i);
 274                        if (dev_get(dev->name) == NULL)
 275                                break;
 276                }
 277                if (i==100)
 278                        goto failed;
 279                memcpy(parms->name, dev->name, IFNAMSIZ);
 280        }
 281        if (register_netdevice(dev) < 0)
 282                goto failed;
 283
 284        ipgre_tunnel_link(nt);
 285        /* Do not decrement MOD_USE_COUNT here. */
 286        return nt;
 287
 288failed:
            /* Frees the device and the embedded tunnel together. */
 289        kfree(dev);
 290        MOD_DEC_USE_COUNT;
 291        return NULL;
 292}
 293
 294static void ipgre_tunnel_destroy(struct device *dev)
 295{
            /* Unlink the tunnel behind dev from the hash table and, unless dev
               is the static fallback device, free it and drop the module use
               count taken in ipgre_tunnel_locate(). */
 296        ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv);
 297
 298        if (dev != &ipgre_fb_tunnel_dev) {
                    /* kfree(dev) also releases the ip_tunnel allocated in the
                       same block by ipgre_tunnel_locate(). */
 299                kfree(dev);
 300                MOD_DEC_USE_COUNT;
 301        }
 302}
 303
 304
 305void ipgre_err(struct sk_buff *skb, unsigned char *dp, int len)
 306{
            /* ICMP error handler for GRE.

               skb  the received ICMP message; type and code are read from
                    skb->h.icmph.
               dp   start of the IP header quoted inside the ICMP message
                    (presumably the outer header of a packet this host sent).
               len  compared against the quoted header length; presumably the
                    number of quoted bytes available at dp.

               Default build (I_WISH_WORLD_WERE_PERFECT not defined): only soft
               error state is kept.  For host-unreachable-like errors and TTL
               exceeded, the matching tunnel's err_count/err_time are updated;
               ipgre_tunnel_xmit() later reports them via dst_link_failure().

               Alternative build: the error is translated and relayed to the
               sender of the encapsulated packet with icmp_send().
             */
 307#ifndef I_WISH_WORLD_WERE_PERFECT
 308
 309/* It is not :-( All the routers (except for Linux) return only
 310   8 bytes of packet payload. It means, that precise relaying of
 311   ICMP in the real Internet is absolutely infeasible.
 312
 313   Moreover, Cisco "wise men" put GRE key to the third word
 314   in GRE header. It makes impossible maintaining even soft state for keyed
 315   GRE tunnels with enabled checksum. Tell them "thank you".
 316
 317   Well, I wonder, rfc1812 was written by Cisco employee,
 318   what the hell these idiots break standards established
 319   by themselves???
 320 */
 321
 322        struct iphdr *iph = (struct iphdr*)dp;
 323        u16          *p = (u16*)(dp+(iph->ihl<<2));
            /* grehlen counts from dp: quoted IP header + 4 bytes of base GRE
               header, extended below up to and including the key word. */
 324        int grehlen = (iph->ihl<<2) + 4;
 325        int type = skb->h.icmph->type;
 326        int code = skb->h.icmph->code;
 327        struct ip_tunnel *t;
 328        u16 flags;
 329
 330        flags = p[0];
 331        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
 332                if (flags&(GRE_VERSION|GRE_ROUTING))
 333                        return;
                    /* Only the key matters for the lookup; when a checksum word
                       is present it precedes the key, so it is counted too. */
 334                if (flags&GRE_KEY) {
 335                        grehlen += 4;
 336                        if (flags&GRE_CSUM)
 337                                grehlen += 4;
 338                }
 339        }
 340
 341        /* If only 8 bytes returned, keyed message will be dropped here */
 342        if (len < grehlen)
 343                return;
 344
 345        switch (type) {
 346        default:
 347        case ICMP_PARAMETERPROB:
 348                return;
 349
 350        case ICMP_DEST_UNREACH:
 351                switch (code) {
 352                case ICMP_SR_FAILED:
 353                case ICMP_PORT_UNREACH:
 354                        /* Impossible event. */
 355                        return;
 356                case ICMP_FRAG_NEEDED:
 357                        /* Soft state for pmtu is maintained by IP core. */
 358                        return;
 359                default:
 360                        /* All others are translated to HOST_UNREACH.
 361                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 362                           I believe they are just ether pollution. --ANK
 363                         */
 364                        break;
 365                }
 366                break;
 367        case ICMP_TIME_EXCEEDED:
 368                if (code != ICMP_EXC_TTL)
 369                        return;
 370                break;
 371        }
 372
            /* The quoted packet is one we sent, so its daddr is the tunnel's
               remote and its saddr the tunnel's local address.

               grehlen is measured from the start of the IP header, so the key
               (the last word counted in grehlen) must be addressed relative to
               iph.  Indexing from the GRE header p would land ihl words past
               the key and outside the range checked by "len < grehlen" above.
             */
 373        t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)iph) + (grehlen>>2) - 1) : 0);
 374        if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr))
 375                return;
 376
            /* With no fixed TTL configured the outer TTL is inherited from the
               inner packet (see ipgre_tunnel_xmit), so TTL expiry is not
               counted as a tunnel error. */
 377        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 378                return;
 379
            /* Count errors arriving within IPTUNNEL_ERR_TIMEO of the previous
               one; otherwise restart the count at 1. */
 380        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
 381                t->err_count++;
 382        else
 383                t->err_count = 1;
 384        t->err_time = jiffies;
 385        return;
 386#else
            /* Relay variant, compiled only when I_WISH_WORLD_WERE_PERFECT is
               defined.  Here grehlen covers every optional GRE field so that
               eiph points at the encapsulated IP header. */
 387        struct iphdr *iph = (struct iphdr*)dp;
 388        struct iphdr *eiph;
 389        u16          *p = (u16*)(dp+(iph->ihl<<2));
 390        int type = skb->h.icmph->type;
 391        int code = skb->h.icmph->code;
 392        int rel_type = 0;
 393        int rel_code = 0;
 394        int rel_info = 0;
 395        u16 flags;
 396        int grehlen = (iph->ihl<<2) + 4;
 397        struct sk_buff *skb2;
 398        struct rtable *rt;
 399
 400        if (p[1] != __constant_htons(ETH_P_IP))
 401                return;
 402
 403        flags = p[0];
 404        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
 405                if (flags&(GRE_VERSION|GRE_ROUTING))
 406                        return;
 407                if (flags&GRE_CSUM)
 408                        grehlen += 4;
 409                if (flags&GRE_KEY)
 410                        grehlen += 4;
 411                if (flags&GRE_SEQ)
 412                        grehlen += 4;
 413        }
 414        if (len < grehlen + sizeof(struct iphdr))
 415                return;
 416        eiph = (struct iphdr*)(dp + grehlen);
 417
 418        switch (type) {
 419        default:
 420                return;
 421        case ICMP_PARAMETERPROB:
 422                if (skb->h.icmph->un.gateway < (iph->ihl<<2))
 423                        return;
 424
 425                /* So... This guy found something strange INSIDE encapsulated
 426                   packet. Well, he is fool, but what can we do ?
 427                 */
 428                rel_type = ICMP_PARAMETERPROB;
 429                rel_info = skb->h.icmph->un.gateway - grehlen;
 430                break;
 431
 432        case ICMP_DEST_UNREACH:
 433                switch (code) {
 434                case ICMP_SR_FAILED:
 435                case ICMP_PORT_UNREACH:
 436                        /* Impossible event. */
 437                        return;
 438                case ICMP_FRAG_NEEDED:
 439                        /* And it is the only really necessary thing :-) */
 440                        rel_info = ntohs(skb->h.icmph->un.frag.mtu);
 441                        if (rel_info < grehlen+68)
 442                                return;
 443                        rel_info -= grehlen;
 444                        /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
 445                        if (rel_info > ntohs(eiph->tot_len))
 446                                return;
 447                        break;
 448                default:
 449                        /* All others are translated to HOST_UNREACH.
 450                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 451                           I believe, it is just ether pollution. --ANK
 452                         */
 453                        rel_type = ICMP_DEST_UNREACH;
 454                        rel_code = ICMP_HOST_UNREACH;
 455                        break;
 456                }
 457                break;
 458        case ICMP_TIME_EXCEEDED:
 459                if (code != ICMP_EXC_TTL)
 460                        return;
 461                break;
 462        }
 463
 464        /* Prepare fake skb to feed it to icmp_send */
 465        skb2 = skb_clone(skb, GFP_ATOMIC);
 466        if (skb2 == NULL)
 467                return;
 468        dst_release(skb2->dst);
 469        skb2->dst = NULL;
            /* NOTE(review): eiph is dp + grehlen; if dp lies at or after
               skb->data this difference is negative, i.e. the operands look
               reversed.  Confirm before ever enabling this branch. */
 470        skb_pull(skb2, skb->data - (u8*)eiph);
 471        skb2->nh.raw = skb2->data;
 472
 473        /* Try to guess incoming interface */
 474        if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
 475                kfree_skb(skb2);
 476                return;
 477        }
 478        skb2->dev = rt->u.dst.dev;
 479
 480        /* route "incoming" packet */
 481        if (rt->rt_flags&RTCF_LOCAL) {
 482                ip_rt_put(rt);
 483                rt = NULL;
 484                if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
 485                    rt->u.dst.dev->type != ARPHRD_IPGRE) {
 486                        ip_rt_put(rt);
 487                        kfree_skb(skb2);
 488                        return;
 489                }
                    /* NOTE(review): on this path skb2->dst is still NULL, yet
                       the pmtu code below dereferences it -- verify. */
 490        } else {
 491                ip_rt_put(rt);
 492                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
 493                    skb2->dst->dev->type != ARPHRD_IPGRE) {
 494                        kfree_skb(skb2);
 495                        return;
 496                }
 497        }
 498
 499        /* change mtu on this route */
 500        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 501                if (rel_info > skb2->dst->pmtu) {
 502                        kfree_skb(skb2);
 503                        return;
 504                }
 505                skb2->dst->pmtu = rel_info;
 506                rel_info = htonl(rel_info);
 507        } else if (type == ICMP_TIME_EXCEEDED) {
 508                struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
 509                if (t->parms.iph.ttl) {
 510                        rel_type = ICMP_DEST_UNREACH;
 511                        rel_code = ICMP_HOST_UNREACH;
 512                }
 513        }
 514
 515        icmp_send(skb2, rel_type, rel_code, rel_info);
 516        kfree_skb(skb2);
 517#endif
 518}
 519
 520int ipgre_rcv(struct sk_buff *skb, unsigned short len)
 521{
            /* Receive handler for GRE packets.

               Parses the GRE header at skb->h.raw, looks up the tunnel by the
               outer source/destination address and key, strips the outer
               headers and hands the inner packet to netif_rx() on the tunnel
               device.  Always returns 0; the skb is either passed on or freed.

               NOTE(review): nothing in this function checks that len covers
               the optional fields before they are read -- confirm the caller
               guarantees it.
             */
 522        struct iphdr *iph = skb->nh.iph;
 523        u8     *h = skb->h.raw;
 524        u16    flags = *(u16*)h;
 525        u16    csum = 0;
 526        u32    key = 0;
 527        u32    seqno = 0;
 528        struct ip_tunnel *tunnel;
            /* Bytes of GRE header consumed so far: flags (2) + protocol (2). */
 529        int    offset = 4;
 530
 531        if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
 532                /* - Version must be 0.
 533                   - We do not support routing headers.
 534                 */
 535                if (flags&(GRE_VERSION|GRE_ROUTING))
 536                        goto drop;
 537
                    /* Optional words follow in the order checksum, key, sequence. */
 538                if (flags&GRE_CSUM) {
                            /* Checksum over len bytes starting at the GRE header;
                               a non-zero result is treated as a failure below. */
 539                        csum = ip_compute_csum(h, len);
 540                        offset += 4;
 541                }
 542                if (flags&GRE_KEY) {
                            /* Kept exactly as read (no ntohl), unlike seqno. */
 543                        key = *(u32*)(h + offset);
 544                        offset += 4;
 545                }
 546                if (flags&GRE_SEQ) {
 547                        seqno = ntohl(*(u32*)(h + offset));
 548                        offset += 4;
 549                }
 550        }
 551
 552        if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
                    /* mac.raw keeps pointing at the outer IP header; nh.raw moves
                       to the payload that follows the GRE header. */
 553                skb->mac.raw = skb->nh.raw;
 554                skb->nh.raw = skb_pull(skb, h + offset - skb->data);
 555                memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 556                skb->ip_summed = 0;
                    /* Second 16-bit word of the GRE header is the payload protocol. */
 557                skb->protocol = *(u16*)(h + 2);
 558                skb->pkt_type = PACKET_HOST;
 559#ifdef CONFIG_NET_IPGRE_BROADCAST
 560                if (MULTICAST(iph->daddr)) {
 561                        /* Looped back packet, drop it! */
 562                        if (((struct rtable*)skb->dst)->key.iif == 0)
 563                                goto drop;
 564                        tunnel->stat.multicast++;
 565                        skb->pkt_type = PACKET_BROADCAST;
 566                }
 567#endif
 568
                    /* Reject a bad checksum, and also a packet without checksum
                       when the tunnel is configured to require one. */
 569                if (((flags&GRE_CSUM) && csum) ||
 570                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
 571                        tunnel->stat.rx_crc_errors++;
 572                        tunnel->stat.rx_errors++;
 573                        goto drop;
 574                }
 575                if (tunnel->parms.i_flags&GRE_SEQ) {
                            /* Drop when the sequence number is missing, or is behind
                               the expected one (signed difference, so wrap-around
                               is tolerated).  i_seqno == 0 accepts any number. */
 576                        if (!(flags&GRE_SEQ) ||
 577                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
 578                                tunnel->stat.rx_fifo_errors++;
 579                                tunnel->stat.rx_errors++;
 580                                goto drop;
 581                        }
 582                        tunnel->i_seqno = seqno + 1;
 583                }
 584                tunnel->stat.rx_packets++;
 585                tunnel->stat.rx_bytes += skb->len;
 586                skb->dev = tunnel->dev;
 587                dst_release(skb->dst);
 588                skb->dst = NULL;
 589                netif_rx(skb);
 590                return(0);
 591        }
            /* No tunnel (not even the fallback) accepts this packet. */
 592        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
 593
 594drop:
 595        kfree_skb(skb);
 596        return(0);
 597}
 598
 599static int ipgre_tunnel_xmit(struct sk_buff *skb, struct device *dev)
 600{
            /* Transmit routine of a GRE tunnel device: wrap skb in an outer IP
               header plus GRE header and send it with ip_send().

               Always returns 0.  On every failure path the skb is freed here and
               tx_errors (or tx_dropped, for the reallocation failure) is bumped.
               tunnel->recursion is incremented on entry and decremented on every
               exit path.
             */
 601        struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
 602        struct net_device_stats *stats = &tunnel->stat;
 603        struct iphdr  *old_iph = skb->nh.iph;
 604        struct iphdr  *tiph;
 605        u8     tos;
 606        u16    df;
 607        struct rtable *rt;                      /* Route to the other host */
 608        struct device *tdev;                    /* Device to other host */
 609        struct iphdr  *iph;                     /* Our new IP header */
 610        int    max_headroom;                    /* The extra header space needed */
 611        int    gre_hlen;
 612        u32    dst;
 613        int    mtu;
 614
            /* Local dead-loop protection (see "Problems & solutions" at the top
               of the file): a nested entry means the route led back into this
               tunnel. */
 615        if (tunnel->recursion++) {
 616                tunnel->stat.collisions++;
 617                goto tx_error;
 618        }
 619
            /* With a hard_header method the outer header template is taken from
               the front of the packet (presumably built there by that method)
               and no further room is pushed; otherwise the configured template
               is used and tunnel->hlen bytes are prepended below. */
 620        if (dev->hard_header) {
 621                gre_hlen = 0;
 622                tiph = (struct iphdr*)skb->data;
 623        } else {
 624                gre_hlen = tunnel->hlen;
 625                tiph = &tunnel->parms.iph;
 626        }
 627
 628        if ((dst = tiph->daddr) == 0) {
 629                /* NBMA tunnel */
                    /* No fixed remote: derive the outer destination from the
                       inner packet's route (IPv4 gateway) or from an
                       IPv4-compatible IPv6 neighbour/destination address. */
 630
 631                if (skb->dst == NULL) {
 632                        tunnel->stat.tx_fifo_errors++;
 633                        goto tx_error;
 634                }
 635
 636                if (skb->protocol == __constant_htons(ETH_P_IP)) {
 637                        rt = (struct rtable*)skb->dst;
 638                        if ((dst = rt->rt_gateway) == 0)
 639                                goto tx_error_icmp;
 640                }
 641#ifdef CONFIG_IPV6
 642                else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
 643                        struct in6_addr *addr6;
 644                        int addr_type;
 645                        struct neighbour *neigh = skb->dst->neighbour;
 646
 647                        if (neigh == NULL)
 648                                goto tx_error;
 649
 650                        addr6 = (struct in6_addr*)&neigh->primary_key;
 651                        addr_type = ipv6_addr_type(addr6);
 652
 653                        if (addr_type == IPV6_ADDR_ANY) {
 654                                addr6 = &skb->nh.ipv6h->daddr;
 655                                addr_type = ipv6_addr_type(addr6);
 656                        }
 657
 658                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
 659                                goto tx_error_icmp;
 660
 661                        dst = addr6->s6_addr32[3];
 662                }
 663#endif
 664                else
 665                        goto tx_error;
 666        }
 667
            /* Low bit of the template tos means "inherit": for IPv4 payloads the
               inner tos is copied; the bit itself is always cleared. */
 668        tos = tiph->tos;
 669        if (tos&1) {
 670                if (skb->protocol == __constant_htons(ETH_P_IP))
 671                        tos = old_iph->tos;
 672                tos &= ~1;
 673        }
 674
 675        if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
 676                tunnel->stat.tx_carrier_errors++;
 677                goto tx_error;
 678        }
 679        tdev = rt->u.dst.dev;
 680
            /* The outer route points straight back at this tunnel device. */
 681        if (tdev == dev) {
 682                ip_rt_put(rt);
 683                tunnel->stat.collisions++;
 684                goto tx_error;
 685        }
 686
 687        df = tiph->frag_off;
            /* Path MTU as seen by the payload: outer path MTU minus tunnel->hlen. */
 688        mtu = rt->u.dst.pmtu - tunnel->hlen;
 689
 690        if (skb->protocol == __constant_htons(ETH_P_IP)) {
                    /* Lower the inner route's pmtu to the tunnel mtu (never
                       below 68). */
 691                if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
 692                        skb->dst->pmtu = mtu;
 693
                    /* Inner DF is propagated to the outer header. */
 694                df |= (old_iph->frag_off&__constant_htons(IP_DF));
 695
 696                if ((old_iph->frag_off&__constant_htons(IP_DF)) &&
 697                    mtu < ntohs(old_iph->tot_len)) {
 698                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 699                        ip_rt_put(rt);
 700                        goto tx_error;
 701                }
 702        }
 703#ifdef CONFIG_IPV6
 704        else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
 705                struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
 706
 707                if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= IPV6_MIN_MTU) {
                            /* Only update the route's pmtu for a point-to-point
                               tunnel (unicast remote) or a host route (/128). */
 708                        if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
 709                            rt6->rt6i_dst.plen == 128) {
 710                                rt6->rt6i_flags |= RTF_MODIFIED;
 711                                skb->dst->pmtu = mtu;
 712                        }
 713                }
 714
 715                if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
 716                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
 717                        ip_rt_put(rt);
 718                        goto tx_error;
 719                }
 720        }
 721#endif
 722
            /* Soft errors recorded by ipgre_err() are reported back to senders,
               one per transmitted packet, while they are still recent. */
 723        if (tunnel->err_count > 0) {
 724                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
 725                        tunnel->err_count--;
 726
 727                        dst_link_failure(skb);
 728                } else
 729                        tunnel->err_count = 0;
 730        }
 731
 732        skb->h.raw = skb->nh.raw;
 733
            /* Room for the lower device's link header (rounded up to 16) plus
               whatever is pushed below. */
 734        max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
 735
            /* Need a private copy with enough headroom before writing headers. */
 736        if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
 737                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 738                if (!new_skb) {
 739                        ip_rt_put(rt);
 740                        stats->tx_dropped++;
 741                        dev_kfree_skb(skb);
 742                        tunnel->recursion--;
 743                        return 0;
 744                }
 745                if (skb->sk)
 746                        skb_set_owner_w(new_skb, skb->sk);
 747                dev_kfree_skb(skb);
 748                skb = new_skb;
                    /* NOTE(review): old_iph and (when dev->hard_header is set)
                       tiph were taken from the skb just freed, and both are
                       read again below -- verify. */
 749        }
 750
 751        skb->nh.raw = skb_push(skb, gre_hlen);
 752        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
            /* The outer route replaces the inner one; the reference obtained
               from ip_route_output() is handed over to the skb. */
 753        dst_release(skb->dst);
 754        skb->dst = &rt->u.dst;
 755
 756        /*
 757         *      Push down and install the outer IP header; the GRE header
    *      follows it.
 758         */
 759
 760        iph                     =       skb->nh.iph;
 761        iph->version            =       4;
 762        iph->ihl                =       sizeof(struct iphdr) >> 2;
 763        iph->frag_off           =       df;
 764        iph->protocol           =       IPPROTO_GRE;
 765        iph->tos                =       tos;
 766        iph->daddr              =       rt->rt_dst;
 767        iph->saddr              =       rt->rt_src;
 768
            /* Template ttl 0 means "inherit" from the inner IPv4 ttl or IPv6
               hop limit, or use the system default for other protocols. */
 769        if ((iph->ttl = tiph->ttl) == 0) {
 770                if (skb->protocol == __constant_htons(ETH_P_IP))
 771                        iph->ttl = old_iph->ttl;
 772#ifdef CONFIG_IPV6
 773                else if (skb->protocol == __constant_htons(ETH_P_IPV6))
 774                        iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
 775#endif
 776                else
 777                        iph->ttl = ip_statistics.IpDefaultTTL;
 778        }
 779
            /* Base GRE header: flags word, then payload protocol. */
 780        ((u16*)(iph+1))[0] = tunnel->parms.o_flags;
 781        ((u16*)(iph+1))[1] = skb->protocol;
 782
 783        if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
                    /* Optional words are filled back to front, starting at the
                       last word of the tunnel->hlen header: sequence, key,
                       checksum. */
 784                u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4);
 785
 786                if (tunnel->parms.o_flags&GRE_SEQ) {
 787                        ++tunnel->o_seqno;
 788                        *ptr = htonl(tunnel->o_seqno);
 789                        ptr--;
 790                }
 791                if (tunnel->parms.o_flags&GRE_KEY) {
 792                        *ptr = tunnel->parms.o_key;
 793                        ptr--;
 794                }
 795                if (tunnel->parms.o_flags&GRE_CSUM) {
                            /* Zero the checksum word first, then checksum
                               everything after the outer IP header. */
 796                        *ptr = 0;
 797                        *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
 798                }
 799        }
 800
 801        iph->tot_len            =       htons(skb->len);
 802        iph->id                 =       htons(ip_id_count++);
 803        ip_send_check(iph);
 804
 805        stats->tx_bytes += skb->len;
 806        stats->tx_packets++;
 807        ip_send(skb);
 808        tunnel->recursion--;
 809        return 0;
 810
 811tx_error_icmp:
            /* Report the failure to the sender of the inner packet, then fall
               through to the common error path. */
 812        dst_link_failure(skb);
 813
 814tx_error:
 815        stats->tx_errors++;
 816        dev_kfree_skb(skb);
 817        tunnel->recursion--;
 818        return 0;
 819}
 820
 821static int
 822ipgre_tunnel_ioctl (struct device *dev, struct ifreq *ifr, int cmd)
 823{
 824        int err = 0;
 825        struct ip_tunnel_parm p;
 826        struct ip_tunnel *t;
 827
 828        MOD_INC_USE_COUNT;
 829
 830        switch (cmd) {
 831        case SIOCGETTUNNEL:
 832                t = NULL;
 833                if (dev == &ipgre_fb_tunnel_dev) {
 834                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 835                                err = -EFAULT;
 836                                break;
 837                        }
 838                        t = ipgre_tunnel_locate(&p, 0);
 839                }
 840                if (t == NULL)
 841                        t = (struct ip_tunnel*)dev->priv;
 842                memcpy(&p, &t->parms, sizeof(p));
 843                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 844                        err = -EFAULT;
 845                break;
 846
 847        case SIOCADDTUNNEL:
 848        case SIOCCHGTUNNEL:
 849                err = -EPERM;
 850                if (!capable(CAP_NET_ADMIN))
 851                        goto done;
 852
 853                err = -EFAULT;
 854                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 855                        goto done;
 856
 857                err = -EINVAL;
 858                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 859                    p.iph.ihl != 5 || (p.iph.frag_off&__constant_htons(~IP_DF)) ||
 860                    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
 861                        goto done;
 862                if (p.iph.ttl)
 863                        p.iph.frag_off |= __constant_htons(IP_DF);
 864
 865                if (!(p.i_flags&GRE_KEY))
 866                        p.i_key = 0;
 867                if (!(p.o_flags&GRE_KEY))
 868                        p.o_key = 0;
 869
 870                t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
 871
 872                if (dev != &ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL &&
 873                    t != &ipgre_fb_tunnel) {
 874                        if (t != NULL) {
 875                                if (t->dev != dev) {
 876                                        err = -EEXIST;
 877                                        break;
 878                                }
 879                        } else {
 880                                unsigned nflags=0;
 881
 882                                t = (struct ip_tunnel*)dev->priv;
 883
 884                                if (MULTICAST(p.iph.daddr))
 885                                        nflags = IFF_BROADCAST;
 886                                else if (p.iph.daddr)
 887                                        nflags = IFF_POINTOPOINT;
 888
 889                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
 890                                        err = -EINVAL;
 891                                        break;
 892                                }
 893                                start_bh_atomic();
 894                                ipgre_tunnel_unlink(t);
 895                                t->parms.iph.saddr = p.iph.saddr;
 896                                t->parms.iph.daddr = p.iph.daddr;
 897                                t->parms.i_key = p.i_key;
 898                                t->parms.o_key = p.o_key;
 899                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 900                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 901                                ipgre_tunnel_link(t);
 902                                end_bh_atomic();
 903                                netdev_state_change(dev);
 904                        }
 905                }
 906
 907                if (t) {
 908                        err = 0;
 909                        if (cmd == SIOCCHGTUNNEL) {
 910                                t->parms.iph.ttl = p.iph.ttl;
 911                                t->parms.iph.tos = p.iph.tos;
 912                                t->parms.iph.frag_off = p.iph.frag_off;
 913                        }
 914                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 915                                err = -EFAULT;
 916                } else
 917                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 918                break;
 919
 920        case SIOCDELTUNNEL:
 921                err = -EPERM;
 922                if (!capable(CAP_NET_ADMIN))
 923                        goto done;
 924
 925                if (dev == &ipgre_fb_tunnel_dev) {
 926                        err = -EFAULT;
 927                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 928                                goto done;
 929                        err = -ENOENT;
 930                        if ((t = ipgre_tunnel_locate(&p, 0)) == NULL)
 931                                goto done;
 932                        err = -EPERM;
 933                        if (t == &ipgre_fb_tunnel)
 934                                goto done;
 935                }
 936                err = unregister_netdevice(dev);
 937                break;
 938
 939        default:
 940                err = -EINVAL;
 941        }
 942
 943done:
 944        MOD_DEC_USE_COUNT;
 945        return err;
 946}
 947
 948static struct net_device_stats *ipgre_tunnel_get_stats(struct device *dev)
 949{
 950        return &(((struct ip_tunnel*)dev->priv)->stat);
 951}
 952
 953static int ipgre_tunnel_change_mtu(struct device *dev, int new_mtu)
 954{
 955        struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
 956        if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen)
 957                return -EINVAL;
 958        dev->mtu = new_mtu;
 959        return 0;
 960}
 961
 962#ifdef CONFIG_NET_IPGRE_BROADCAST
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
 972   
 973   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 974   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 975
 976   ping -t 255 224.66.66.66
 977
 978   If nobody answers, mbone does not work.
 979
 980   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 981   ip addr add 10.66.66.<somewhat>/24 dev Universe
 982   ifconfig Universe up
 983   ifconfig Universe add fe80::<Your_real_addr>/10
 984   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 985   ftp 10.66.66.66
 986   ...
 987   ftp fec0:6666:6666::193.233.7.65
 988   ...
 989
 990 */
 991
 992static int ipgre_header(struct sk_buff *skb, struct device *dev, unsigned short type,
 993                        void *daddr, void *saddr, unsigned len)
 994{
 995        struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
 996        struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
 997        u16 *p = (u16*)(iph+1);
 998
 999        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1000        p[0]            = t->parms.o_flags;
1001        p[1]            = htons(type);
1002
1003        /*
1004         *      Set the source hardware address. 
1005         */
1006         
1007        if (saddr)
1008                memcpy(&iph->saddr, saddr, 4);
1009
1010        if (daddr) {
1011                memcpy(&iph->daddr, daddr, 4);
1012                return t->hlen;
1013        }
1014        if (iph->daddr && !MULTICAST(iph->daddr))
1015                return t->hlen;
1016        
1017        return -t->hlen;
1018}
1019
1020static int ipgre_open(struct device *dev)
1021{
1022        struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
1023
1024        MOD_INC_USE_COUNT;
1025        if (MULTICAST(t->parms.iph.daddr)) {
1026                struct rtable *rt;
1027                if (ip_route_output(&rt, t->parms.iph.daddr,
1028                                    t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), 
1029                                    t->parms.link)) {
1030                        MOD_DEC_USE_COUNT;
1031                        return -EADDRNOTAVAIL;
1032                }
1033                dev = rt->u.dst.dev;
1034                ip_rt_put(rt);
1035                if (dev->ip_ptr == NULL) {
1036                        MOD_DEC_USE_COUNT;
1037                        return -EADDRNOTAVAIL;
1038                }
1039                t->mlink = dev->ifindex;
1040                ip_mc_inc_group(dev->ip_ptr, t->parms.iph.daddr);
1041        }
1042        return 0;
1043}
1044
1045static int ipgre_close(struct device *dev)
1046{
1047        struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
1048        if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
1049                dev = dev_get_by_index(t->mlink);
1050                if (dev && dev->ip_ptr)
1051                        ip_mc_dec_group(dev->ip_ptr, t->parms.iph.daddr);
1052        }
1053        MOD_DEC_USE_COUNT;
1054        return 0;
1055}
1056
1057#endif
1058
1059static void ipgre_tunnel_init_gen(struct device *dev)
1060{
1061        struct ip_tunnel *t = (struct ip_tunnel*)dev->priv;
1062
1063        dev->destructor         = ipgre_tunnel_destroy;
1064        dev->hard_start_xmit    = ipgre_tunnel_xmit;
1065        dev->get_stats          = ipgre_tunnel_get_stats;
1066        dev->do_ioctl           = ipgre_tunnel_ioctl;
1067        dev->change_mtu         = ipgre_tunnel_change_mtu;
1068
1069        dev_init_buffers(dev);
1070
1071        dev->type               = ARPHRD_IPGRE;
1072        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1073        dev->mtu                = 1500 - sizeof(struct iphdr) - 4;
1074        dev->flags              = IFF_NOARP;
1075        dev->iflink             = 0;
1076        dev->addr_len           = 4;
1077        memcpy(dev->dev_addr, &t->parms.iph.saddr, 4);
1078        memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
1079}
1080
1081static int ipgre_tunnel_init(struct device *dev)
1082{
1083        struct device *tdev = NULL;
1084        struct ip_tunnel *tunnel;
1085        struct iphdr *iph;
1086        int hlen = LL_MAX_HEADER;
1087        int mtu = 1500;
1088        int addend = sizeof(struct iphdr) + 4;
1089
1090        tunnel = (struct ip_tunnel*)dev->priv;
1091        iph = &tunnel->parms.iph;
1092
1093        ipgre_tunnel_init_gen(dev);
1094
1095        /* Guess output device to choose reasonable mtu and hard_header_len */
1096
1097        if (iph->daddr) {
1098                struct rtable *rt;
1099                if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
1100                        tdev = rt->u.dst.dev;
1101                        ip_rt_put(rt);
1102                }
1103
1104                dev->flags |= IFF_POINTOPOINT;
1105
1106#ifdef CONFIG_NET_IPGRE_BROADCAST
1107                if (MULTICAST(iph->daddr)) {
1108                        if (!iph->saddr)
1109                                return -EINVAL;
1110                        dev->flags = IFF_BROADCAST;
1111                        dev->hard_header = ipgre_header;
1112                        dev->open = ipgre_open;
1113                        dev->stop = ipgre_close;
1114                }
1115#endif
1116        }
1117
1118        if (!tdev && tunnel->parms.link)
1119                tdev = dev_get_by_index(tunnel->parms.link);
1120
1121        if (tdev) {
1122                hlen = tdev->hard_header_len;
1123                mtu = tdev->mtu;
1124        }
1125        dev->iflink = tunnel->parms.link;
1126
1127        /* Precalculate GRE options length */
1128        if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1129                if (tunnel->parms.o_flags&GRE_CSUM)
1130                        addend += 4;
1131                if (tunnel->parms.o_flags&GRE_KEY)
1132                        addend += 4;
1133                if (tunnel->parms.o_flags&GRE_SEQ)
1134                        addend += 4;
1135        }
1136        dev->hard_header_len = hlen + addend;
1137        dev->mtu = mtu - addend;
1138        tunnel->hlen = addend;
1139        return 0;
1140}
1141
1142#ifdef MODULE
1143static int ipgre_fb_tunnel_open(struct device *dev)
1144{
1145        MOD_INC_USE_COUNT;
1146        return 0;
1147}
1148
1149static int ipgre_fb_tunnel_close(struct device *dev)
1150{
1151        MOD_DEC_USE_COUNT;
1152        return 0;
1153}
1154#endif
1155
1156__initfunc(int ipgre_fb_tunnel_init(struct device *dev))
1157{
1158        struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
1159        struct iphdr *iph;
1160
1161        ipgre_tunnel_init_gen(dev);
1162#ifdef MODULE
1163        dev->open               = ipgre_fb_tunnel_open;
1164        dev->stop               = ipgre_fb_tunnel_close;
1165#endif
1166
1167        iph = &ipgre_fb_tunnel.parms.iph;
1168        iph->version            = 4;
1169        iph->protocol           = IPPROTO_GRE;
1170        iph->ihl                = 5;
1171        tunnel->hlen            = sizeof(struct iphdr) + 4;
1172
1173        tunnels_wc[0]           = &ipgre_fb_tunnel;
1174        return 0;
1175}
1176
1177
1178static struct inet_protocol ipgre_protocol = {
1179  ipgre_rcv,             /* GRE handler          */
1180  ipgre_err,             /* TUNNEL error control */
1181  0,                    /* next                 */
1182  IPPROTO_GRE,          /* protocol ID          */
1183  0,                    /* copy                 */
1184  NULL,                 /* data                 */
1185  "GRE"                 /* name                 */
1186};
1187
1188
1189/*
1190 *      And now the modules code and kernel interface.
1191 */
1192
1193#ifdef MODULE
1194int init_module(void) 
1195#else
1196__initfunc(int ipgre_init(void))
1197#endif
1198{
1199        printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1200
1201        ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
1202        ipgre_fb_tunnel_dev.name = ipgre_fb_tunnel.parms.name;
1203#ifdef MODULE
1204        register_netdev(&ipgre_fb_tunnel_dev);
1205#else
1206        register_netdevice(&ipgre_fb_tunnel_dev);
1207#endif
1208
1209        inet_add_protocol(&ipgre_protocol);
1210        return 0;
1211}
1212
1213#ifdef MODULE
1214
1215void cleanup_module(void)
1216{
1217        if ( inet_del_protocol(&ipgre_protocol) < 0 )
1218                printk(KERN_INFO "ipgre close: can't remove protocol\n");
1219
1220        unregister_netdev(&ipgre_fb_tunnel_dev);
1221}
1222
1223#endif
1224
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.