linux-bk/net/ipv4/fib_semantics.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 Forwarding Information Base: semantics.
   7 *
   8 * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
   9 *
  10 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 */
  17
  18#include <linux/config.h>
  19#include <asm/uaccess.h>
  20#include <asm/system.h>
  21#include <asm/bitops.h>
  22#include <linux/types.h>
  23#include <linux/kernel.h>
  24#include <linux/jiffies.h>
  25#include <linux/mm.h>
  26#include <linux/string.h>
  27#include <linux/socket.h>
  28#include <linux/sockios.h>
  29#include <linux/errno.h>
  30#include <linux/in.h>
  31#include <linux/inet.h>
  32#include <linux/netdevice.h>
  33#include <linux/if_arp.h>
  34#include <linux/proc_fs.h>
  35#include <linux/skbuff.h>
  36#include <linux/netlink.h>
  37#include <linux/init.h>
  38
  39#include <net/ip.h>
  40#include <net/protocol.h>
  41#include <net/route.h>
  42#include <net/tcp.h>
  43#include <net/sock.h>
  44#include <net/ip_fib.h>
  45
  46#define FSprintk(a...)
  47
  48static struct fib_info  *fib_info_list;
  49static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
  50int fib_info_cnt;
  51
  52#define for_fib_info() { struct fib_info *fi; \
  53        for (fi = fib_info_list; fi; fi = fi->fib_next)
  54
  55#define endfor_fib_info() }
  56
  57#ifdef CONFIG_IP_ROUTE_MULTIPATH
  58
  59static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;
  60
  61#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
  62for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
  63
  64#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
  65for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
  66
  67#else /* CONFIG_IP_ROUTE_MULTIPATH */
  68
  69/* Hope, that gcc will optimize it to get rid of dummy loop */
  70
  71#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
  72for (nhsel=0; nhsel < 1; nhsel++)
  73
  74#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
  75for (nhsel=0; nhsel < 1; nhsel++)
  76
  77#endif /* CONFIG_IP_ROUTE_MULTIPATH */
  78
  79#define endfor_nexthops(fi) }
  80
  81
  82static struct 
  83{
  84        int     error;
  85        u8      scope;
  86} fib_props[RTA_MAX + 1] = {
  87        {
  88                .error  = 0,
  89                .scope  = RT_SCOPE_NOWHERE,
  90        },      /* RTN_UNSPEC */
  91        {
  92                .error  = 0,
  93                .scope  = RT_SCOPE_UNIVERSE,
  94        },      /* RTN_UNICAST */
  95        {
  96                .error  = 0,
  97                .scope  = RT_SCOPE_HOST,
  98        },      /* RTN_LOCAL */
  99        {
 100                .error  = 0,
 101                .scope  = RT_SCOPE_LINK,
 102        },      /* RTN_BROADCAST */
 103        {
 104                .error  = 0,
 105                .scope  = RT_SCOPE_LINK,
 106        },      /* RTN_ANYCAST */
 107        {
 108                .error  = 0,
 109                .scope  = RT_SCOPE_UNIVERSE,
 110        },      /* RTN_MULTICAST */
 111        {
 112                .error  = -EINVAL,
 113                .scope  = RT_SCOPE_UNIVERSE,
 114        },      /* RTN_BLACKHOLE */
 115        {
 116                .error  = -EHOSTUNREACH,
 117                .scope  = RT_SCOPE_UNIVERSE,
 118        },      /* RTN_UNREACHABLE */
 119        {
 120                .error  = -EACCES,
 121                .scope  = RT_SCOPE_UNIVERSE,
 122        },      /* RTN_PROHIBIT */
 123        {
 124                .error  = -EAGAIN,
 125                .scope  = RT_SCOPE_UNIVERSE,
 126        },      /* RTN_THROW */
 127#ifdef CONFIG_IP_ROUTE_NAT
 128        {
 129                .error  = 0,
 130                .scope  = RT_SCOPE_HOST,
 131        },      /* RTN_NAT */
 132#else
 133        {
 134                .error  = -EINVAL,
 135                .scope  = RT_SCOPE_NOWHERE,
 136        },      /* RTN_NAT */
 137#endif
 138        {
 139                .error  = -EINVAL,
 140                .scope  = RT_SCOPE_NOWHERE,
 141        },      /* RTN_XRESOLVE */
 142};
 143
 144
 145/* Release a nexthop info record */
 146
 147void free_fib_info(struct fib_info *fi)
 148{
 149        if (fi->fib_dead == 0) {
 150                printk("Freeing alive fib_info %p\n", fi);
 151                return;
 152        }
 153        change_nexthops(fi) {
 154                if (nh->nh_dev)
 155                        dev_put(nh->nh_dev);
 156                nh->nh_dev = NULL;
 157        } endfor_nexthops(fi);
 158        fib_info_cnt--;
 159        kfree(fi);
 160}
 161
 162void fib_release_info(struct fib_info *fi)
 163{
 164        write_lock(&fib_info_lock);
 165        if (fi && --fi->fib_treeref == 0) {
 166                if (fi->fib_next)
 167                        fi->fib_next->fib_prev = fi->fib_prev;
 168                if (fi->fib_prev)
 169                        fi->fib_prev->fib_next = fi->fib_next;
 170                if (fi == fib_info_list)
 171                        fib_info_list = fi->fib_next;
 172                fi->fib_dead = 1;
 173                fib_info_put(fi);
 174        }
 175        write_unlock(&fib_info_lock);
 176}
 177
 178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 179{
 180        const struct fib_nh *onh = ofi->fib_nh;
 181
 182        for_nexthops(fi) {
 183                if (nh->nh_oif != onh->nh_oif ||
 184                    nh->nh_gw  != onh->nh_gw ||
 185                    nh->nh_scope != onh->nh_scope ||
 186#ifdef CONFIG_IP_ROUTE_MULTIPATH
 187                    nh->nh_weight != onh->nh_weight ||
 188#endif
 189#ifdef CONFIG_NET_CLS_ROUTE
 190                    nh->nh_tclassid != onh->nh_tclassid ||
 191#endif
 192                    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
 193                        return -1;
 194                onh++;
 195        } endfor_nexthops(fi);
 196        return 0;
 197}
 198
 199static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
 200{
 201        for_fib_info() {
 202                if (fi->fib_nhs != nfi->fib_nhs)
 203                        continue;
 204                if (nfi->fib_protocol == fi->fib_protocol &&
 205                    nfi->fib_prefsrc == fi->fib_prefsrc &&
 206                    nfi->fib_priority == fi->fib_priority &&
 207                    memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
 208                    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
 209                    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 210                        return fi;
 211        } endfor_fib_info();
 212        return NULL;
 213}
 214
 215/* Check, that the gateway is already configured.
 216   Used only by redirect accept routine.
 217 */
 218
 219int ip_fib_check_default(u32 gw, struct net_device *dev)
 220{
 221        read_lock(&fib_info_lock);
 222        for_fib_info() {
 223                if (fi->fib_flags & RTNH_F_DEAD)
 224                        continue;
 225                for_nexthops(fi) {
 226                        if (nh->nh_dev == dev && nh->nh_gw == gw &&
 227                            nh->nh_scope == RT_SCOPE_LINK &&
 228                            !(nh->nh_flags&RTNH_F_DEAD)) {
 229                                read_unlock(&fib_info_lock);
 230                                return 0;
 231                        }
 232                } endfor_nexthops(fi);
 233        } endfor_fib_info();
 234        read_unlock(&fib_info_lock);
 235        return -1;
 236}
 237
 238#ifdef CONFIG_IP_ROUTE_MULTIPATH
 239
 240static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
 241{
 242        while (RTA_OK(attr,attrlen)) {
 243                if (attr->rta_type == type)
 244                        return *(u32*)RTA_DATA(attr);
 245                attr = RTA_NEXT(attr, attrlen);
 246        }
 247        return 0;
 248}
 249
 250static int
 251fib_count_nexthops(struct rtattr *rta)
 252{
 253        int nhs = 0;
 254        struct rtnexthop *nhp = RTA_DATA(rta);
 255        int nhlen = RTA_PAYLOAD(rta);
 256
 257        while (nhlen >= (int)sizeof(struct rtnexthop)) {
 258                if ((nhlen -= nhp->rtnh_len) < 0)
 259                        return 0;
 260                nhs++;
 261                nhp = RTNH_NEXT(nhp);
 262        };
 263        return nhs;
 264}
 265
 266static int
 267fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
 268{
 269        struct rtnexthop *nhp = RTA_DATA(rta);
 270        int nhlen = RTA_PAYLOAD(rta);
 271
 272        change_nexthops(fi) {
 273                int attrlen = nhlen - sizeof(struct rtnexthop);
 274                if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
 275                        return -EINVAL;
 276                nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
 277                nh->nh_oif = nhp->rtnh_ifindex;
 278                nh->nh_weight = nhp->rtnh_hops + 1;
 279                if (attrlen) {
 280                        nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
 281#ifdef CONFIG_NET_CLS_ROUTE
 282                        nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
 283#endif
 284                }
 285                nhp = RTNH_NEXT(nhp);
 286        } endfor_nexthops(fi);
 287        return 0;
 288}
 289
 290#endif
 291
 292int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
 293                 struct fib_info *fi)
 294{
 295#ifdef CONFIG_IP_ROUTE_MULTIPATH
 296        struct rtnexthop *nhp;
 297        int nhlen;
 298#endif
 299
 300        if (rta->rta_priority &&
 301            *rta->rta_priority != fi->fib_priority)
 302                return 1;
 303
 304        if (rta->rta_oif || rta->rta_gw) {
 305                if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
 306                    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
 307                        return 0;
 308                return 1;
 309        }
 310
 311#ifdef CONFIG_IP_ROUTE_MULTIPATH
 312        if (rta->rta_mp == NULL)
 313                return 0;
 314        nhp = RTA_DATA(rta->rta_mp);
 315        nhlen = RTA_PAYLOAD(rta->rta_mp);
 316        
 317        for_nexthops(fi) {
 318                int attrlen = nhlen - sizeof(struct rtnexthop);
 319                u32 gw;
 320
 321                if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
 322                        return -EINVAL;
 323                if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
 324                        return 1;
 325                if (attrlen) {
 326                        gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
 327                        if (gw && gw != nh->nh_gw)
 328                                return 1;
 329#ifdef CONFIG_NET_CLS_ROUTE
 330                        gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
 331                        if (gw && gw != nh->nh_tclassid)
 332                                return 1;
 333#endif
 334                }
 335                nhp = RTNH_NEXT(nhp);
 336        } endfor_nexthops(fi);
 337#endif
 338        return 0;
 339}
 340
 341
 342/*
 343   Picture
 344   -------
 345
 346   Semantics of nexthop is very messy by historical reasons.
 347   We have to take into account, that:
 348   a) gateway can be actually local interface address,
 349      so that gatewayed route is direct.
 350   b) gateway must be on-link address, possibly
 351      described not by an ifaddr, but also by a direct route.
 352   c) If both gateway and interface are specified, they should not
 353      contradict.
 354   d) If we use tunnel routes, gateway could be not on-link.
 355
 356   Attempt to reconcile all of these (alas, self-contradictory) conditions
 357   results in pretty ugly and hairy code with obscure logic.
 358
 359   I chose to generalized it instead, so that the size
 360   of code does not increase practically, but it becomes
 361   much more general.
 362   Every prefix is assigned a "scope" value: "host" is local address,
 363   "link" is direct route,
 364   [ ... "site" ... "interior" ... ]
 365   and "universe" is true gateway route with global meaning.
 366
 367   Every prefix refers to a set of "nexthop"s (gw, oif),
 368   where gw must have narrower scope. This recursion stops
 369   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 370   which means that gw is forced to be on link.
 371
 372   Code is still hairy, but now it is apparently logically
 373   consistent and very flexible. F.e. as by-product it allows
 374   to co-exists in peace independent exterior and interior
 375   routing processes.
 376
 377   Normally it looks as following.
 378
 379   {universe prefix}  -> (gw, oif) [scope link]
 380                          |
 381                          |-> {link prefix} -> (gw, oif) [scope local]
 382                                                |
 383                                                |-> {local prefix} (terminal node)
 384 */
 385
 386static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
 387{
 388        int err;
 389
 390        if (nh->nh_gw) {
 391                struct fib_result res;
 392
 393#ifdef CONFIG_IP_ROUTE_PERVASIVE
 394                if (nh->nh_flags&RTNH_F_PERVASIVE)
 395                        return 0;
 396#endif
 397                if (nh->nh_flags&RTNH_F_ONLINK) {
 398                        struct net_device *dev;
 399
 400                        if (r->rtm_scope >= RT_SCOPE_LINK)
 401                                return -EINVAL;
 402                        if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 403                                return -EINVAL;
 404                        if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
 405                                return -ENODEV;
 406                        if (!(dev->flags&IFF_UP))
 407                                return -ENETDOWN;
 408                        nh->nh_dev = dev;
 409                        dev_hold(dev);
 410                        nh->nh_scope = RT_SCOPE_LINK;
 411                        return 0;
 412                }
 413                {
 414                        struct flowi fl = { .nl_u = { .ip4_u =
 415                                                      { .daddr = nh->nh_gw,
 416                                                        .scope = r->rtm_scope + 1 } },
 417                                            .oif = nh->nh_oif };
 418
 419                        /* It is not necessary, but requires a bit of thinking */
 420                        if (fl.fl4_scope < RT_SCOPE_LINK)
 421                                fl.fl4_scope = RT_SCOPE_LINK;
 422                        if ((err = fib_lookup(&fl, &res)) != 0)
 423                                return err;
 424                }
 425                err = -EINVAL;
 426                if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
 427                        goto out;
 428                nh->nh_scope = res.scope;
 429                nh->nh_oif = FIB_RES_OIF(res);
 430                if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
 431                        goto out;
 432                dev_hold(nh->nh_dev);
 433                err = -ENETDOWN;
 434                if (!(nh->nh_dev->flags & IFF_UP))
 435                        goto out;
 436                err = 0;
 437out:
 438                fib_res_put(&res);
 439                return err;
 440        } else {
 441                struct in_device *in_dev;
 442
 443                if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
 444                        return -EINVAL;
 445
 446                in_dev = inetdev_by_index(nh->nh_oif);
 447                if (in_dev == NULL)
 448                        return -ENODEV;
 449                if (!(in_dev->dev->flags&IFF_UP)) {
 450                        in_dev_put(in_dev);
 451                        return -ENETDOWN;
 452                }
 453                nh->nh_dev = in_dev->dev;
 454                dev_hold(nh->nh_dev);
 455                nh->nh_scope = RT_SCOPE_HOST;
 456                in_dev_put(in_dev);
 457        }
 458        return 0;
 459}
 460
 461struct fib_info *
 462fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
 463                const struct nlmsghdr *nlh, int *errp)
 464{
 465        int err;
 466        struct fib_info *fi = NULL;
 467        struct fib_info *ofi;
 468#ifdef CONFIG_IP_ROUTE_MULTIPATH
 469        int nhs = 1;
 470#else
 471        const int nhs = 1;
 472#endif
 473
 474        /* Fast check to catch the most weird cases */
 475        if (fib_props[r->rtm_type].scope > r->rtm_scope)
 476                goto err_inval;
 477
 478#ifdef CONFIG_IP_ROUTE_MULTIPATH
 479        if (rta->rta_mp) {
 480                nhs = fib_count_nexthops(rta->rta_mp);
 481                if (nhs == 0)
 482                        goto err_inval;
 483        }
 484#endif
 485
 486        fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 487        err = -ENOBUFS;
 488        if (fi == NULL)
 489                goto failure;
 490        fib_info_cnt++;
 491        memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
 492
 493        fi->fib_protocol = r->rtm_protocol;
 494        fi->fib_nhs = nhs;
 495        fi->fib_flags = r->rtm_flags;
 496        if (rta->rta_priority)
 497                fi->fib_priority = *rta->rta_priority;
 498        if (rta->rta_mx) {
 499                int attrlen = RTA_PAYLOAD(rta->rta_mx);
 500                struct rtattr *attr = RTA_DATA(rta->rta_mx);
 501
 502                while (RTA_OK(attr, attrlen)) {
 503                        unsigned flavor = attr->rta_type;
 504                        if (flavor) {
 505                                if (flavor > RTAX_MAX)
 506                                        goto err_inval;
 507                                fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
 508                        }
 509                        attr = RTA_NEXT(attr, attrlen);
 510                }
 511        }
 512        if (rta->rta_prefsrc)
 513                memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
 514
 515        if (rta->rta_mp) {
 516#ifdef CONFIG_IP_ROUTE_MULTIPATH
 517                if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
 518                        goto failure;
 519                if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
 520                        goto err_inval;
 521                if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
 522                        goto err_inval;
 523#ifdef CONFIG_NET_CLS_ROUTE
 524                if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
 525                        goto err_inval;
 526#endif
 527#else
 528                goto err_inval;
 529#endif
 530        } else {
 531                struct fib_nh *nh = fi->fib_nh;
 532                if (rta->rta_oif)
 533                        nh->nh_oif = *rta->rta_oif;
 534                if (rta->rta_gw)
 535                        memcpy(&nh->nh_gw, rta->rta_gw, 4);
 536#ifdef CONFIG_NET_CLS_ROUTE
 537                if (rta->rta_flow)
 538                        memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
 539#endif
 540                nh->nh_flags = r->rtm_flags;
 541#ifdef CONFIG_IP_ROUTE_MULTIPATH
 542                nh->nh_weight = 1;
 543#endif
 544        }
 545
 546#ifdef CONFIG_IP_ROUTE_NAT
 547        if (r->rtm_type == RTN_NAT) {
 548                if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
 549                        goto err_inval;
 550                memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
 551                goto link_it;
 552        }
 553#endif
 554
 555        if (fib_props[r->rtm_type].error) {
 556                if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
 557                        goto err_inval;
 558                goto link_it;
 559        }
 560
 561        if (r->rtm_scope > RT_SCOPE_HOST)
 562                goto err_inval;
 563
 564        if (r->rtm_scope == RT_SCOPE_HOST) {
 565                struct fib_nh *nh = fi->fib_nh;
 566
 567                /* Local address is added. */
 568                if (nhs != 1 || nh->nh_gw)
 569                        goto err_inval;
 570                nh->nh_scope = RT_SCOPE_NOWHERE;
 571                nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
 572                err = -ENODEV;
 573                if (nh->nh_dev == NULL)
 574                        goto failure;
 575        } else {
 576                change_nexthops(fi) {
 577                        if ((err = fib_check_nh(r, fi, nh)) != 0)
 578                                goto failure;
 579                } endfor_nexthops(fi)
 580        }
 581
 582        if (fi->fib_prefsrc) {
 583                if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
 584                    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
 585                        if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 586                                goto err_inval;
 587        }
 588
 589link_it:
 590        if ((ofi = fib_find_info(fi)) != NULL) {
 591                fi->fib_dead = 1;
 592                free_fib_info(fi);
 593                ofi->fib_treeref++;
 594                return ofi;
 595        }
 596
 597        fi->fib_treeref++;
 598        atomic_inc(&fi->fib_clntref);
 599        write_lock(&fib_info_lock);
 600        fi->fib_next = fib_info_list;
 601        fi->fib_prev = NULL;
 602        if (fib_info_list)
 603                fib_info_list->fib_prev = fi;
 604        fib_info_list = fi;
 605        write_unlock(&fib_info_lock);
 606        return fi;
 607
 608err_inval:
 609        err = -EINVAL;
 610
 611failure:
 612        *errp = err;
 613        if (fi) {
 614                fi->fib_dead = 1;
 615                free_fib_info(fi);
 616        }
 617        return NULL;
 618}
 619
 620int 
 621fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
 622{
 623        int err = fib_props[type].error;
 624
 625        if (err == 0) {
 626                if (fi->fib_flags&RTNH_F_DEAD)
 627                        return 1;
 628
 629                res->fi = fi;
 630
 631                switch (type) {
 632#ifdef CONFIG_IP_ROUTE_NAT
 633                case RTN_NAT:
 634                        FIB_RES_RESET(*res);
 635                        atomic_inc(&fi->fib_clntref);
 636                        return 0;
 637#endif
 638                case RTN_UNICAST:
 639                case RTN_LOCAL:
 640                case RTN_BROADCAST:
 641                case RTN_ANYCAST:
 642                case RTN_MULTICAST:
 643                        for_nexthops(fi) {
 644                                if (nh->nh_flags&RTNH_F_DEAD)
 645                                        continue;
 646                                if (!flp->oif || flp->oif == nh->nh_oif)
 647                                        break;
 648                        }
 649#ifdef CONFIG_IP_ROUTE_MULTIPATH
 650                        if (nhsel < fi->fib_nhs) {
 651                                res->nh_sel = nhsel;
 652                                atomic_inc(&fi->fib_clntref);
 653                                return 0;
 654                        }
 655#else
 656                        if (nhsel < 1) {
 657                                atomic_inc(&fi->fib_clntref);
 658                                return 0;
 659                        }
 660#endif
 661                        endfor_nexthops(fi);
 662                        res->fi = NULL;
 663                        return 1;
 664                default:
 665                        res->fi = NULL;
 666                        printk(KERN_DEBUG "impossible 102\n");
 667                        return -EINVAL;
 668                }
 669        }
 670        return err;
 671}
 672
 673/* Find appropriate source address to this destination */
 674
 675u32 __fib_res_prefsrc(struct fib_result *res)
 676{
 677        return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 678}
 679
 680int
 681fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 682              u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
 683              struct fib_info *fi)
 684{
 685        struct rtmsg *rtm;
 686        struct nlmsghdr  *nlh;
 687        unsigned char    *b = skb->tail;
 688
 689        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
 690        rtm = NLMSG_DATA(nlh);
 691        rtm->rtm_family = AF_INET;
 692        rtm->rtm_dst_len = dst_len;
 693        rtm->rtm_src_len = 0;
 694        rtm->rtm_tos = tos;
 695        rtm->rtm_table = tb_id;
 696        rtm->rtm_type = type;
 697        rtm->rtm_flags = fi->fib_flags;
 698        rtm->rtm_scope = scope;
 699        if (rtm->rtm_dst_len)
 700                RTA_PUT(skb, RTA_DST, 4, dst);
 701        rtm->rtm_protocol = fi->fib_protocol;
 702        if (fi->fib_priority)
 703                RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
 704#ifdef CONFIG_NET_CLS_ROUTE
 705        if (fi->fib_nh[0].nh_tclassid)
 706                RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
 707#endif
 708        if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
 709                goto rtattr_failure;
 710        if (fi->fib_prefsrc)
 711                RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
 712        if (fi->fib_nhs == 1) {
 713                if (fi->fib_nh->nh_gw)
 714                        RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
 715                if (fi->fib_nh->nh_oif)
 716                        RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
 717        }
 718#ifdef CONFIG_IP_ROUTE_MULTIPATH
 719        if (fi->fib_nhs > 1) {
 720                struct rtnexthop *nhp;
 721                struct rtattr *mp_head;
 722                if (skb_tailroom(skb) <= RTA_SPACE(0))
 723                        goto rtattr_failure;
 724                mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
 725
 726                for_nexthops(fi) {
 727                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
 728                                goto rtattr_failure;
 729                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 730                        nhp->rtnh_flags = nh->nh_flags & 0xFF;
 731                        nhp->rtnh_hops = nh->nh_weight-1;
 732                        nhp->rtnh_ifindex = nh->nh_oif;
 733                        if (nh->nh_gw)
 734                                RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
 735                        nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
 736                } endfor_nexthops(fi);
 737                mp_head->rta_type = RTA_MULTIPATH;
 738                mp_head->rta_len = skb->tail - (u8*)mp_head;
 739        }
 740#endif
 741        nlh->nlmsg_len = skb->tail - b;
 742        return skb->len;
 743
 744nlmsg_failure:
 745rtattr_failure:
 746        skb_trim(skb, b - skb->data);
 747        return -1;
 748}
 749
 750#ifndef CONFIG_IP_NOSIOCRT
 751
 752int
 753fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
 754                    struct kern_rta *rta, struct rtentry *r)
 755{
 756        int    plen;
 757        u32    *ptr;
 758
 759        memset(rtm, 0, sizeof(*rtm));
 760        memset(rta, 0, sizeof(*rta));
 761
 762        if (r->rt_dst.sa_family != AF_INET)
 763                return -EAFNOSUPPORT;
 764
 765        /* Check mask for validity:
 766           a) it must be contiguous.
 767           b) destination must have all host bits clear.
 768           c) if application forgot to set correct family (AF_INET),
 769              reject request unless it is absolutely clear i.e.
 770              both family and mask are zero.
 771         */
 772        plen = 32;
 773        ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
 774        if (!(r->rt_flags&RTF_HOST)) {
 775                u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
 776                if (r->rt_genmask.sa_family != AF_INET) {
 777                        if (mask || r->rt_genmask.sa_family)
 778                                return -EAFNOSUPPORT;
 779                }
 780                if (bad_mask(mask, *ptr))
 781                        return -EINVAL;
 782                plen = inet_mask_len(mask);
 783        }
 784
 785        nl->nlmsg_flags = NLM_F_REQUEST;
 786        nl->nlmsg_pid = 0;
 787        nl->nlmsg_seq = 0;
 788        nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
 789        if (cmd == SIOCDELRT) {
 790                nl->nlmsg_type = RTM_DELROUTE;
 791                nl->nlmsg_flags = 0;
 792        } else {
 793                nl->nlmsg_type = RTM_NEWROUTE;
 794                nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
 795                rtm->rtm_protocol = RTPROT_BOOT;
 796        }
 797
 798        rtm->rtm_dst_len = plen;
 799        rta->rta_dst = ptr;
 800
 801        if (r->rt_metric) {
 802                *(u32*)&r->rt_pad3 = r->rt_metric - 1;
 803                rta->rta_priority = (u32*)&r->rt_pad3;
 804        }
 805        if (r->rt_flags&RTF_REJECT) {
 806                rtm->rtm_scope = RT_SCOPE_HOST;
 807                rtm->rtm_type = RTN_UNREACHABLE;
 808                return 0;
 809        }
 810        rtm->rtm_scope = RT_SCOPE_NOWHERE;
 811        rtm->rtm_type = RTN_UNICAST;
 812
 813        if (r->rt_dev) {
 814                char *colon;
 815                struct net_device *dev;
 816                char   devname[IFNAMSIZ];
 817
 818                if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
 819                        return -EFAULT;
 820                devname[IFNAMSIZ-1] = 0;
 821                colon = strchr(devname, ':');
 822                if (colon)
 823                        *colon = 0;
 824                dev = __dev_get_by_name(devname);
 825                if (!dev)
 826                        return -ENODEV;
 827                rta->rta_oif = &dev->ifindex;
 828                if (colon) {
 829                        struct in_ifaddr *ifa;
 830                        struct in_device *in_dev = __in_dev_get(dev);
 831                        if (!in_dev)
 832                                return -ENODEV;
 833                        *colon = ':';
 834                        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
 835                                if (strcmp(ifa->ifa_label, devname) == 0)
 836                                        break;
 837                        if (ifa == NULL)
 838                                return -ENODEV;
 839                        rta->rta_prefsrc = &ifa->ifa_local;
 840                }
 841        }
 842
 843        ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
 844        if (r->rt_gateway.sa_family == AF_INET && *ptr) {
 845                rta->rta_gw = ptr;
 846                if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
 847                        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 848        }
 849
 850        if (cmd == SIOCDELRT)
 851                return 0;
 852
 853        if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
 854                return -EINVAL;
 855
 856        if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
 857                rtm->rtm_scope = RT_SCOPE_LINK;
 858
 859        if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
 860                struct rtattr *rec;
 861                struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
 862                if (mx == NULL)
 863                        return -ENOMEM;
 864                rta->rta_mx = mx;
 865                mx->rta_type = RTA_METRICS;
 866                mx->rta_len  = RTA_LENGTH(0);
 867                if (r->rt_flags&RTF_MTU) {
 868                        rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
 869                        rec->rta_type = RTAX_ADVMSS;
 870                        rec->rta_len = RTA_LENGTH(4);
 871                        mx->rta_len += RTA_LENGTH(4);
 872                        *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
 873                }
 874                if (r->rt_flags&RTF_WINDOW) {
 875                        rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
 876                        rec->rta_type = RTAX_WINDOW;
 877                        rec->rta_len = RTA_LENGTH(4);
 878                        mx->rta_len += RTA_LENGTH(4);
 879                        *(u32*)RTA_DATA(rec) = r->rt_window;
 880                }
 881                if (r->rt_flags&RTF_IRTT) {
 882                        rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
 883                        rec->rta_type = RTAX_RTT;
 884                        rec->rta_len = RTA_LENGTH(4);
 885                        mx->rta_len += RTA_LENGTH(4);
 886                        *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
 887                }
 888        }
 889        return 0;
 890}
 891
 892#endif
 893
 894/*
 895   Update FIB if:
 896   - local address disappeared -> we must delete all the entries
 897     referring to it.
 898   - device went down -> we must shutdown all nexthops going via it.
 899 */
 900
 901int fib_sync_down(u32 local, struct net_device *dev, int force)
 902{
 903        int ret = 0;
 904        int scope = RT_SCOPE_NOWHERE;
 905        
 906        if (force)
 907                scope = -1;
 908
 909        for_fib_info() {
 910                if (local && fi->fib_prefsrc == local) {
 911                        fi->fib_flags |= RTNH_F_DEAD;
 912                        ret++;
 913                } else if (dev && fi->fib_nhs) {
 914                        int dead = 0;
 915
 916                        change_nexthops(fi) {
 917                                if (nh->nh_flags&RTNH_F_DEAD)
 918                                        dead++;
 919                                else if (nh->nh_dev == dev &&
 920                                         nh->nh_scope != scope) {
 921                                        nh->nh_flags |= RTNH_F_DEAD;
 922#ifdef CONFIG_IP_ROUTE_MULTIPATH
 923                                        spin_lock_bh(&fib_multipath_lock);
 924                                        fi->fib_power -= nh->nh_power;
 925                                        nh->nh_power = 0;
 926                                        spin_unlock_bh(&fib_multipath_lock);
 927#endif
 928                                        dead++;
 929                                }
 930#ifdef CONFIG_IP_ROUTE_MULTIPATH
 931                                if (force > 1 && nh->nh_dev == dev) {
 932                                        dead = fi->fib_nhs;
 933                                        break;
 934                                }
 935#endif
 936                        } endfor_nexthops(fi)
 937                        if (dead == fi->fib_nhs) {
 938                                fi->fib_flags |= RTNH_F_DEAD;
 939                                ret++;
 940                        }
 941                }
 942        } endfor_fib_info();
 943        return ret;
 944}
 945
 946#ifdef CONFIG_IP_ROUTE_MULTIPATH
 947
 948/*
 949   Dead device goes up. We wake up dead nexthops.
 950   It takes sense only on multipath routes.
 951 */
 952
 953int fib_sync_up(struct net_device *dev)
 954{
 955        int ret = 0;
 956
 957        if (!(dev->flags&IFF_UP))
 958                return 0;
 959
 960        for_fib_info() {
 961                int alive = 0;
 962
 963                change_nexthops(fi) {
 964                        if (!(nh->nh_flags&RTNH_F_DEAD)) {
 965                                alive++;
 966                                continue;
 967                        }
 968                        if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
 969                                continue;
 970                        if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
 971                                continue;
 972                        alive++;
 973                        spin_lock_bh(&fib_multipath_lock);
 974                        nh->nh_power = 0;
 975                        nh->nh_flags &= ~RTNH_F_DEAD;
 976                        spin_unlock_bh(&fib_multipath_lock);
 977                } endfor_nexthops(fi)
 978
 979                if (alive > 0) {
 980                        fi->fib_flags &= ~RTNH_F_DEAD;
 981                        ret++;
 982                }
 983        } endfor_fib_info();
 984        return ret;
 985}
 986
 987/*
 988   The algorithm is suboptimal, but it provides really
 989   fair weighted route distribution.
 990 */
 991
 992void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
 993{
 994        struct fib_info *fi = res->fi;
 995        int w;
 996
 997        spin_lock_bh(&fib_multipath_lock);
 998        if (fi->fib_power <= 0) {
 999                int power = 0;
1000                change_nexthops(fi) {
1001                        if (!(nh->nh_flags&RTNH_F_DEAD)) {
1002                                power += nh->nh_weight;
1003                                nh->nh_power = nh->nh_weight;
1004                        }
1005                } endfor_nexthops(fi);
1006                fi->fib_power = power;
1007                if (power <= 0) {
1008                        spin_unlock_bh(&fib_multipath_lock);
1009                        /* Race condition: route has just become dead. */
1010                        res->nh_sel = 0;
1011                        return;
1012                }
1013        }
1014
1015
1016        /* w should be random number [0..fi->fib_power-1],
1017           it is pretty bad approximation.
1018         */
1019
1020        w = jiffies % fi->fib_power;
1021
1022        change_nexthops(fi) {
1023                if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1024                        if ((w -= nh->nh_power) <= 0) {
1025                                nh->nh_power--;
1026                                fi->fib_power--;
1027                                res->nh_sel = nhsel;
1028                                spin_unlock_bh(&fib_multipath_lock);
1029                                return;
1030                        }
1031                }
1032        } endfor_nexthops(fi);
1033
1034        /* Race condition: route has just become dead. */
1035        res->nh_sel = 0;
1036        spin_unlock_bh(&fib_multipath_lock);
1037}
1038#endif
1039
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.