linux/net/ipv4/fib_frontend.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 Forwarding Information Base: FIB frontend.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 *
  10 *              This program is free software; you can redistribute it and/or
  11 *              modify it under the terms of the GNU General Public License
  12 *              as published by the Free Software Foundation; either version
  13 *              2 of the License, or (at your option) any later version.
  14 */
  15
  16#include <linux/module.h>
  17#include <asm/uaccess.h>
  18#include <asm/system.h>
  19#include <linux/bitops.h>
  20#include <linux/capability.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/mm.h>
  24#include <linux/string.h>
  25#include <linux/socket.h>
  26#include <linux/sockios.h>
  27#include <linux/errno.h>
  28#include <linux/in.h>
  29#include <linux/inet.h>
  30#include <linux/inetdevice.h>
  31#include <linux/netdevice.h>
  32#include <linux/if_addr.h>
  33#include <linux/if_arp.h>
  34#include <linux/skbuff.h>
  35#include <linux/init.h>
  36#include <linux/list.h>
  37
  38#include <net/ip.h>
  39#include <net/protocol.h>
  40#include <net/route.h>
  41#include <net/tcp.h>
  42#include <net/sock.h>
  43#include <net/icmp.h>
  44#include <net/arp.h>
  45#include <net/ip_fib.h>
  46#include <net/rtnetlink.h>
  47
  48#ifndef CONFIG_IP_MULTIPLE_TABLES
  49
  50static int __net_init fib4_rules_init(struct net *net)
  51{
  52        struct fib_table *local_table, *main_table;
  53
  54        local_table = fib_hash_table(RT_TABLE_LOCAL);
  55        if (local_table == NULL)
  56                return -ENOMEM;
  57
  58        main_table  = fib_hash_table(RT_TABLE_MAIN);
  59        if (main_table == NULL)
  60                goto fail;
  61
  62        hlist_add_head_rcu(&local_table->tb_hlist,
  63                                &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
  64        hlist_add_head_rcu(&main_table->tb_hlist,
  65                                &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
  66        return 0;
  67
  68fail:
  69        kfree(local_table);
  70        return -ENOMEM;
  71}
  72#else
  73
  74struct fib_table *fib_new_table(struct net *net, u32 id)
  75{
  76        struct fib_table *tb;
  77        unsigned int h;
  78
  79        if (id == 0)
  80                id = RT_TABLE_MAIN;
  81        tb = fib_get_table(net, id);
  82        if (tb)
  83                return tb;
  84
  85        tb = fib_hash_table(id);
  86        if (!tb)
  87                return NULL;
  88        h = id & (FIB_TABLE_HASHSZ - 1);
  89        hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
  90        return tb;
  91}
  92
  93struct fib_table *fib_get_table(struct net *net, u32 id)
  94{
  95        struct fib_table *tb;
  96        struct hlist_node *node;
  97        struct hlist_head *head;
  98        unsigned int h;
  99
 100        if (id == 0)
 101                id = RT_TABLE_MAIN;
 102        h = id & (FIB_TABLE_HASHSZ - 1);
 103
 104        rcu_read_lock();
 105        head = &net->ipv4.fib_table_hash[h];
 106        hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
 107                if (tb->tb_id == id) {
 108                        rcu_read_unlock();
 109                        return tb;
 110                }
 111        }
 112        rcu_read_unlock();
 113        return NULL;
 114}
 115#endif /* CONFIG_IP_MULTIPLE_TABLES */
 116
 117void fib_select_default(struct net *net,
 118                        const struct flowi *flp, struct fib_result *res)
 119{
 120        struct fib_table *tb;
 121        int table = RT_TABLE_MAIN;
 122#ifdef CONFIG_IP_MULTIPLE_TABLES
 123        if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
 124                return;
 125        table = res->r->table;
 126#endif
 127        tb = fib_get_table(net, table);
 128        if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 129                tb->tb_select_default(tb, flp, res);
 130}
 131
 132static void fib_flush(struct net *net)
 133{
 134        int flushed = 0;
 135        struct fib_table *tb;
 136        struct hlist_node *node;
 137        struct hlist_head *head;
 138        unsigned int h;
 139
 140        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 141                head = &net->ipv4.fib_table_hash[h];
 142                hlist_for_each_entry(tb, node, head, tb_hlist)
 143                        flushed += tb->tb_flush(tb);
 144        }
 145
 146        if (flushed)
 147                rt_cache_flush(net, -1);
 148}
 149
 150/*
 151 *      Find the first device with a given source address.
 152 */
 153
 154struct net_device * ip_dev_find(struct net *net, __be32 addr)
 155{
 156        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 157        struct fib_result res;
 158        struct net_device *dev = NULL;
 159        struct fib_table *local_table;
 160
 161#ifdef CONFIG_IP_MULTIPLE_TABLES
 162        res.r = NULL;
 163#endif
 164
 165        local_table = fib_get_table(net, RT_TABLE_LOCAL);
 166        if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
 167                return NULL;
 168        if (res.type != RTN_LOCAL)
 169                goto out;
 170        dev = FIB_RES_DEV(res);
 171
 172        if (dev)
 173                dev_hold(dev);
 174out:
 175        fib_res_put(&res);
 176        return dev;
 177}
 178
 179/*
 180 * Find address type as if only "dev" was present in the system. If
 181 * on_dev is NULL then all interfaces are taken into consideration.
 182 */
 183static inline unsigned __inet_dev_addr_type(struct net *net,
 184                                            const struct net_device *dev,
 185                                            __be32 addr)
 186{
 187        struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 188        struct fib_result       res;
 189        unsigned ret = RTN_BROADCAST;
 190        struct fib_table *local_table;
 191
 192        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 193                return RTN_BROADCAST;
 194        if (ipv4_is_multicast(addr))
 195                return RTN_MULTICAST;
 196
 197#ifdef CONFIG_IP_MULTIPLE_TABLES
 198        res.r = NULL;
 199#endif
 200
 201        local_table = fib_get_table(net, RT_TABLE_LOCAL);
 202        if (local_table) {
 203                ret = RTN_UNICAST;
 204                if (!local_table->tb_lookup(local_table, &fl, &res)) {
 205                        if (!dev || dev == res.fi->fib_dev)
 206                                ret = res.type;
 207                        fib_res_put(&res);
 208                }
 209        }
 210        return ret;
 211}
 212
 213unsigned int inet_addr_type(struct net *net, __be32 addr)
 214{
 215        return __inet_dev_addr_type(net, NULL, addr);
 216}
 217
 218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 219                                __be32 addr)
 220{
 221       return __inet_dev_addr_type(net, dev, addr);
 222}
 223
 224/* Given (packet source, input interface) and optional (dst, oif, tos):
 225   - (main) check, that source is valid i.e. not broadcast or our local
 226     address.
 227   - figure out what "logical" interface this packet arrived
 228     and calculate "specific destination" address.
 229   - check, that packet arrived from expected physical interface.
 230 */
 231
 232int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 233                        struct net_device *dev, __be32 *spec_dst, u32 *itag)
 234{
 235        struct in_device *in_dev;
 236        struct flowi fl = { .nl_u = { .ip4_u =
 237                                      { .daddr = src,
 238                                        .saddr = dst,
 239                                        .tos = tos } },
 240                            .iif = oif };
 241        struct fib_result res;
 242        int no_addr, rpf;
 243        int ret;
 244        struct net *net;
 245
 246        no_addr = rpf = 0;
 247        rcu_read_lock();
 248        in_dev = __in_dev_get_rcu(dev);
 249        if (in_dev) {
 250                no_addr = in_dev->ifa_list == NULL;
 251                rpf = IN_DEV_RPFILTER(in_dev);
 252        }
 253        rcu_read_unlock();
 254
 255        if (in_dev == NULL)
 256                goto e_inval;
 257
 258        net = dev_net(dev);
 259        if (fib_lookup(net, &fl, &res))
 260                goto last_resort;
 261        if (res.type != RTN_UNICAST)
 262                goto e_inval_res;
 263        *spec_dst = FIB_RES_PREFSRC(res);
 264        fib_combine_itag(itag, &res);
 265#ifdef CONFIG_IP_ROUTE_MULTIPATH
 266        if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
 267#else
 268        if (FIB_RES_DEV(res) == dev)
 269#endif
 270        {
 271                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 272                fib_res_put(&res);
 273                return ret;
 274        }
 275        fib_res_put(&res);
 276        if (no_addr)
 277                goto last_resort;
 278        if (rpf)
 279                goto e_inval;
 280        fl.oif = dev->ifindex;
 281
 282        ret = 0;
 283        if (fib_lookup(net, &fl, &res) == 0) {
 284                if (res.type == RTN_UNICAST) {
 285                        *spec_dst = FIB_RES_PREFSRC(res);
 286                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 287                }
 288                fib_res_put(&res);
 289        }
 290        return ret;
 291
 292last_resort:
 293        if (rpf)
 294                goto e_inval;
 295        *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
 296        *itag = 0;
 297        return 0;
 298
 299e_inval_res:
 300        fib_res_put(&res);
 301e_inval:
 302        return -EINVAL;
 303}
 304
 305static inline __be32 sk_extract_addr(struct sockaddr *addr)
 306{
 307        return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 308}
 309
 310static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
 311{
 312        struct nlattr *nla;
 313
 314        nla = (struct nlattr *) ((char *) mx + len);
 315        nla->nla_type = type;
 316        nla->nla_len = nla_attr_size(4);
 317        *(u32 *) nla_data(nla) = value;
 318
 319        return len + nla_total_size(4);
 320}
 321
 322static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 323                                 struct fib_config *cfg)
 324{
 325        __be32 addr;
 326        int plen;
 327
 328        memset(cfg, 0, sizeof(*cfg));
 329        cfg->fc_nlinfo.nl_net = net;
 330
 331        if (rt->rt_dst.sa_family != AF_INET)
 332                return -EAFNOSUPPORT;
 333
 334        /*
 335         * Check mask for validity:
 336         * a) it must be contiguous.
 337         * b) destination must have all host bits clear.
 338         * c) if application forgot to set correct family (AF_INET),
 339         *    reject request unless it is absolutely clear i.e.
 340         *    both family and mask are zero.
 341         */
 342        plen = 32;
 343        addr = sk_extract_addr(&rt->rt_dst);
 344        if (!(rt->rt_flags & RTF_HOST)) {
 345                __be32 mask = sk_extract_addr(&rt->rt_genmask);
 346
 347                if (rt->rt_genmask.sa_family != AF_INET) {
 348                        if (mask || rt->rt_genmask.sa_family)
 349                                return -EAFNOSUPPORT;
 350                }
 351
 352                if (bad_mask(mask, addr))
 353                        return -EINVAL;
 354
 355                plen = inet_mask_len(mask);
 356        }
 357
 358        cfg->fc_dst_len = plen;
 359        cfg->fc_dst = addr;
 360
 361        if (cmd != SIOCDELRT) {
 362                cfg->fc_nlflags = NLM_F_CREATE;
 363                cfg->fc_protocol = RTPROT_BOOT;
 364        }
 365
 366        if (rt->rt_metric)
 367                cfg->fc_priority = rt->rt_metric - 1;
 368
 369        if (rt->rt_flags & RTF_REJECT) {
 370                cfg->fc_scope = RT_SCOPE_HOST;
 371                cfg->fc_type = RTN_UNREACHABLE;
 372                return 0;
 373        }
 374
 375        cfg->fc_scope = RT_SCOPE_NOWHERE;
 376        cfg->fc_type = RTN_UNICAST;
 377
 378        if (rt->rt_dev) {
 379                char *colon;
 380                struct net_device *dev;
 381                char devname[IFNAMSIZ];
 382
 383                if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
 384                        return -EFAULT;
 385
 386                devname[IFNAMSIZ-1] = 0;
 387                colon = strchr(devname, ':');
 388                if (colon)
 389                        *colon = 0;
 390                dev = __dev_get_by_name(net, devname);
 391                if (!dev)
 392                        return -ENODEV;
 393                cfg->fc_oif = dev->ifindex;
 394                if (colon) {
 395                        struct in_ifaddr *ifa;
 396                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 397                        if (!in_dev)
 398                                return -ENODEV;
 399                        *colon = ':';
 400                        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
 401                                if (strcmp(ifa->ifa_label, devname) == 0)
 402                                        break;
 403                        if (ifa == NULL)
 404                                return -ENODEV;
 405                        cfg->fc_prefsrc = ifa->ifa_local;
 406                }
 407        }
 408
 409        addr = sk_extract_addr(&rt->rt_gateway);
 410        if (rt->rt_gateway.sa_family == AF_INET && addr) {
 411                cfg->fc_gw = addr;
 412                if (rt->rt_flags & RTF_GATEWAY &&
 413                    inet_addr_type(net, addr) == RTN_UNICAST)
 414                        cfg->fc_scope = RT_SCOPE_UNIVERSE;
 415        }
 416
 417        if (cmd == SIOCDELRT)
 418                return 0;
 419
 420        if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
 421                return -EINVAL;
 422
 423        if (cfg->fc_scope == RT_SCOPE_NOWHERE)
 424                cfg->fc_scope = RT_SCOPE_LINK;
 425
 426        if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
 427                struct nlattr *mx;
 428                int len = 0;
 429
 430                mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
 431                if (mx == NULL)
 432                        return -ENOMEM;
 433
 434                if (rt->rt_flags & RTF_MTU)
 435                        len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
 436
 437                if (rt->rt_flags & RTF_WINDOW)
 438                        len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
 439
 440                if (rt->rt_flags & RTF_IRTT)
 441                        len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
 442
 443                cfg->fc_mx = mx;
 444                cfg->fc_mx_len = len;
 445        }
 446
 447        return 0;
 448}
 449
 450/*
 451 *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 452 */
 453
 454int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 455{
 456        struct fib_config cfg;
 457        struct rtentry rt;
 458        int err;
 459
 460        switch (cmd) {
 461        case SIOCADDRT:         /* Add a route */
 462        case SIOCDELRT:         /* Delete a route */
 463                if (!capable(CAP_NET_ADMIN))
 464                        return -EPERM;
 465
 466                if (copy_from_user(&rt, arg, sizeof(rt)))
 467                        return -EFAULT;
 468
 469                rtnl_lock();
 470                err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
 471                if (err == 0) {
 472                        struct fib_table *tb;
 473
 474                        if (cmd == SIOCDELRT) {
 475                                tb = fib_get_table(net, cfg.fc_table);
 476                                if (tb)
 477                                        err = tb->tb_delete(tb, &cfg);
 478                                else
 479                                        err = -ESRCH;
 480                        } else {
 481                                tb = fib_new_table(net, cfg.fc_table);
 482                                if (tb)
 483                                        err = tb->tb_insert(tb, &cfg);
 484                                else
 485                                        err = -ENOBUFS;
 486                        }
 487
 488                        /* allocated by rtentry_to_fib_config() */
 489                        kfree(cfg.fc_mx);
 490                }
 491                rtnl_unlock();
 492                return err;
 493        }
 494        return -EINVAL;
 495}
 496
 497const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 498        [RTA_DST]               = { .type = NLA_U32 },
 499        [RTA_SRC]               = { .type = NLA_U32 },
 500        [RTA_IIF]               = { .type = NLA_U32 },
 501        [RTA_OIF]               = { .type = NLA_U32 },
 502        [RTA_GATEWAY]           = { .type = NLA_U32 },
 503        [RTA_PRIORITY]          = { .type = NLA_U32 },
 504        [RTA_PREFSRC]           = { .type = NLA_U32 },
 505        [RTA_METRICS]           = { .type = NLA_NESTED },
 506        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
 507        [RTA_FLOW]              = { .type = NLA_U32 },
 508};
 509
 510static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 511                            struct nlmsghdr *nlh, struct fib_config *cfg)
 512{
 513        struct nlattr *attr;
 514        int err, remaining;
 515        struct rtmsg *rtm;
 516
 517        err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
 518        if (err < 0)
 519                goto errout;
 520
 521        memset(cfg, 0, sizeof(*cfg));
 522
 523        rtm = nlmsg_data(nlh);
 524        cfg->fc_dst_len = rtm->rtm_dst_len;
 525        cfg->fc_tos = rtm->rtm_tos;
 526        cfg->fc_table = rtm->rtm_table;
 527        cfg->fc_protocol = rtm->rtm_protocol;
 528        cfg->fc_scope = rtm->rtm_scope;
 529        cfg->fc_type = rtm->rtm_type;
 530        cfg->fc_flags = rtm->rtm_flags;
 531        cfg->fc_nlflags = nlh->nlmsg_flags;
 532
 533        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 534        cfg->fc_nlinfo.nlh = nlh;
 535        cfg->fc_nlinfo.nl_net = net;
 536
 537        if (cfg->fc_type > RTN_MAX) {
 538                err = -EINVAL;
 539                goto errout;
 540        }
 541
 542        nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
 543                switch (nla_type(attr)) {
 544                case RTA_DST:
 545                        cfg->fc_dst = nla_get_be32(attr);
 546                        break;
 547                case RTA_OIF:
 548                        cfg->fc_oif = nla_get_u32(attr);
 549                        break;
 550                case RTA_GATEWAY:
 551                        cfg->fc_gw = nla_get_be32(attr);
 552                        break;
 553                case RTA_PRIORITY:
 554                        cfg->fc_priority = nla_get_u32(attr);
 555                        break;
 556                case RTA_PREFSRC:
 557                        cfg->fc_prefsrc = nla_get_be32(attr);
 558                        break;
 559                case RTA_METRICS:
 560                        cfg->fc_mx = nla_data(attr);
 561                        cfg->fc_mx_len = nla_len(attr);
 562                        break;
 563                case RTA_MULTIPATH:
 564                        cfg->fc_mp = nla_data(attr);
 565                        cfg->fc_mp_len = nla_len(attr);
 566                        break;
 567                case RTA_FLOW:
 568                        cfg->fc_flow = nla_get_u32(attr);
 569                        break;
 570                case RTA_TABLE:
 571                        cfg->fc_table = nla_get_u32(attr);
 572                        break;
 573                }
 574        }
 575
 576        return 0;
 577errout:
 578        return err;
 579}
 580
 581static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 582{
 583        struct net *net = sock_net(skb->sk);
 584        struct fib_config cfg;
 585        struct fib_table *tb;
 586        int err;
 587
 588        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 589        if (err < 0)
 590                goto errout;
 591
 592        tb = fib_get_table(net, cfg.fc_table);
 593        if (tb == NULL) {
 594                err = -ESRCH;
 595                goto errout;
 596        }
 597
 598        err = tb->tb_delete(tb, &cfg);
 599errout:
 600        return err;
 601}
 602
 603static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 604{
 605        struct net *net = sock_net(skb->sk);
 606        struct fib_config cfg;
 607        struct fib_table *tb;
 608        int err;
 609
 610        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 611        if (err < 0)
 612                goto errout;
 613
 614        tb = fib_new_table(net, cfg.fc_table);
 615        if (tb == NULL) {
 616                err = -ENOBUFS;
 617                goto errout;
 618        }
 619
 620        err = tb->tb_insert(tb, &cfg);
 621errout:
 622        return err;
 623}
 624
 625static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 626{
 627        struct net *net = sock_net(skb->sk);
 628        unsigned int h, s_h;
 629        unsigned int e = 0, s_e;
 630        struct fib_table *tb;
 631        struct hlist_node *node;
 632        struct hlist_head *head;
 633        int dumped = 0;
 634
 635        if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 636            ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 637                return ip_rt_dump(skb, cb);
 638
 639        s_h = cb->args[0];
 640        s_e = cb->args[1];
 641
 642        for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
 643                e = 0;
 644                head = &net->ipv4.fib_table_hash[h];
 645                hlist_for_each_entry(tb, node, head, tb_hlist) {
 646                        if (e < s_e)
 647                                goto next;
 648                        if (dumped)
 649                                memset(&cb->args[2], 0, sizeof(cb->args) -
 650                                                 2 * sizeof(cb->args[0]));
 651                        if (tb->tb_dump(tb, skb, cb) < 0)
 652                                goto out;
 653                        dumped = 1;
 654next:
 655                        e++;
 656                }
 657        }
 658out:
 659        cb->args[1] = e;
 660        cb->args[0] = h;
 661
 662        return skb->len;
 663}
 664
 665/* Prepare and feed intra-kernel routing request.
 666   Really, it should be netlink message, but :-( netlink
 667   can be not configured, so that we feed it directly
 668   to fib engine. It is legal, because all events occur
 669   only when netlink is already locked.
 670 */
 671
 672static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 673{
 674        struct net *net = dev_net(ifa->ifa_dev->dev);
 675        struct fib_table *tb;
 676        struct fib_config cfg = {
 677                .fc_protocol = RTPROT_KERNEL,
 678                .fc_type = type,
 679                .fc_dst = dst,
 680                .fc_dst_len = dst_len,
 681                .fc_prefsrc = ifa->ifa_local,
 682                .fc_oif = ifa->ifa_dev->dev->ifindex,
 683                .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
 684                .fc_nlinfo = {
 685                        .nl_net = net,
 686                },
 687        };
 688
 689        if (type == RTN_UNICAST)
 690                tb = fib_new_table(net, RT_TABLE_MAIN);
 691        else
 692                tb = fib_new_table(net, RT_TABLE_LOCAL);
 693
 694        if (tb == NULL)
 695                return;
 696
 697        cfg.fc_table = tb->tb_id;
 698
 699        if (type != RTN_LOCAL)
 700                cfg.fc_scope = RT_SCOPE_LINK;
 701        else
 702                cfg.fc_scope = RT_SCOPE_HOST;
 703
 704        if (cmd == RTM_NEWROUTE)
 705                tb->tb_insert(tb, &cfg);
 706        else
 707                tb->tb_delete(tb, &cfg);
 708}
 709
 710void fib_add_ifaddr(struct in_ifaddr *ifa)
 711{
 712        struct in_device *in_dev = ifa->ifa_dev;
 713        struct net_device *dev = in_dev->dev;
 714        struct in_ifaddr *prim = ifa;
 715        __be32 mask = ifa->ifa_mask;
 716        __be32 addr = ifa->ifa_local;
 717        __be32 prefix = ifa->ifa_address&mask;
 718
 719        if (ifa->ifa_flags&IFA_F_SECONDARY) {
 720                prim = inet_ifa_byprefix(in_dev, prefix, mask);
 721                if (prim == NULL) {
 722                        printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
 723                        return;
 724                }
 725        }
 726
 727        fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
 728
 729        if (!(dev->flags&IFF_UP))
 730                return;
 731
 732        /* Add broadcast address, if it is explicitly assigned. */
 733        if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
 734                fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 735
 736        if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
 737            (prefix != addr || ifa->ifa_prefixlen < 32)) {
 738                fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 739                          RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
 740
 741                /* Add network specific broadcasts, when it takes a sense */
 742                if (ifa->ifa_prefixlen < 31) {
 743                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
 744                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
 745                }
 746        }
 747}
 748
 749static void fib_del_ifaddr(struct in_ifaddr *ifa)
 750{
 751        struct in_device *in_dev = ifa->ifa_dev;
 752        struct net_device *dev = in_dev->dev;
 753        struct in_ifaddr *ifa1;
 754        struct in_ifaddr *prim = ifa;
 755        __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
 756        __be32 any = ifa->ifa_address&ifa->ifa_mask;
 757#define LOCAL_OK        1
 758#define BRD_OK          2
 759#define BRD0_OK         4
 760#define BRD1_OK         8
 761        unsigned ok = 0;
 762
 763        if (!(ifa->ifa_flags&IFA_F_SECONDARY))
 764                fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 765                          RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
 766        else {
 767                prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 768                if (prim == NULL) {
 769                        printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
 770                        return;
 771                }
 772        }
 773
 774        /* Deletion is more complicated than add.
 775           We should take care of not to delete too much :-)
 776
 777           Scan address list to be sure that addresses are really gone.
 778         */
 779
 780        for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
 781                if (ifa->ifa_local == ifa1->ifa_local)
 782                        ok |= LOCAL_OK;
 783                if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
 784                        ok |= BRD_OK;
 785                if (brd == ifa1->ifa_broadcast)
 786                        ok |= BRD1_OK;
 787                if (any == ifa1->ifa_broadcast)
 788                        ok |= BRD0_OK;
 789        }
 790
 791        if (!(ok&BRD_OK))
 792                fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 793        if (!(ok&BRD1_OK))
 794                fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
 795        if (!(ok&BRD0_OK))
 796                fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
 797        if (!(ok&LOCAL_OK)) {
 798                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 799
 800                /* Check, that this local address finally disappeared. */
 801                if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
 802                        /* And the last, but not the least thing.
 803                           We must flush stray FIB entries.
 804
 805                           First of all, we scan fib_info list searching
 806                           for stray nexthop entries, then ignite fib_flush.
 807                        */
 808                        if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
 809                                fib_flush(dev_net(dev));
 810                }
 811        }
 812#undef LOCAL_OK
 813#undef BRD_OK
 814#undef BRD0_OK
 815#undef BRD1_OK
 816}
 817
 818static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 819{
 820
 821        struct fib_result       res;
 822        struct flowi            fl = { .mark = frn->fl_mark,
 823                                       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
 824                                                            .tos = frn->fl_tos,
 825                                                            .scope = frn->fl_scope } } };
 826
 827#ifdef CONFIG_IP_MULTIPLE_TABLES
 828        res.r = NULL;
 829#endif
 830
 831        frn->err = -ENOENT;
 832        if (tb) {
 833                local_bh_disable();
 834
 835                frn->tb_id = tb->tb_id;
 836                frn->err = tb->tb_lookup(tb, &fl, &res);
 837
 838                if (!frn->err) {
 839                        frn->prefixlen = res.prefixlen;
 840                        frn->nh_sel = res.nh_sel;
 841                        frn->type = res.type;
 842                        frn->scope = res.scope;
 843                        fib_res_put(&res);
 844                }
 845                local_bh_enable();
 846        }
 847}
 848
 849static void nl_fib_input(struct sk_buff *skb)
 850{
 851        struct net *net;
 852        struct fib_result_nl *frn;
 853        struct nlmsghdr *nlh;
 854        struct fib_table *tb;
 855        u32 pid;
 856
 857        net = sock_net(skb->sk);
 858        nlh = nlmsg_hdr(skb);
 859        if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 860            nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
 861                return;
 862
 863        skb = skb_clone(skb, GFP_KERNEL);
 864        if (skb == NULL)
 865                return;
 866        nlh = nlmsg_hdr(skb);
 867
 868        frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
 869        tb = fib_get_table(net, frn->tb_id_in);
 870
 871        nl_fib_lookup(frn, tb);
 872
 873        pid = NETLINK_CB(skb).pid;       /* pid of sending process */
 874        NETLINK_CB(skb).pid = 0;         /* from kernel */
 875        NETLINK_CB(skb).dst_group = 0;  /* unicast */
 876        netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
 877}
 878
 879static int nl_fib_lookup_init(struct net *net)
 880{
 881        struct sock *sk;
 882        sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
 883                                   nl_fib_input, NULL, THIS_MODULE);
 884        if (sk == NULL)
 885                return -EAFNOSUPPORT;
 886        net->ipv4.fibnl = sk;
 887        return 0;
 888}
 889
 890static void nl_fib_lookup_exit(struct net *net)
 891{
 892        netlink_kernel_release(net->ipv4.fibnl);
 893        net->ipv4.fibnl = NULL;
 894}
 895
 896static void fib_disable_ip(struct net_device *dev, int force)
 897{
 898        if (fib_sync_down_dev(dev, force))
 899                fib_flush(dev_net(dev));
 900        rt_cache_flush(dev_net(dev), 0);
 901        arp_ifdown(dev);
 902}
 903
 904static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 905{
 906        struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
 907        struct net_device *dev = ifa->ifa_dev->dev;
 908
 909        switch (event) {
 910        case NETDEV_UP:
 911                fib_add_ifaddr(ifa);
 912#ifdef CONFIG_IP_ROUTE_MULTIPATH
 913                fib_sync_up(dev);
 914#endif
 915                rt_cache_flush(dev_net(dev), -1);
 916                break;
 917        case NETDEV_DOWN:
 918                fib_del_ifaddr(ifa);
 919                if (ifa->ifa_dev->ifa_list == NULL) {
 920                        /* Last address was deleted from this interface.
 921                           Disable IP.
 922                         */
 923                        fib_disable_ip(dev, 1);
 924                } else {
 925                        rt_cache_flush(dev_net(dev), -1);
 926                }
 927                break;
 928        }
 929        return NOTIFY_DONE;
 930}
 931
 932static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 933{
 934        struct net_device *dev = ptr;
 935        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 936
 937        if (event == NETDEV_UNREGISTER) {
 938                fib_disable_ip(dev, 2);
 939                return NOTIFY_DONE;
 940        }
 941
 942        if (!in_dev)
 943                return NOTIFY_DONE;
 944
 945        switch (event) {
 946        case NETDEV_UP:
 947                for_ifa(in_dev) {
 948                        fib_add_ifaddr(ifa);
 949                } endfor_ifa(in_dev);
 950#ifdef CONFIG_IP_ROUTE_MULTIPATH
 951                fib_sync_up(dev);
 952#endif
 953                rt_cache_flush(dev_net(dev), -1);
 954                break;
 955        case NETDEV_DOWN:
 956                fib_disable_ip(dev, 0);
 957                break;
 958        case NETDEV_CHANGEMTU:
 959        case NETDEV_CHANGE:
 960                rt_cache_flush(dev_net(dev), 0);
 961                break;
 962        }
 963        return NOTIFY_DONE;
 964}
 965
 966static struct notifier_block fib_inetaddr_notifier = {
 967        .notifier_call =fib_inetaddr_event,
 968};
 969
 970static struct notifier_block fib_netdev_notifier = {
 971        .notifier_call =fib_netdev_event,
 972};
 973
 974static int __net_init ip_fib_net_init(struct net *net)
 975{
 976        int err;
 977        unsigned int i;
 978
 979        net->ipv4.fib_table_hash = kzalloc(
 980                        sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
 981        if (net->ipv4.fib_table_hash == NULL)
 982                return -ENOMEM;
 983
 984        for (i = 0; i < FIB_TABLE_HASHSZ; i++)
 985                INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
 986
 987        err = fib4_rules_init(net);
 988        if (err < 0)
 989                goto fail;
 990        return 0;
 991
 992fail:
 993        kfree(net->ipv4.fib_table_hash);
 994        return err;
 995}
 996
 997static void __net_exit ip_fib_net_exit(struct net *net)
 998{
 999        unsigned int i;
1000
1001#ifdef CONFIG_IP_MULTIPLE_TABLES
1002        fib4_rules_exit(net);
1003#endif
1004
1005        for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1006                struct fib_table *tb;
1007                struct hlist_head *head;
1008                struct hlist_node *node, *tmp;
1009
1010                head = &net->ipv4.fib_table_hash[i];
1011                hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1012                        hlist_del(node);
1013                        tb->tb_flush(tb);
1014                        kfree(tb);
1015                }
1016        }
1017        kfree(net->ipv4.fib_table_hash);
1018}
1019
1020static int __net_init fib_net_init(struct net *net)
1021{
1022        int error;
1023
1024        error = ip_fib_net_init(net);
1025        if (error < 0)
1026                goto out;
1027        error = nl_fib_lookup_init(net);
1028        if (error < 0)
1029                goto out_nlfl;
1030        error = fib_proc_init(net);
1031        if (error < 0)
1032                goto out_proc;
1033out:
1034        return error;
1035
1036out_proc:
1037        nl_fib_lookup_exit(net);
1038out_nlfl:
1039        ip_fib_net_exit(net);
1040        goto out;
1041}
1042
1043static void __net_exit fib_net_exit(struct net *net)
1044{
1045        fib_proc_exit(net);
1046        nl_fib_lookup_exit(net);
1047        ip_fib_net_exit(net);
1048}
1049
1050static struct pernet_operations fib_net_ops = {
1051        .init = fib_net_init,
1052        .exit = fib_net_exit,
1053};
1054
1055void __init ip_fib_init(void)
1056{
1057        rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1058        rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1059        rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1060
1061        register_pernet_subsys(&fib_net_ops);
1062        register_netdevice_notifier(&fib_netdev_notifier);
1063        register_inetaddr_notifier(&fib_inetaddr_notifier);
1064
1065        fib_hash_init();
1066}
1067
1068EXPORT_SYMBOL(inet_addr_type);
1069EXPORT_SYMBOL(inet_dev_addr_type);
1070EXPORT_SYMBOL(ip_dev_find);
1071
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.