linux-bk/net/ipv4/fib_frontend.c
<<
>>
Prefs
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IPv4 Forwarding Information Base: FIB frontend.
 *
 * Version:     $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
  17
  18#include <linux/config.h>
  19#include <linux/module.h>
  20#include <asm/uaccess.h>
  21#include <asm/system.h>
  22#include <asm/bitops.h>
  23#include <linux/types.h>
  24#include <linux/kernel.h>
  25#include <linux/sched.h>
  26#include <linux/mm.h>
  27#include <linux/string.h>
  28#include <linux/socket.h>
  29#include <linux/sockios.h>
  30#include <linux/errno.h>
  31#include <linux/in.h>
  32#include <linux/inet.h>
  33#include <linux/netdevice.h>
  34#include <linux/if_arp.h>
  35#include <linux/skbuff.h>
  36#include <linux/netlink.h>
  37#include <linux/init.h>
  38
  39#include <net/ip.h>
  40#include <net/protocol.h>
  41#include <net/route.h>
  42#include <net/tcp.h>
  43#include <net/sock.h>
  44#include <net/icmp.h>
  45#include <net/arp.h>
  46#include <net/ip_fib.h>
  47
/* Debug print helper: forwards its arguments to printk() at KERN_DEBUG level. */
#define FFprint(a...) printk(KERN_DEBUG a)

#ifndef CONFIG_IP_MULTIPLE_TABLES

/* Single-table build: the table scan in inet_dump_fib() starts at the main table. */
#define RT_TABLE_MIN RT_TABLE_MAIN

/* The two routing tables of the single-table build; assigned in ip_fib_init(). */
struct fib_table *ip_fib_local_table;
struct fib_table *ip_fib_main_table;

#else

/* Multi-table build: the table scan in inet_dump_fib() starts at id 1. */
#define RT_TABLE_MIN 1

/* Table registry indexed by table id; slots are filled by __fib_new_table(). */
struct fib_table *fib_tables[RT_TABLE_MAX+1];
  62
  63struct fib_table *__fib_new_table(int id)
  64{
  65        struct fib_table *tb;
  66
  67        tb = fib_hash_init(id);
  68        if (!tb)
  69                return NULL;
  70        fib_tables[id] = tb;
  71        return tb;
  72}
  73
  74
  75#endif /* CONFIG_IP_MULTIPLE_TABLES */
  76
  77
  78void fib_flush(void)
  79{
  80        int flushed = 0;
  81#ifdef CONFIG_IP_MULTIPLE_TABLES
  82        struct fib_table *tb;
  83        int id;
  84
  85        for (id = RT_TABLE_MAX; id>0; id--) {
  86                if ((tb = fib_get_table(id))==NULL)
  87                        continue;
  88                flushed += tb->tb_flush(tb);
  89        }
  90#else /* CONFIG_IP_MULTIPLE_TABLES */
  91        flushed += ip_fib_main_table->tb_flush(ip_fib_main_table);
  92        flushed += ip_fib_local_table->tb_flush(ip_fib_local_table);
  93#endif /* CONFIG_IP_MULTIPLE_TABLES */
  94
  95        if (flushed)
  96                rt_cache_flush(-1);
  97}
  98
  99/*
 100 *      Find the first device with a given source address.
 101 */
 102
 103struct net_device * ip_dev_find(u32 addr)
 104{
 105        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 106        struct fib_result res;
 107        struct net_device *dev = NULL;
 108
 109#ifdef CONFIG_IP_MULTIPLE_TABLES
 110        res.r = NULL;
 111#endif
 112
 113        if (!ip_fib_local_table ||
 114            ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
 115                return NULL;
 116        if (res.type != RTN_LOCAL)
 117                goto out;
 118        dev = FIB_RES_DEV(res);
 119
 120        if (dev)
 121                dev_hold(dev);
 122out:
 123        fib_res_put(&res);
 124        return dev;
 125}
 126
 127unsigned inet_addr_type(u32 addr)
 128{
 129        struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 130        struct fib_result       res;
 131        unsigned ret = RTN_BROADCAST;
 132
 133        if (ZERONET(addr) || BADCLASS(addr))
 134                return RTN_BROADCAST;
 135        if (MULTICAST(addr))
 136                return RTN_MULTICAST;
 137
 138#ifdef CONFIG_IP_MULTIPLE_TABLES
 139        res.r = NULL;
 140#endif
 141        
 142        if (ip_fib_local_table) {
 143                ret = RTN_UNICAST;
 144                if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
 145                                                   &fl, &res)) {
 146                        ret = res.type;
 147                        fib_res_put(&res);
 148                }
 149        }
 150        return ret;
 151}
 152
/*
 * Validate the source address of a received packet.
 *
 * Given (packet source, input interface) and optional (dst, oif, tos):
 *   - (main) check that the source is valid, i.e. not a broadcast and not
 *     one of our local addresses;
 *   - figure out on which "logical" interface the packet arrived and
 *     calculate the "specific destination" address;
 *   - check that the packet arrived from the expected physical interface.
 *
 * Outputs: every path that returns a non-negative value has written
 * *spec_dst.  *itag is passed to fib_combine_itag() after a successful
 * first lookup and is set to 0 on the last_resort path.
 *
 * Returns:
 *   -EINVAL  when @dev has no in_device, when the first lookup yields a
 *            type other than RTN_UNICAST, or when rpf is set and the
 *            first lookup either failed or resolved to another device;
 *   1        when the nexthop scope of the accepted route is
 *            >= RT_SCOPE_HOST;
 *   0        otherwise.
 */

int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
                        struct net_device *dev, u32 *spec_dst, u32 *itag)
{
        struct in_device *in_dev;
        /*
         * Reverse lookup key: the packet source is the lookup destination
         * and the packet destination is the lookup source.  Note that the
         * oif argument is stored in the key's iif field.
         */
        struct flowi fl = { .nl_u = { .ip4_u =
                                      { .daddr = src,
                                        .saddr = dst,
                                        .tos = tos } },
                            .iif = oif };
        struct fib_result res;
        int no_addr, rpf;
        int ret;

        /*
         * Sample the per-device state under inetdev_lock: whether the
         * device has no addresses, and the IN_DEV_RPFILTER() value
         * (presumably the reverse-path filter setting - confirm).
         */
        no_addr = rpf = 0;
        read_lock(&inetdev_lock);
        in_dev = __in_dev_get(dev);
        if (in_dev) {
                no_addr = in_dev->ifa_list == NULL;
                rpf = IN_DEV_RPFILTER(in_dev);
        }
        read_unlock(&inetdev_lock);

        /* After the unlock in_dev is only compared against NULL, never dereferenced. */
        if (in_dev == NULL)
                goto e_inval;

        /* First lookup: no route back to the source means last resort. */
        if (fib_lookup(&fl, &res))
                goto last_resort;
        if (res.type != RTN_UNICAST)
                goto e_inval_res;
        *spec_dst = FIB_RES_PREFSRC(res);
        fib_combine_itag(itag, &res);
        /*
         * Accept when the route back to the source leaves through the
         * receiving device; with multipath support, any route having more
         * than one nexthop is accepted as well.
         */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
        if (FIB_RES_DEV(res) == dev)
#endif
        {
                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
                fib_res_put(&res);
                return ret;
        }
        fib_res_put(&res);
        /* Device mismatch: an address-less device falls back to last resort. */
        if (no_addr)
                goto last_resort;
        if (rpf)
                goto e_inval;
        /* Second lookup, now with the receiving device as output interface. */
        fl.oif = dev->ifindex;

        ret = 0;
        if (fib_lookup(&fl, &res) == 0) {
                if (res.type == RTN_UNICAST) {
                        *spec_dst = FIB_RES_PREFSRC(res);
                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
                }
                fib_res_put(&res);
        }
        /* If this lookup fails, *spec_dst keeps the value from the first lookup. */
        return ret;

last_resort:
        if (rpf)
                goto e_inval;
        /* No usable route: take an address from the device itself and clear the tag. */
        *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
        *itag = 0;
        return 0;

e_inval_res:
        /* Reached only after a successful lookup, so the result must be released. */
        fib_res_put(&res);
e_inval:
        return -EINVAL;
}
 231
 232#ifndef CONFIG_IP_NOSIOCRT
 233
 234/*
 235 *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 236 */
 237 
 238int ip_rt_ioctl(unsigned int cmd, void *arg)
 239{
 240        int err;
 241        struct kern_rta rta;
 242        struct rtentry  r;
 243        struct {
 244                struct nlmsghdr nlh;
 245                struct rtmsg    rtm;
 246        } req;
 247
 248        switch (cmd) {
 249        case SIOCADDRT:         /* Add a route */
 250        case SIOCDELRT:         /* Delete a route */
 251                if (!capable(CAP_NET_ADMIN))
 252                        return -EPERM;
 253                if (copy_from_user(&r, arg, sizeof(struct rtentry)))
 254                        return -EFAULT;
 255                rtnl_lock();
 256                err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
 257                if (err == 0) {
 258                        if (cmd == SIOCDELRT) {
 259                                struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
 260                                err = -ESRCH;
 261                                if (tb)
 262                                        err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
 263                        } else {
 264                                struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
 265                                err = -ENOBUFS;
 266                                if (tb)
 267                                        err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
 268                        }
 269                        if (rta.rta_mx)
 270                                kfree(rta.rta_mx);
 271                }
 272                rtnl_unlock();
 273                return err;
 274        }
 275        return -EINVAL;
 276}
 277
 278#else
 279
 280int ip_rt_ioctl(unsigned int cmd, void *arg)
 281{
 282        return -EINVAL;
 283}
 284
 285#endif
 286
 287static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
 288{
 289        int i;
 290
 291        for (i=1; i<=RTA_MAX; i++) {
 292                struct rtattr *attr = rta[i-1];
 293                if (attr) {
 294                        if (RTA_PAYLOAD(attr) < 4)
 295                                return -EINVAL;
 296                        if (i != RTA_MULTIPATH && i != RTA_METRICS)
 297                                rta[i-1] = (struct rtattr*)RTA_DATA(attr);
 298                }
 299        }
 300        return 0;
 301}
 302
 303int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 304{
 305        struct fib_table * tb;
 306        struct rtattr **rta = arg;
 307        struct rtmsg *r = NLMSG_DATA(nlh);
 308
 309        if (inet_check_attr(r, rta))
 310                return -EINVAL;
 311
 312        tb = fib_get_table(r->rtm_table);
 313        if (tb)
 314                return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
 315        return -ESRCH;
 316}
 317
 318int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 319{
 320        struct fib_table * tb;
 321        struct rtattr **rta = arg;
 322        struct rtmsg *r = NLMSG_DATA(nlh);
 323
 324        if (inet_check_attr(r, rta))
 325                return -EINVAL;
 326
 327        tb = fib_new_table(r->rtm_table);
 328        if (tb)
 329                return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
 330        return -ENOBUFS;
 331}
 332
 333int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 334{
 335        int t;
 336        int s_t;
 337        struct fib_table *tb;
 338
 339        if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
 340            ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
 341                return ip_rt_dump(skb, cb);
 342
 343        s_t = cb->args[0];
 344        if (s_t == 0)
 345                s_t = cb->args[0] = RT_TABLE_MIN;
 346
 347        for (t=s_t; t<=RT_TABLE_MAX; t++) {
 348                if (t < s_t) continue;
 349                if (t > s_t)
 350                        memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 351                if ((tb = fib_get_table(t))==NULL)
 352                        continue;
 353                if (tb->tb_dump(tb, skb, cb) < 0) 
 354                        break;
 355        }
 356
 357        cb->args[0] = t;
 358
 359        return skb->len;
 360}
 361
 362/* Prepare and feed intra-kernel routing request.
 363   Really, it should be netlink message, but :-( netlink
 364   can be not configured, so that we feed it directly
 365   to fib engine. It is legal, because all events occur
 366   only when netlink is already locked.
 367 */
 368
 369static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
 370{
 371        struct fib_table * tb;
 372        struct {
 373                struct nlmsghdr nlh;
 374                struct rtmsg    rtm;
 375        } req;
 376        struct kern_rta rta;
 377
 378        memset(&req.rtm, 0, sizeof(req.rtm));
 379        memset(&rta, 0, sizeof(rta));
 380
 381        if (type == RTN_UNICAST)
 382                tb = fib_new_table(RT_TABLE_MAIN);
 383        else
 384                tb = fib_new_table(RT_TABLE_LOCAL);
 385
 386        if (tb == NULL)
 387                return;
 388
 389        req.nlh.nlmsg_len = sizeof(req);
 390        req.nlh.nlmsg_type = cmd;
 391        req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
 392        req.nlh.nlmsg_pid = 0;
 393        req.nlh.nlmsg_seq = 0;
 394
 395        req.rtm.rtm_dst_len = dst_len;
 396        req.rtm.rtm_table = tb->tb_id;
 397        req.rtm.rtm_protocol = RTPROT_KERNEL;
 398        req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
 399        req.rtm.rtm_type = type;
 400
 401        rta.rta_dst = &dst;
 402        rta.rta_prefsrc = &ifa->ifa_local;
 403        rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
 404
 405        if (cmd == RTM_NEWROUTE)
 406                tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
 407        else
 408                tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
 409}
 410
 411static void fib_add_ifaddr(struct in_ifaddr *ifa)
 412{
 413        struct in_device *in_dev = ifa->ifa_dev;
 414        struct net_device *dev = in_dev->dev;
 415        struct in_ifaddr *prim = ifa;
 416        u32 mask = ifa->ifa_mask;
 417        u32 addr = ifa->ifa_local;
 418        u32 prefix = ifa->ifa_address&mask;
 419
 420        if (ifa->ifa_flags&IFA_F_SECONDARY) {
 421                prim = inet_ifa_byprefix(in_dev, prefix, mask);
 422                if (prim == NULL) {
 423                        printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
 424                        return;
 425                }
 426        }
 427
 428        fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
 429
 430        if (!(dev->flags&IFF_UP))
 431                return;
 432
 433        /* Add broadcast address, if it is explicitly assigned. */
 434        if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
 435                fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 436
 437        if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
 438            (prefix != addr || ifa->ifa_prefixlen < 32)) {
 439                fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 440                          RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
 441
 442                /* Add network specific broadcasts, when it takes a sense */
 443                if (ifa->ifa_prefixlen < 31) {
 444                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
 445                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
 446                }
 447        }
 448}
 449
/*
 * Remove the routes that were installed for an interface address,
 * keeping any route that is still justified by another address on the
 * same device.
 *
 * For a primary address the prefix route is deleted up front; for a
 * secondary address the primary covering the same prefix is looked up
 * and used as the reference address for the deletions below (if none is
 * found a debug message is printed and nothing more is done).
 */
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct net_device *dev = in_dev->dev;
        struct in_ifaddr *ifa1;
        struct in_ifaddr *prim = ifa;
        /* brd: address with all host bits set; any: address with all host bits clear. */
        u32 brd = ifa->ifa_address|~ifa->ifa_mask;
        u32 any = ifa->ifa_address&ifa->ifa_mask;
        /* Bits in "ok": a set bit means the corresponding route must be kept. */
#define LOCAL_OK        1
#define BRD_OK          2
#define BRD0_OK         4
#define BRD1_OK         8
        unsigned ok = 0;

        if (!(ifa->ifa_flags&IFA_F_SECONDARY))
                fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
                          RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
        else {
                prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
                if (prim == NULL) {
                        printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
                        return;
                }
        }

        /* Deletion is more complicated than addition.
           We must take care not to delete too much :-)

           Scan the address list to be sure that the addresses are really gone.
           (Presumably ifa itself is already unlinked from the list when
           this runs - confirm against the caller.)
         */

        for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
                if (ifa->ifa_local == ifa1->ifa_local)
                        ok |= LOCAL_OK;
                if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
                        ok |= BRD_OK;
                if (brd == ifa1->ifa_broadcast)
                        ok |= BRD1_OK;
                if (any == ifa1->ifa_broadcast)
                        ok |= BRD0_OK;
        }

        /* Delete each /32 route that no remaining address accounts for. */
        if (!(ok&BRD_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
        if (!(ok&BRD1_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
        if (!(ok&BRD0_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
        if (!(ok&LOCAL_OK)) {
                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

                /* Check that this local address has finally disappeared. */
                if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
                        /* And the last, but not the least thing.
                           We must flush stray FIB entries.

                           First of all, we scan the fib_info list searching
                           for stray nexthop entries, then ignite fib_flush.
                        */
                        if (fib_sync_down(ifa->ifa_local, NULL, 0))
                                fib_flush();
                }
        }
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
 518
 519static void fib_disable_ip(struct net_device *dev, int force)
 520{
 521        if (fib_sync_down(0, dev, force))
 522                fib_flush();
 523        rt_cache_flush(0);
 524        arp_ifdown(dev);
 525}
 526
 527static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 528{
 529        struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
 530
 531        switch (event) {
 532        case NETDEV_UP:
 533                fib_add_ifaddr(ifa);
 534#ifdef CONFIG_IP_ROUTE_MULTIPATH
 535                fib_sync_up(ifa->ifa_dev->dev);
 536#endif
 537                rt_cache_flush(-1);
 538                break;
 539        case NETDEV_DOWN:
 540                fib_del_ifaddr(ifa);
 541                if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
 542                        /* Last address was deleted from this interface.
 543                           Disable IP.
 544                         */
 545                        fib_disable_ip(ifa->ifa_dev->dev, 1);
 546                } else {
 547                        rt_cache_flush(-1);
 548                }
 549                break;
 550        }
 551        return NOTIFY_DONE;
 552}
 553
 554static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 555{
 556        struct net_device *dev = ptr;
 557        struct in_device *in_dev = __in_dev_get(dev);
 558
 559        if (event == NETDEV_UNREGISTER) {
 560                fib_disable_ip(dev, 2);
 561                return NOTIFY_DONE;
 562        }
 563
 564        if (!in_dev)
 565                return NOTIFY_DONE;
 566
 567        switch (event) {
 568        case NETDEV_UP:
 569                for_ifa(in_dev) {
 570                        fib_add_ifaddr(ifa);
 571                } endfor_ifa(in_dev);
 572#ifdef CONFIG_IP_ROUTE_MULTIPATH
 573                fib_sync_up(dev);
 574#endif
 575                rt_cache_flush(-1);
 576                break;
 577        case NETDEV_DOWN:
 578                fib_disable_ip(dev, 0);
 579                break;
 580        case NETDEV_CHANGEMTU:
 581        case NETDEV_CHANGE:
 582                rt_cache_flush(0);
 583                break;
 584        }
 585        return NOTIFY_DONE;
 586}
 587
/* Notifier block for address events; registered in ip_fib_init(). */
struct notifier_block fib_inetaddr_notifier = {
        .notifier_call =fib_inetaddr_event,
};

/* Notifier block for network device events; registered in ip_fib_init(). */
struct notifier_block fib_netdev_notifier = {
        .notifier_call =fib_netdev_event,
};
 595
 596void __init ip_fib_init(void)
 597{
 598#ifndef CONFIG_IP_MULTIPLE_TABLES
 599        ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
 600        ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
 601#else
 602        fib_rules_init();
 603#endif
 604
 605        register_netdevice_notifier(&fib_netdev_notifier);
 606        register_inetaddr_notifier(&fib_inetaddr_notifier);
 607}
 608
/* Symbols of this file exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(ip_dev_find);
EXPORT_SYMBOL(ip_rt_ioctl);
 612
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.