linux-bk/net/ipv4/fib_hash.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 FIB: lookup engine and maintenance routines.
   7 *
   8 * Version:     $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
   9 *
  10 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 */
  17
  18#include <linux/config.h>
  19#include <asm/uaccess.h>
  20#include <asm/system.h>
  21#include <asm/bitops.h>
  22#include <linux/types.h>
  23#include <linux/kernel.h>
  24#include <linux/sched.h>
  25#include <linux/mm.h>
  26#include <linux/string.h>
  27#include <linux/socket.h>
  28#include <linux/sockios.h>
  29#include <linux/errno.h>
  30#include <linux/in.h>
  31#include <linux/inet.h>
  32#include <linux/netdevice.h>
  33#include <linux/if_arp.h>
  34#include <linux/proc_fs.h>
  35#include <linux/skbuff.h>
  36#include <linux/netlink.h>
  37#include <linux/init.h>
  38
  39#include <net/ip.h>
  40#include <net/protocol.h>
  41#include <net/route.h>
  42#include <net/tcp.h>
  43#include <net/sock.h>
  44#include <net/ip_fib.h>
  45
   /* Debug trace hook.  It currently expands to nothing; the comment below
      shows the printk body to substitute when tracing is wanted. */
   46#define FTprint(a...)
   47/*
   48   printk(KERN_DEBUG a)
   49 */
  50
   /* Slab cache for struct fib_node objects; created on first use by
      fib_hash_init() and used by fn_hash_insert()/fn_free_node(). */
   51static kmem_cache_t * fn_hash_kmem;
  52
  53/*
  54   These bizarre types are just to force strict type checking.
  55   When I reversed order of bytes and changed to natural mask lengths,
  56   I forgot to make fixes in several places. Now I am lazy to return
  57   it back.
  58 */
  59
   /* Route key: the destination address already masked with the zone's
      netmask (see fz_key()).  Wrapped in a struct so that it cannot be
      mixed up with a plain u32 or with a hash index. */
   60typedef struct {
   61        u32     datum;
   62} fn_key_t;
  63
   /* Bucket index into a zone's hash table, as produced by fn_hash().
      A distinct struct type so it cannot be confused with fn_key_t. */
   64typedef struct {
   65        u32     datum;
   66} fn_hash_idx_t;
  67
   /* One route entry, linked into a hash bucket chain of its zone. */
   68struct fib_node
   69{
   70        struct fib_node         *fn_next;	/* Next node in the same bucket chain */
   71        struct fib_info         *fn_info;	/* Route info; released via fib_release_info() in fn_free_node() */
   72#define FIB_INFO(f)     ((f)->fn_info)
   73        fn_key_t                fn_key;	/* Masked destination (see fz_key()) */
   74        u8                      fn_tos;	/* TOS value; only compared when CONFIG_IP_ROUTE_TOS is set */
   75        u8                      fn_type;	/* Route type, copied from rtm_type on insert */
   76        u8                      fn_scope;	/* Route scope, copied from rtm_scope on insert */
   77        u8                      fn_state;	/* Bitmask of FN_S_* flags */
   78};
  79
   /* fn_state flags.
      FN_S_ZOMBIE:   node was logically deleted by fn_hash_delete() but is
                     still linked in its chain; lookups and dumps skip it.
      FN_S_ACCESSED: node was touched by a lookup; writers use it to decide
                     whether rt_cache_flush() is needed. */
   80#define FN_S_ZOMBIE     1
   81#define FN_S_ACCESSED   2
  82
   /* Count of nodes marked FN_S_ZOMBIE since the last flush: incremented in
      fn_hash_delete() (which calls fib_flush() once it exceeds 128) and
      reset to 0 by fn_hash_flush(). */
   83static int fib_hash_zombies;
  84
   /* All routes of one prefix length ("zone order"), kept in a private
      hash table.  Zones are chained through fz_next; fn_new_zone() links a
      new zone behind the nearest zone with a longer prefix. */
   85struct fn_zone
   86{
   87        struct fn_zone  *fz_next;       /* Next not empty zone  */
   88        struct fib_node **fz_hash;      /* Hash table pointer   */
   89        int             fz_nent;        /* Number of entries    */
   90
   91        int             fz_divisor;     /* Hash divisor         */
   92        u32             fz_hashmask;    /* (fz_divisor - 1)     */
   93#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
   94
   95        int             fz_order;       /* Zone order           */
   96        u32             fz_mask;	/* Netmask for fz_order, from inet_make_mask() */
   97#define FZ_MASK(fz)     ((fz)->fz_mask)
   98};
  99
 100/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
 101   can be cheaper than memory lookup, so that FZ_* macros are used.
 102 */
 103
  /* Per-table state stored in fib_table->tb_data. */
  104struct fn_hash
  105{
  106        struct fn_zone  *fn_zones[33];	/* Zone for each prefix length 0..32, NULL if not created */
  107        struct fn_zone  *fn_zone_list;	/* Head of the fz_next chain walked by lookup, flush and dump */
  108};
 109
 110static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
 111{
 112        u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
 113        h ^= (h>>20);
 114        h ^= (h>>10);
 115        h ^= (h>>5);
 116        h &= FZ_HASHMASK(fz);
 117        return *(fn_hash_idx_t*)&h;
 118}
 119
  /* fz_key_0(key):      reset a key to the all-zero destination.
     fz_prefix(key, fz): raw value stored in a key; the zone argument is
                         accepted but not used. */
  120#define fz_key_0(key)           ((key).datum = 0)
  121#define fz_prefix(key,fz)       ((key).datum)
 122
 123static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
 124{
 125        fn_key_t k;
 126        k.datum = dst & FZ_MASK(fz);
 127        return k;
 128}
 129
 130static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
 131{
 132        return &fz->fz_hash[fn_hash(key, fz).datum];
 133}
 134
 135static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
 136{
 137        return fz->fz_hash[fn_hash(key, fz).datum];
 138}
 139
 140static __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
 141{
 142        return a.datum == b.datum;
 143}
 144
 145static __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
 146{
 147        return a.datum <= b.datum;
 148}
 149
  /* Reader/writer lock for the zone list and all bucket chains.  Lookups
     and dumps take it for reading; every relink of a chain or zone is done
     under write_lock_bh(). */
  150static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
 151
  /* Upper bound for a zone's hash divisor: the number of bucket pointers
     that fit in (PAGE_SIZE << MAX_ORDER) bytes. */
  152#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *))
 153
 154static struct fib_node **fz_hash_alloc(int divisor)
 155{
 156        unsigned long size = divisor * sizeof(struct fib_node *);
 157
 158        if (divisor <= 1024) {
 159                return kmalloc(size, GFP_KERNEL);
 160        } else {
 161                return (struct fib_node **)
 162                        __get_free_pages(GFP_KERNEL, get_order(size));
 163        }
 164}
 165
 166/* The fib hash lock must be held when this is called. */
 167static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
 168                                       struct fib_node **old_ht,
 169                                       int old_divisor)
 170{
 171        int i;
 172        struct fib_node *f, **fp, *next;
 173
 174        for (i=0; i<old_divisor; i++) {
 175                for (f=old_ht[i]; f; f=next) {
 176                        next = f->fn_next;
 177                        for (fp = fz_chain_p(f->fn_key, fz);
 178                             *fp && fn_key_leq((*fp)->fn_key, f->fn_key);
 179                             fp = &(*fp)->fn_next)
 180                                /* NONE */;
 181                        f->fn_next = *fp;
 182                        *fp = f;
 183                }
 184        }
 185}
 186
 187static void fz_hash_free(struct fib_node **hash, int divisor)
 188{
 189        if (divisor <= 1024)
 190                kfree(hash);
 191        else
 192                free_pages((unsigned long) hash,
 193                           get_order(divisor * sizeof(struct fib_node *)));
 194}
 195
 196static void fn_rehash_zone(struct fn_zone *fz)
 197{
 198        struct fib_node **ht, **old_ht;
 199        int old_divisor, new_divisor;
 200        u32 new_hashmask;
 201                
 202        old_divisor = fz->fz_divisor;
 203
 204        switch (old_divisor) {
 205        case 16:
 206                new_divisor = 256;
 207                break;
 208        case 256:
 209                new_divisor = 1024;
 210                break;
 211        default:
 212                if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
 213                        printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
 214                        return;
 215                }
 216                new_divisor = (old_divisor << 1);
 217                break;
 218        }
 219
 220        new_hashmask = (new_divisor - 1);
 221
 222#if RT_CACHE_DEBUG >= 2
 223        printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
 224#endif
 225
 226        ht = fz_hash_alloc(new_divisor);
 227
 228        if (ht) {
 229                memset(ht, 0, new_divisor*sizeof(struct fib_node*));
 230
 231                write_lock_bh(&fib_hash_lock);
 232                old_ht = fz->fz_hash;
 233                fz->fz_hash = ht;
 234                fz->fz_hashmask = new_hashmask;
 235                fz->fz_divisor = new_divisor;
 236                fn_rebuild_zone(fz, old_ht, old_divisor);
 237                write_unlock_bh(&fib_hash_lock);
 238
 239                fz_hash_free(old_ht, old_divisor);
 240        }
 241}
 242
 243static void fn_free_node(struct fib_node * f)
 244{
 245        fib_release_info(FIB_INFO(f));
 246        kmem_cache_free(fn_hash_kmem, f);
 247}
 248
 249
 250static struct fn_zone *
 251fn_new_zone(struct fn_hash *table, int z)
 252{
 253        int i;
 254        struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
 255        if (!fz)
 256                return NULL;
 257
 258        memset(fz, 0, sizeof(struct fn_zone));
 259        if (z) {
 260                fz->fz_divisor = 16;
 261        } else {
 262                fz->fz_divisor = 1;
 263        }
 264        fz->fz_hashmask = (fz->fz_divisor - 1);
 265        fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
 266        if (!fz->fz_hash) {
 267                kfree(fz);
 268                return NULL;
 269        }
 270        memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
 271        fz->fz_order = z;
 272        fz->fz_mask = inet_make_mask(z);
 273
 274        /* Find the first not empty zone with more specific mask */
 275        for (i=z+1; i<=32; i++)
 276                if (table->fn_zones[i])
 277                        break;
 278        write_lock_bh(&fib_hash_lock);
 279        if (i>32) {
 280                /* No more specific masks, we are the first. */
 281                fz->fz_next = table->fn_zone_list;
 282                table->fn_zone_list = fz;
 283        } else {
 284                fz->fz_next = table->fn_zones[i]->fz_next;
 285                table->fn_zones[i]->fz_next = fz;
 286        }
 287        table->fn_zones[z] = fz;
 288        write_unlock_bh(&fib_hash_lock);
 289        return fz;
 290}
 291
 292static int
 293fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 294{
 295        int err;
 296        struct fn_zone *fz;
 297        struct fn_hash *t = (struct fn_hash*)tb->tb_data;
 298
 299        read_lock(&fib_hash_lock);
 300        for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
 301                struct fib_node *f;
 302                fn_key_t k = fz_key(flp->fl4_dst, fz);
 303
 304                for (f = fz_chain(k, fz); f; f = f->fn_next) {
 305                        if (!fn_key_eq(k, f->fn_key)) {
 306                                if (fn_key_leq(k, f->fn_key))
 307                                        break;
 308                                else
 309                                        continue;
 310                        }
 311#ifdef CONFIG_IP_ROUTE_TOS
 312                        if (f->fn_tos && f->fn_tos != flp->fl4_tos)
 313                                continue;
 314#endif
 315                        f->fn_state |= FN_S_ACCESSED;
 316
 317                        if (f->fn_state&FN_S_ZOMBIE)
 318                                continue;
 319                        if (f->fn_scope < flp->fl4_scope)
 320                                continue;
 321
 322                        err = fib_semantic_match(f->fn_type, FIB_INFO(f), flp, res);
 323                        if (err == 0) {
 324                                res->type = f->fn_type;
 325                                res->scope = f->fn_scope;
 326                                res->prefixlen = fz->fz_order;
 327                                goto out;
 328                        }
 329                        if (err < 0)
 330                                goto out;
 331                }
 332        }
 333        err = 1;
 334out:
 335        read_unlock(&fib_hash_lock);
 336        return err;
 337}
 338
  /* Position (the "order" counter of fn_hash_select_default()) of the
     default route chosen last time; -1 when none is recorded. */
  339static int fn_hash_last_dflt=-1;
 340
 341static int fib_detect_death(struct fib_info *fi, int order,
 342                            struct fib_info **last_resort, int *last_idx)
 343{
 344        struct neighbour *n;
 345        int state = NUD_NONE;
 346
 347        n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
 348        if (n) {
 349                state = n->nud_state;
 350                neigh_release(n);
 351        }
 352        if (state==NUD_REACHABLE)
 353                return 0;
 354        if ((state&NUD_VALID) && order != fn_hash_last_dflt)
 355                return 0;
 356        if ((state&NUD_VALID) ||
 357            (*last_idx<0 && order > fn_hash_last_dflt)) {
 358                *last_resort = fi;
 359                *last_idx = order;
 360        }
 361        return 1;
 362}
 363
  /*
   * Choose among the default routes of table tb and, if a better one is
   * found, replace res->fi with it.
   *
   * Only zone 0, bucket 0 is examined.  Candidates are non-zombie unicast
   * nodes with res->scope whose first nexthop has a gateway and link
   * scope; the scan stops at the first node with a priority above that of
   * res->fi.  The first candidate has to be res->fi itself, otherwise the
   * scan is abandoned.  Each candidate is judged by fib_detect_death()
   * once its successor has been reached; the first one judged usable
   * becomes res->fi (reference counts adjusted) and its position is stored
   * in fn_hash_last_dflt.  If none is usable, the last_resort recorded by
   * fib_detect_death() is used when there is one.
   */
  364static void
  365fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
  366{
  367        int order, last_idx;
  368        struct fib_node *f;
  369        struct fib_info *fi = NULL;
  370        struct fib_info *last_resort;
  371        struct fn_hash *t = (struct fn_hash*)tb->tb_data;
  372        struct fn_zone *fz = t->fn_zones[0];
  373
  374        if (fz == NULL)
  375                return;
  376
  377        last_idx = -1;
  378        last_resort = NULL;
  379        order = -1;
  380
  381        read_lock(&fib_hash_lock);
  382        for (f = fz->fz_hash[0]; f; f = f->fn_next) {
  383                struct fib_info *next_fi = FIB_INFO(f);
  384
  385                if ((f->fn_state&FN_S_ZOMBIE) ||
  386                    f->fn_scope != res->scope ||
  387                    f->fn_type != RTN_UNICAST)
  388                        continue;
  389
  390                if (next_fi->fib_priority > res->fi->fib_priority)
  391                        break;
  392                if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
  393                        continue;
  394                f->fn_state |= FN_S_ACCESSED;
  395
                     /* fi is the previous candidate; it is judged now that
                        its successor next_fi is known. */
  396                if (fi == NULL) {
  397                        if (next_fi != res->fi)
  398                                break;
  399                } else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
  400                        if (res->fi)
  401                                fib_info_put(res->fi);
  402                        res->fi = fi;
  403                        atomic_inc(&fi->fib_clntref);
  404                        fn_hash_last_dflt = order;
  405                        goto out;
  406                }
  407                fi = next_fi;
  408                order++;
  409        }
  410
             /* Fewer than two candidates were seen: nothing to choose from. */
  411        if (order<=0 || fi==NULL) {
  412                fn_hash_last_dflt = -1;
  413                goto out;
  414        }
  415
             /* The final candidate has not been judged inside the loop. */
  416        if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
  417                if (res->fi)
  418                        fib_info_put(res->fi);
  419                res->fi = fi;
  420                atomic_inc(&fi->fib_clntref);
  421                fn_hash_last_dflt = order;
  422                goto out;
  423        }
  424
  425        if (last_idx >= 0) {
  426                if (res->fi)
  427                        fib_info_put(res->fi);
  428                res->fi = last_resort;
  429                if (last_resort)
  430                        atomic_inc(&last_resort->fib_clntref);
  431        }
  432        fn_hash_last_dflt = last_idx;
  433out:
  434        read_unlock(&fib_hash_lock);
  435}
 436
  /*
   * Chain walking helpers.  fp is a pointer to a link (bucket head or
   * fn_next field) and f the node it currently points at; both are
   * advanced in step and keep their values after the loop, so several
   * scans can be chained one after another.  None of them initialises fp.
   *
   *   FIB_SCAN:     walk to the end of the chain.
   *   FIB_SCAN_KEY: walk while the node's key equals key.
   *   FIB_SCAN_TOS: walk while key and tos both match; without
   *                 CONFIG_IP_ROUTE_TOS it is the same as FIB_SCAN_KEY.
   */
  437#define FIB_SCAN(f, fp) \
  438for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
  439
  440#define FIB_SCAN_KEY(f, fp, key) \
  441for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
  442
  443#ifndef CONFIG_IP_ROUTE_TOS
  444#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key)
  445#else
  446#define FIB_SCAN_TOS(f, fp, key, tos) \
  447for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \
  448     (f)->fn_tos == (tos) ; (fp) = &(f)->fn_next)
  449#endif
 450
 451
 452static void rtmsg_fib(int, struct fib_node*, int, int,
 453                      struct nlmsghdr *n,
 454                      struct netlink_skb_parms *);
 455
  /*
   * Add (or replace) a route described by the netlink request r/rta/n in
   * table tb.
   *
   * Returns 0 on success or a negative errno:
   *   -EINVAL  prefix length above 32, or destination has bits set outside
   *            the zone mask
   *   -ENOBUFS zone or node allocation failed
   *   -EEXIST  an equal route exists and the request does not permit it
   *   -ENOENT  no matching route exists and NLM_F_CREATE was not given
   *   or the error reported by fib_create_info().
   *
   * The fib_info created here is released again on every error path taken
   * after its creation.
   */
  456static int
  457fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
  458                struct nlmsghdr *n, struct netlink_skb_parms *req)
  459{
  460        struct fn_hash *table = (struct fn_hash*)tb->tb_data;
  461        struct fib_node *new_f, *f, **fp, **del_fp;
  462        struct fn_zone *fz;
  463        struct fib_info *fi;
  464
  465        int z = r->rtm_dst_len;
  466        int type = r->rtm_type;
  467#ifdef CONFIG_IP_ROUTE_TOS
  468        u8 tos = r->rtm_tos;
  469#endif
  470        fn_key_t key;
  471        int err;
  472
  473FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
  474*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
  475rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
  476        if (z > 32)
  477                return -EINVAL;
  478        fz = table->fn_zones[z];
  479        if (!fz && !(fz = fn_new_zone(table, z)))
  480                return -ENOBUFS;
  481
             /* Without a destination attribute the key stays all-zero. */
  482        fz_key_0(key);
  483        if (rta->rta_dst) {
  484                u32 dst;
  485                memcpy(&dst, rta->rta_dst, 4);
  486                if (dst & ~FZ_MASK(fz))
  487                        return -EINVAL;
  488                key = fz_key(dst, fz);
  489        }
  490
  491        if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
  492                return err;
  493
             /* Grow the table once it holds more than two entries per
                bucket, unless the divisor is at its limit or already covers
                every possible prefix of this length. */
  494        if (fz->fz_nent > (fz->fz_divisor<<1) &&
  495            fz->fz_divisor < FZ_MAX_DIVISOR &&
  496            (z==32 || (1<<z) > fz->fz_divisor))
  497                fn_rehash_zone(fz);
  498
  499        fp = fz_chain_p(key, fz);
  500
  501
  502        /*
  503         * Scan list to find the first route with the same destination
  504         */
  505        FIB_SCAN(f, fp) {
  506                if (fn_key_leq(key,f->fn_key))
  507                        break;
  508        }
  509
  510#ifdef CONFIG_IP_ROUTE_TOS
  511        /*
  512         * Find route with the same destination and tos.
  513         */
  514        FIB_SCAN_KEY(f, fp, key) {
  515                if (f->fn_tos <= tos)
  516                        break;
  517        }
  518#endif
  519
  520        del_fp = NULL;
  521
             /* A zombie with our key (and tos) sits here: insert the new
                node right behind it and drop the zombie afterwards. */
  522        if (f && (f->fn_state&FN_S_ZOMBIE) &&
  523#ifdef CONFIG_IP_ROUTE_TOS
  524            f->fn_tos == tos &&
  525#endif
  526            fn_key_eq(f->fn_key, key)) {
  527                del_fp = fp;
  528                fp = &f->fn_next;
  529                f = *fp;
  530                goto create;
  531        }
  532
             /* Skip entries with a lower priority value than ours. */
  533        FIB_SCAN_TOS(f, fp, key, tos) {
  534                if (fi->fib_priority <= FIB_INFO(f)->fib_priority)
  535                        break;
  536        }
  537
  538        /* Now f==*fp points to the first node with the same
  539           keys [prefix,tos,priority], if such key already
  540           exists or to the node, before which we will insert new one.
  541         */
  542
  543        if (f && 
  544#ifdef CONFIG_IP_ROUTE_TOS
  545            f->fn_tos == tos &&
  546#endif
  547            fn_key_eq(f->fn_key, key) &&
  548            fi->fib_priority == FIB_INFO(f)->fib_priority) {
  549                struct fib_node **ins_fp;
  550
  551                err = -EEXIST;
  552                if (n->nlmsg_flags&NLM_F_EXCL)
  553                        goto out;
  554
                     /* Replace: the new node goes in behind the old one,
                        which is unlinked below; the NLM_F_CREATE check is
                        bypassed. */
  555                if (n->nlmsg_flags&NLM_F_REPLACE) {
  556                        del_fp = fp;
  557                        fp = &f->fn_next;
  558                        f = *fp;
  559                        goto replace;
  560                }
  561
  562                ins_fp = fp;
  563                err = -EEXIST;
  564
                     /* Refuse an exact duplicate (same type, scope and
                        fib_info) among the entries of equal priority. */
  565                FIB_SCAN_TOS(f, fp, key, tos) {
  566                        if (fi->fib_priority != FIB_INFO(f)->fib_priority)
  567                                break;
  568                        if (f->fn_type == type && f->fn_scope == r->rtm_scope
  569                            && FIB_INFO(f) == fi)
  570                                goto out;
  571                }
  572
                     /* With NLM_F_APPEND fp stays behind the last entry of
                        equal priority; otherwise insert before the first. */
  573                if (!(n->nlmsg_flags&NLM_F_APPEND)) {
  574                        fp = ins_fp;
  575                        f = *fp;
  576                }
  577        }
  578
  579create:
  580        err = -ENOENT;
  581        if (!(n->nlmsg_flags&NLM_F_CREATE))
  582                goto out;
  583
  584replace:
  585        err = -ENOBUFS;
  586        new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
  587        if (new_f == NULL)
  588                goto out;
  589
  590        memset(new_f, 0, sizeof(struct fib_node));
  591
  592        new_f->fn_key = key;
  593#ifdef CONFIG_IP_ROUTE_TOS
  594        new_f->fn_tos = tos;
  595#endif
  596        new_f->fn_type = type;
  597        new_f->fn_scope = r->rtm_scope;
  598        FIB_INFO(new_f) = fi;
  599
  600        /*
  601         * Insert new entry to the list.
  602         */
  603
  604        new_f->fn_next = f;
  605        write_lock_bh(&fib_hash_lock);
  606        *fp = new_f;
  607        write_unlock_bh(&fib_hash_lock);
  608        fz->fz_nent++;
  609
  610        if (del_fp) {
  611                f = *del_fp;
  612                /* Unlink replaced node */
  613                write_lock_bh(&fib_hash_lock);
  614                *del_fp = f->fn_next;
  615                write_unlock_bh(&fib_hash_lock);
  616
                     /* A zombie's deletion was already announced when it
                        was marked, so only live nodes get a DELROUTE. */
  617                if (!(f->fn_state&FN_S_ZOMBIE))
  618                        rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
  619                if (f->fn_state&FN_S_ACCESSED)
  620                        rt_cache_flush(-1);
  621                fn_free_node(f);
  622                fz->fz_nent--;
  623        } else {
  624                rt_cache_flush(-1);
  625        }
  626        rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
  627        return 0;
  628
  629out:
  630        fib_release_info(fi);
  631        return err;
  632}
 633
 634
  /*
   * Delete the route described by the netlink request r/rta/n from
   * table tb.
   *
   * Returns 0 on success, -EINVAL for a prefix length above 32 or a
   * destination with bits outside the zone mask, and -ESRCH when no
   * matching live route exists.
   *
   * When the victim is the only entry for its key (and tos) it is not
   * unlinked but marked FN_S_ZOMBIE; zombies are removed later by
   * fn_hash_flush() or by a subsequent insert of the same key.
   */
  635static int
  636fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
  637                struct nlmsghdr *n, struct netlink_skb_parms *req)
  638{
  639        struct fn_hash *table = (struct fn_hash*)tb->tb_data;
  640        struct fib_node **fp, **del_fp, *f;
  641        int z = r->rtm_dst_len;
  642        struct fn_zone *fz;
  643        fn_key_t key;
  644        int matched;
  645#ifdef CONFIG_IP_ROUTE_TOS
  646        u8 tos = r->rtm_tos;
  647#endif
  648
  649FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
  650       *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
  651        if (z > 32)
  652                return -EINVAL;
  653        if ((fz  = table->fn_zones[z]) == NULL)
  654                return -ESRCH;
  655
  656        fz_key_0(key);
  657        if (rta->rta_dst) {
  658                u32 dst;
  659                memcpy(&dst, rta->rta_dst, 4);
  660                if (dst & ~FZ_MASK(fz))
  661                        return -EINVAL;
  662                key = fz_key(dst, fz);
  663        }
  664
  665        fp = fz_chain_p(key, fz);
  666
  667
             /* Advance to the first node with our key; the chain is sorted,
                so meeting a larger key means there is none. */
  668        FIB_SCAN(f, fp) {
  669                if (fn_key_eq(f->fn_key, key))
  670                        break;
  671                if (fn_key_leq(key, f->fn_key)) {
  672                        return -ESRCH;
  673                }
  674        }
  675#ifdef CONFIG_IP_ROUTE_TOS
  676        FIB_SCAN_KEY(f, fp, key) {
  677                if (f->fn_tos == tos)
  678                        break;
  679        }
  680#endif
  681
             /* Count all entries for this key/tos and remember the first
                one that satisfies every filter given in the request. */
  682        matched = 0;
  683        del_fp = NULL;
  684        FIB_SCAN_TOS(f, fp, key, tos) {
  685                struct fib_info * fi = FIB_INFO(f);
  686
  687                if (f->fn_state&FN_S_ZOMBIE) {
  688                        return -ESRCH;
  689                }
  690                matched++;
  691
  692                if (del_fp == NULL &&
  693                    (!r->rtm_type || f->fn_type == r->rtm_type) &&
  694                    (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
  695                    (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) &&
  696                    fib_nh_match(r, n, rta, fi) == 0)
  697                        del_fp = fp;
  698        }
  699
  700        if (del_fp) {
  701                f = *del_fp;
  702                rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
  703
  704                if (matched != 1) {
                             /* Other entries share the key: unlink and free now. */
  705                        write_lock_bh(&fib_hash_lock);
  706                        *del_fp = f->fn_next;
  707                        write_unlock_bh(&fib_hash_lock);
  708
  709                        if (f->fn_state&FN_S_ACCESSED)
  710                                rt_cache_flush(-1);
  711                        fn_free_node(f);
  712                        fz->fz_nent--;
  713                } else {
                             /* Sole entry for the key: leave it linked as a zombie. */
  714                        f->fn_state |= FN_S_ZOMBIE;
  715                        if (f->fn_state&FN_S_ACCESSED) {
  716                                f->fn_state &= ~FN_S_ACCESSED;
  717                                rt_cache_flush(-1);
  718                        }
  719                        if (++fib_hash_zombies > 128)
  720                                fib_flush();
  721                }
  722
  723                return 0;
  724        }
  725        return -ESRCH;
  726}
 727
 728static __inline__ int
 729fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
 730{
 731        int found = 0;
 732        struct fib_node *f;
 733
 734        while ((f = *fp) != NULL) {
 735                struct fib_info *fi = FIB_INFO(f);
 736
 737                if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
 738                        write_lock_bh(&fib_hash_lock);
 739                        *fp = f->fn_next;
 740                        write_unlock_bh(&fib_hash_lock);
 741
 742                        fn_free_node(f);
 743                        found++;
 744                        continue;
 745                }
 746                fp = &f->fn_next;
 747        }
 748        return found;
 749}
 750
 751static int fn_hash_flush(struct fib_table *tb)
 752{
 753        struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 754        struct fn_zone *fz;
 755        int found = 0;
 756
 757        fib_hash_zombies = 0;
 758        for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
 759                int i;
 760                int tmp = 0;
 761                for (i=fz->fz_divisor-1; i>=0; i--)
 762                        tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
 763                fz->fz_nent -= tmp;
 764                found += tmp;
 765        }
 766        return found;
 767}
 768
 769
 770static __inline__ int
 771fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 772                     struct fib_table *tb,
 773                     struct fn_zone *fz,
 774                     struct fib_node *f)
 775{
 776        int i, s_i;
 777
 778        s_i = cb->args[3];
 779        for (i=0; f; i++, f=f->fn_next) {
 780                if (i < s_i) continue;
 781                if (f->fn_state&FN_S_ZOMBIE) continue;
 782                if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
 783                                  RTM_NEWROUTE,
 784                                  tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
 785                                  &f->fn_key, fz->fz_order, f->fn_tos,
 786                                  f->fn_info) < 0) {
 787                        cb->args[3] = i;
 788                        return -1;
 789                }
 790        }
 791        cb->args[3] = i;
 792        return skb->len;
 793}
 794
 795static __inline__ int
 796fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
 797                   struct fib_table *tb,
 798                   struct fn_zone *fz)
 799{
 800        int h, s_h;
 801
 802        s_h = cb->args[2];
 803        for (h=0; h < fz->fz_divisor; h++) {
 804                if (h < s_h) continue;
 805                if (h > s_h)
 806                        memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
 807                if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
 808                        continue;
 809                if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
 810                        cb->args[2] = h;
 811                        return -1;
 812                }
 813        }
 814        cb->args[2] = h;
 815        return skb->len;
 816}
 817
 818static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
 819{
 820        int m, s_m;
 821        struct fn_zone *fz;
 822        struct fn_hash *table = (struct fn_hash*)tb->tb_data;
 823
 824        s_m = cb->args[1];
 825        read_lock(&fib_hash_lock);
 826        for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
 827                if (m < s_m) continue;
 828                if (m > s_m)
 829                        memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
 830                if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
 831                        cb->args[1] = m;
 832                        read_unlock(&fib_hash_lock);
 833                        return -1;
 834                }
 835        }
 836        read_unlock(&fib_hash_lock);
 837        cb->args[1] = m;
 838        return skb->len;
 839}
 840
 841static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
 842                      struct nlmsghdr *n, struct netlink_skb_parms *req)
 843{
 844        struct sk_buff *skb;
 845        u32 pid = req ? req->pid : 0;
 846        int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
 847
 848        skb = alloc_skb(size, GFP_KERNEL);
 849        if (!skb)
 850                return;
 851
 852        if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
 853                          f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
 854                          FIB_INFO(f)) < 0) {
 855                kfree_skb(skb);
 856                return;
 857        }
 858        NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
 859        if (n->nlmsg_flags&NLM_F_ECHO)
 860                atomic_inc(&skb->users);
 861        netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
 862        if (n->nlmsg_flags&NLM_F_ECHO)
 863                netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
 864}
 865
 866#ifdef CONFIG_IP_MULTIPLE_TABLES
 867struct fib_table * fib_hash_init(int id)
 868#else
 869struct fib_table * __init fib_hash_init(int id)
 870#endif
 871{
 872        struct fib_table *tb;
 873
 874        if (fn_hash_kmem == NULL)
 875                fn_hash_kmem = kmem_cache_create("ip_fib_hash",
 876                                                 sizeof(struct fib_node),
 877                                                 0, SLAB_HWCACHE_ALIGN,
 878                                                 NULL, NULL);
 879
 880        tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
 881        if (tb == NULL)
 882                return NULL;
 883
 884        tb->tb_id = id;
 885        tb->tb_lookup = fn_hash_lookup;
 886        tb->tb_insert = fn_hash_insert;
 887        tb->tb_delete = fn_hash_delete;
 888        tb->tb_flush = fn_hash_flush;
 889        tb->tb_select_default = fn_hash_select_default;
 890        tb->tb_dump = fn_hash_dump;
 891        memset(tb->tb_data, 0, sizeof(struct fn_hash));
 892        return tb;
 893}
 894
 895/* ------------------------------------------------------------------------ */
 896#ifdef CONFIG_PROC_FS
 897
  /* Cursor of the seq_file walk over the main table (seq->private). */
  898struct fib_iter_state {
  899        struct fn_zone  *zone;	/* Zone being walked, NULL when the walk is over */
  900        int             bucket;	/* Index of the current bucket within zone */
  901        struct fib_node **hash;	/* Address of the current bucket head */
  902        struct fib_node *node;	/* Current node, NULL when nothing is left */
  903};
 904
 905static __inline__ struct fib_node *fib_get_first(struct seq_file *seq)
 906{
 907        struct fib_iter_state* iter = seq->private;
 908        struct fn_hash *table = (struct fn_hash *)ip_fib_main_table->tb_data;
 909
 910        iter->bucket = 0;
 911        iter->hash   = NULL;
 912        iter->node   = NULL;
 913
 914        for (iter->zone = table->fn_zone_list; iter->zone;
 915             iter->zone = iter->zone->fz_next) {
 916                int maxslot;
 917
 918                if (!iter->zone->fz_next)
 919                        continue;
 920
 921                iter->hash = iter->zone->fz_hash;
 922                maxslot = iter->zone->fz_divisor;
 923
 924                for (iter->bucket = 0; iter->bucket < maxslot;
 925                     ++iter->bucket, ++iter->hash) {
 926                        iter->node = *iter->hash;
 927
 928                        if (iter->node)
 929                                goto out;
 930                }
 931        }
 932out:
 933        return iter->node;
 934}
 935
 936static __inline__ struct fib_node *fib_get_next(struct seq_file *seq)
 937{
 938        struct fib_iter_state* iter = seq->private;
 939
 940        if (iter->node)
 941                iter->node = iter->node->fn_next;
 942
 943        if (iter->node)
 944                goto out;
 945
 946        if (!iter->zone)
 947                goto out;
 948
 949        for (;;) {
 950                int maxslot;
 951
 952                maxslot = iter->zone->fz_divisor;
 953
 954                while (++iter->bucket < maxslot) {
 955                        iter->node = *++iter->hash;
 956
 957                        if (iter->node)
 958                                goto out;
 959                }
 960
 961                iter->zone = iter->zone->fz_next;
 962
 963                if (!iter->zone)
 964                        goto out;
 965                
 966                iter->hash = iter->zone->fz_hash;
 967                iter->bucket = 0;
 968                iter->node = *iter->hash;
 969                if (iter->node)
 970                        break;
 971        }
 972out:
 973        return iter->node;
 974}
 975
 976static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 977{
 978        void *v = NULL;
 979
 980        read_lock(&fib_hash_lock);
 981        if (ip_fib_main_table)
 982                v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
 983        return v;
 984}
 985
 986static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 987{
 988        ++*pos;
 989        return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
 990}
 991
 992static void fib_seq_stop(struct seq_file *seq, void *v)
 993{
 994        read_unlock(&fib_hash_lock);
 995}
 996
 997static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
 998{
 999        static unsigned type2flags[RTN_MAX + 1] = {
1000                [7] = RTF_REJECT, [8] = RTF_REJECT,
1001        };
1002        unsigned flags = type2flags[type];
1003
1004        if (fi && fi->fib_nh->nh_gw)
1005                flags |= RTF_GATEWAY;
1006        if (mask == 0xFFFFFFFF)
1007                flags |= RTF_HOST;
1008        if (!dead)
1009                flags |= RTF_UP;
1010        return flags;
1011}
1012
1013/* 
1014 *      This outputs /proc/net/route.
1015 *
1016 *      It always works in backward compatibility mode.
1017 *      The format of the file is not supposed to be changed.
1018 */
1019static int fib_seq_show(struct seq_file *seq, void *v)
1020{
1021        struct fib_iter_state* iter;
1022        char bf[128];
1023        u32 prefix, mask;
1024        unsigned flags;
1025        struct fib_node *f;
1026        struct fib_info *fi;
1027
1028        if (v == SEQ_START_TOKEN) {
1029                seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1030                           "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1031                           "\tWindow\tIRTT");
1032                goto out;
1033        }
1034
1035        f       = v;
1036        fi      = FIB_INFO(f);
1037        iter    = seq->private;
1038        prefix  = fz_prefix(f->fn_key, iter->zone);
1039        mask    = FZ_MASK(iter->zone);
1040        flags   = fib_flag_trans(f->fn_type, f->fn_state & FN_S_ZOMBIE,
1041                                 mask, fi);
1042        if (fi)
1043                snprintf(bf, sizeof(bf),
1044                         "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1045                         fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1046                         fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1047                         mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1048                         fi->fib_window,
1049                         fi->fib_rtt >> 3);
1050        else
1051                snprintf(bf, sizeof(bf),
1052                         "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1053                         prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0);
1054        seq_printf(seq, "%-127s\n", bf);
1055out:
1056        return 0;
1057}
1058
1059static struct seq_operations fib_seq_ops = {
1060        .start  = fib_seq_start,
1061        .next   = fib_seq_next,
1062        .stop   = fib_seq_stop,
1063        .show   = fib_seq_show,
1064};
1065
1066static int fib_seq_open(struct inode *inode, struct file *file)
1067{
1068        struct seq_file *seq;
1069        int rc = -ENOMEM;
1070        struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);
1071       
1072        if (!s)
1073                goto out;
1074
1075        rc = seq_open(file, &fib_seq_ops);
1076        if (rc)
1077                goto out_kfree;
1078
1079        seq          = file->private_data;
1080        seq->private = s;
1081        memset(s, 0, sizeof(*s));
1082out:
1083        return rc;
1084out_kfree:
1085        kfree(s);
1086        goto out;
1087}
1088
1089static struct file_operations fib_seq_fops = {
1090        .owner          = THIS_MODULE,
1091        .open           = fib_seq_open,
1092        .read           = seq_read,
1093        .llseek         = seq_lseek,
1094        .release        = seq_release_private,
1095};
1096
1097int __init fib_proc_init(void)
1098{
1099        if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
1100                return -ENOMEM;
1101        return 0;
1102}
1103
1104void __init fib_proc_exit(void)
1105{
1106        proc_net_remove("route");
1107}
1108#endif /* CONFIG_PROC_FS */
1109
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.