linux/net/netfilter/ipvs/ip_vs_lblcr.c
/*
 * IPVS:        Locality-Based Least-Connection with Replication scheduler
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *     Julian Anastasov        :    Added the missing (dest->weight>0)
 *                                  condition in the ip_vs_dest_set_max.
 *
 */

/*
 * The lblc/r algorithm is as follows (pseudo code):
 *
 *       if serverSet[dest_ip] is null then
 *               n, serverSet[dest_ip] <- {weighted least-conn node};
 *       else
 *               n <- {least-conn (alive) node in serverSet[dest_ip]};
 *               if (n is null) OR
 *                  (n.conns>n.weight AND
 *                   there is a node m with m.conns<m.weight/2) then
 *                   n <- {weighted least-conn node};
 *                   add n to serverSet[dest_ip];
 *               if |serverSet[dest_ip]| > 1 AND
 *                   now - serverSet[dest_ip].lastMod > T then
 *                   m <- {most conn node in serverSet[dest_ip]};
 *                   remove m from serverSet[dest_ip];
 *       if serverSet[dest_ip] changed then
 *               serverSet[dest_ip].lastMod <- now;
 *
 *       return n;
 *
 */
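
/*
 * Illustrative walk-through of the pseudo code above (example numbers
 * only, not derived from real traffic): suppose serverSet[192.0.2.10]
 * currently holds only server A with A.conns = 12 and A.weight = 10,
 * while another server B has B.conns = 3 and B.weight = 10.  A is loaded
 * beyond its weight and B satisfies B.conns < B.weight/2, so the weighted
 * least-connection node (here B) is chosen and added, giving
 * serverSet[192.0.2.10] = {A, B}.  Once the set then stays unmodified for
 * longer than T (sysctl_ip_vs_lblcr_expiration), the most loaded member
 * is removed again, shrinking the set back towards a single server.
 */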

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

#include <net/ip_vs.h>


/*
 *    Used for garbage collection of stale IPVS lblcr entries
 *    when the table is full.
 */
#define CHECK_EXPIRE_INTERVAL   (60*HZ)
#define ENTRY_TIMEOUT           (6*60*HZ)

/*
 *    Used for the full expiration check.
 *    When no partial expiration check (garbage collection) has run
 *    for half an hour, do a full expiration check to collect stale
 *    entries that haven't been touched for a day.
 */
#define COUNT_FOR_FULL_EXPIRATION   30
static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
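
/*
 * Timing sketch, assuming the defaults above: the periodic timer fires
 * every CHECK_EXPIRE_INTERVAL = 60*HZ, i.e. once a minute.  Every
 * COUNT_FOR_FULL_EXPIRATION-th run in which no partial collection was
 * needed (roughly every 30 minutes under light load) a full sweep drops
 * entries idle for longer than sysctl_ip_vs_lblcr_expiration, by default
 * 24*60*60*HZ, i.e. one day.  ENTRY_TIMEOUT (6*60*HZ = 6 minutes) is the
 * much shorter idle limit applied only once the table has outgrown its
 * maximum size.
 */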


/*
 *     for IPVS lblcr entry hash table
 */
#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
#endif
#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)
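
/*
 * With the default CONFIG_IP_VS_LBLCR_TAB_BITS of 10 this works out to
 * IP_VS_LBLCR_TAB_SIZE = 1 << 10 = 1024 buckets and a mask of 0x3ff;
 * ip_vs_lblcr_init_svc() below then allows up to TAB_SIZE*16 = 16384
 * entries before the periodic timer starts trimming the table.
 */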


/*
 *      IPVS destination set structure and operations
 */
struct ip_vs_dest_list {
        struct ip_vs_dest_list  *next;          /* list link */
        struct ip_vs_dest       *dest;          /* destination server */
};

struct ip_vs_dest_set {
        atomic_t                size;           /* set size */
        unsigned long           lastmod;        /* last modified time */
        struct ip_vs_dest_list  *list;          /* destination list */
        rwlock_t                lock;           /* lock for this list */
};


static struct ip_vs_dest_list *
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_list *e;

        for (e=set->list; e!=NULL; e=e->next) {
                if (e->dest == dest)
                        /* already existed */
                        return NULL;
        }

        e = kmalloc(sizeof(*e), GFP_ATOMIC);
        if (e == NULL) {
                IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
                return NULL;
        }

        atomic_inc(&dest->refcnt);
        e->dest = dest;

        /* link it to the list */
        e->next = set->list;
        set->list = e;
        atomic_inc(&set->size);

        set->lastmod = jiffies;
        return e;
}

static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_list *e, **ep;

        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
                if (e->dest == dest) {
                        /* HIT */
                        *ep = e->next;
                        atomic_dec(&set->size);
                        set->lastmod = jiffies;
                        atomic_dec(&e->dest->refcnt);
                        kfree(e);
                        break;
                }
                ep = &e->next;
        }
}

static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
        struct ip_vs_dest_list *e, **ep;

        write_lock(&set->lock);
        for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
                *ep = e->next;
                /*
                 * We don't kfree dest because it is referred to either
                 * by its service or by the trash dest list.
                 */
                atomic_dec(&e->dest->refcnt);
                kfree(e);
        }
        write_unlock(&set->lock);
}

/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_list *e;
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server whose weight > 0 */
        for (e=set->list; e!=NULL; e=e->next) {
                least = e->dest;
                if (least->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if ((atomic_read(&least->weight) > 0)
                    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted least load */
  nextstage:
        for (e=e->next; e!=NULL; e=e->next) {
                dest = e->dest;
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if ((loh * atomic_read(&dest->weight) >
                     doh * atomic_read(&least->weight))
                    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
                  NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);
        return least;
}


/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_list *e;
        struct ip_vs_dest *dest, *most;
        int moh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server whose weight > 0 */
        for (e=set->list; e!=NULL; e=e->next) {
                most = e->dest;
                if (atomic_read(&most->weight) > 0) {
                        moh = atomic_read(&most->activeconns) * 50
                                + atomic_read(&most->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted most load */
  nextstage:
        for (e=e->next; e!=NULL; e=e->next) {
                dest = e->dest;
                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
                if ((moh * atomic_read(&dest->weight) <
                     doh * atomic_read(&most->weight))
                    && (atomic_read(&dest->weight) > 0)) {
                        most = dest;
                        moh = doh;
                }
        }

        IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
                  NIPQUAD(most->addr.ip), ntohs(most->port),
                  atomic_read(&most->activeconns),
                  atomic_read(&most->refcnt),
                  atomic_read(&most->weight), moh);
        return most;
}


/*
 *      IPVS lblcr entry represents an association between destination
 *      IP address and its destination server set
 */
struct ip_vs_lblcr_entry {
        struct list_head        list;
        __be32                   addr;           /* destination IP address */
        struct ip_vs_dest_set   set;            /* destination server set */
        unsigned long           lastuse;        /* last used time */
};


/*
 *      IPVS lblcr hash table
 */
struct ip_vs_lblcr_table {
        struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
        atomic_t                entries;        /* number of entries */
        int                     max_size;       /* maximum size of entries */
        struct timer_list       periodic_timer; /* collect stale entries */
        int                     rover;          /* rover for expire check */
        int                     counter;        /* counter for no expire */
};


/*
 *      IPVS LBLCR sysctl table
 */

static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblcr_expiration",
                .data           = &sysctl_ip_vs_lblcr_expiration,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        { .ctl_name = 0 }
};
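
/*
 * Illustrative note (not from the original source): since the table is
 * registered below via net_vs_ctl_path, the knob normally appears as
 * /proc/sys/net/ipv4/vs/lblcr_expiration, and proc_dointvec_jiffies
 * converts between seconds in user space and jiffies in the kernel, so
 * e.g. "echo 3600 > /proc/sys/net/ipv4/vs/lblcr_expiration" would set
 * the expiration to one hour.
 */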

static struct ctl_table_header * sysctl_header;

static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
        list_del(&en->list);
        ip_vs_dest_set_eraseall(&en->set);
        kfree(en);
}


/*
 *      Returns hash value for IPVS LBLCR entry
 */
static inline unsigned ip_vs_lblcr_hashkey(__be32 addr)
{
        return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
}
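
/*
 * Note added for clarity: 2654435761 (0x9e3779b1) is the classic
 * multiplicative-hash constant, a prime close to 2^32 divided by the
 * golden ratio; multiplying the host-order address by it and masking
 * with IP_VS_LBLCR_TAB_MASK spreads nearby addresses across the buckets.
 */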


/*
 *      Hash an entry in the ip_vs_lblcr_table.
 */
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
        unsigned hash = ip_vs_lblcr_hashkey(en->addr);

        list_add(&en->list, &tbl->bucket[hash]);
        atomic_inc(&tbl->entries);
}


/*
 *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
 *  read lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
{
        unsigned hash = ip_vs_lblcr_hashkey(addr);
        struct ip_vs_lblcr_entry *en;

        list_for_each_entry(en, &tbl->bucket[hash], list)
                if (en->addr == addr)
                        return en;

        return NULL;
}


/*
 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
 * IP address to a server. Called under write lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl,  __be32 daddr,
                struct ip_vs_dest *dest)
{
        struct ip_vs_lblcr_entry *en;

        en = ip_vs_lblcr_get(tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
                if (!en) {
                        IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
                        return NULL;
                }

                en->addr = daddr;
                en->lastuse = jiffies;

                /* initialize its dest set */
                atomic_set(&(en->set.size), 0);
                en->set.list = NULL;
                rwlock_init(&en->set.lock);

                ip_vs_lblcr_hash(tbl, en);
        }

        write_lock(&en->set.lock);
        ip_vs_dest_set_insert(&en->set, dest);
        write_unlock(&en->set.lock);

        return en;
}


/*
 *      Flush all the entries of the specified table.
 */
static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
{
        int i;
        struct ip_vs_lblcr_entry *en, *nxt;

        /* No locking required, only called during cleanup. */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
                        ip_vs_lblcr_free(en);
                }
        }
}


static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
                                       now))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                }
                write_unlock(&svc->sched_lock);
        }
        tbl->rover = j;
}


/*
 *      Periodic timer handler for IPVS lblcr table
 *      It is used to collect stale entries when the number of entries
 *      exceeds the maximum size of the table.
 *
 *      Fixme: we probably need a more complicated algorithm to collect
 *             entries that have not been used for a long time even
 *             if the number of entries doesn't exceed the maximum size
 *             of the table.
 *      The full expiration check is for this purpose now.
 */
static void ip_vs_lblcr_check_expire(unsigned long data)
{
        struct ip_vs_service *svc = (struct ip_vs_service *) data;
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int goal;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;

        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
                /* do full expiration check */
                ip_vs_lblcr_full_check(svc);
                tbl->counter = 1;
                goto out;
        }

        if (atomic_read(&tbl->entries) <= tbl->max_size) {
                tbl->counter++;
                goto out;
        }

        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
        if (goal > tbl->max_size/2)
                goal = tbl->max_size/2;
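        /*
         * Worked example (illustrative numbers only): with the default
         * max_size of 16384, a table that has grown to 17000 entries
         * gives goal = (17000 - 16384)*4/3 = 821, well below the cap of
         * max_size/2 = 8192, so roughly 821 entries idle for longer than
         * ENTRY_TIMEOUT are trimmed on this pass.
         */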

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                        goal--;
                }
                write_unlock(&svc->sched_lock);
                if (goal <= 0)
                        break;
        }
        tbl->rover = j;

  out:
        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}

static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
        int i;
        struct ip_vs_lblcr_table *tbl;

        /*
         *    Allocate the ip_vs_lblcr_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
        if (tbl == NULL) {
                IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
                return -ENOMEM;
        }
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));

        /*
         *    Initialize the hash buckets
         */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                INIT_LIST_HEAD(&tbl->bucket[i]);
        }
        tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
        tbl->rover = 0;
        tbl->counter = 1;

        /*
         *    Hook periodic timer for garbage collection
         */
        setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
                        (unsigned long)svc);
        mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

        return 0;
}


static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;

        /* remove periodic timer */
        del_timer_sync(&tbl->periodic_timer);

        /* got to clean up table entries here */
        ip_vs_lblcr_flush(tbl);

        /* release the table itself */
        kfree(tbl);
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
                  sizeof(*tbl));

        return 0;
}


static inline struct ip_vs_dest *
__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        /*
         * We think the overhead of processing active connections is fifty
         * times higher than that of inactive connections on average. (This
         * factor of fifty may not be accurate; we may change it later.) We
         * use the following formula to estimate the overhead:
         *                dest->activeconns*50 + dest->inactconns
         * and the load:
         *                (dest overhead) / dest->weight
         *
         * Remember -- no floats in kernel mode!!!
         * The comparison of h1*w2 > h2*w1 is equivalent to that of
         *                h1/w1 > h2/w2
         * if every weight is larger than zero.
         *
         * The server with weight=0 is quiesced and will not receive any
         * new connection.
         */
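        /*
         * Worked example of the comparison above (numbers are purely
         * illustrative): server A with 10 active and 0 inactive
         * connections and weight 1 has overhead loh = 10*50 + 0 = 500;
         * server B with 30 active connections and weight 4 has
         * doh = 1500.  Since loh*Bweight = 500*4 = 2000 > doh*Aweight =
         * 1500*1, B carries the smaller weighted load and is preferred.
         */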
        list_for_each_entry(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if (atomic_read(&dest->weight) > 0) {
                        least = dest;
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /*
         *    Find the destination with the least load.
         */
  nextstage:
        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
                  NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);

        return least;
}


/*
 *   If this destination server is overloaded and there is a less loaded
 *   server, then return true.
 */
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
        if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
                struct ip_vs_dest *d;

                list_for_each_entry(d, &svc->destinations, n_list) {
                        if (atomic_read(&d->activeconns)*2
                            < atomic_read(&d->weight)) {
                                return 1;
                        }
                }
        }
        return 0;
}
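
/*
 * Example of the check above (illustrative values): a destination with
 * 12 active connections and weight 10 counts as overloaded, and if any
 * other destination has e.g. 2 active connections with weight 10
 * (2*2 = 4 < 10), is_overloaded() returns 1 so the scheduler looks for
 * a better server instead of sticking to the cached one.
 */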


/*
 *    Locality-Based (weighted) Least-Connection with Replication scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        struct iphdr *iph = ip_hdr(skb);
        struct ip_vs_dest *dest = NULL;
        struct ip_vs_lblcr_entry *en;

        IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");

        /* First look in our cache */
        read_lock(&svc->sched_lock);
        en = ip_vs_lblcr_get(tbl, iph->daddr);
        if (en) {
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;

                /* Get the least loaded destination */
                read_lock(&en->set.lock);
                dest = ip_vs_dest_set_min(&en->set);
                read_unlock(&en->set.lock);

                /* More than one destination + enough time passed by, cleanup */
                if (atomic_read(&en->set.size) > 1 &&
                                time_after(jiffies, en->set.lastmod +
                                sysctl_ip_vs_lblcr_expiration)) {
                        struct ip_vs_dest *m;

                        write_lock(&en->set.lock);
                        m = ip_vs_dest_set_max(&en->set);
                        if (m)
                                ip_vs_dest_set_erase(&en->set, m);
                        write_unlock(&en->set.lock);
                }

                /* If the destination is not overloaded, use it */
                if (dest && !is_overloaded(dest, svc)) {
                        read_unlock(&svc->sched_lock);
                        goto out;
                }

                /* The cache entry is invalid, time to schedule */
                dest = __ip_vs_lblcr_schedule(svc, iph);
                if (!dest) {
                        IP_VS_DBG(1, "no destination available\n");
                        read_unlock(&svc->sched_lock);
                        return NULL;
                }

                /* Update our cache entry */
                write_lock(&en->set.lock);
                ip_vs_dest_set_insert(&en->set, dest);
                write_unlock(&en->set.lock);
        }
        read_unlock(&svc->sched_lock);

        if (dest)
                goto out;

        /* No cache entry, time to schedule */
        dest = __ip_vs_lblcr_schedule(svc, iph);
        if (!dest) {
                IP_VS_DBG(1, "no destination available\n");
                return NULL;
        }

        /* If we fail to create a cache entry, we'll just use the valid dest */
        write_lock(&svc->sched_lock);
        ip_vs_lblcr_new(tbl, iph->daddr, dest);
        write_unlock(&svc->sched_lock);

out:
        IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->daddr),
                  NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));

        return dest;
}


/*
 *      IPVS LBLCR Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
{
        .name =                 "lblcr",
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
        .supports_ipv6 =        0,
#endif
        .init_service =         ip_vs_lblcr_init_svc,
        .done_service =         ip_vs_lblcr_done_svc,
        .schedule =             ip_vs_lblcr_schedule,
};
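
/*
 * Illustrative usage note (not part of the original source): once this
 * module is loaded, the scheduler is selected per virtual service from
 * user space, e.g. "ipvsadm -A -t 192.0.2.1:80 -s lblcr" to create an
 * lblcr-scheduled virtual service.
 */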


static int __init ip_vs_lblcr_init(void)
{
        int ret;

        sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
        ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        if (ret)
                unregister_sysctl_table(sysctl_header);
        return ret;
}


static void __exit ip_vs_lblcr_cleanup(void)
{
        unregister_sysctl_table(sysctl_header);
        unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
}


module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");
