linux-old/net/ipv4/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Version:     $Id: ip_vs_ctl.c,v 1.30.2.3 2003/07/29 14:37:12 wensong Exp $
   9 *
  10 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  11 *              Peter Kese <peter.kese@ijs.si>
  12 *              Julian Anastasov <ja@ssi.bg>
  13 *
  14 *              This program is free software; you can redistribute it and/or
  15 *              modify it under the terms of the GNU General Public License
  16 *              as published by the Free Software Foundation; either version
  17 *              2 of the License, or (at your option) any later version.
  18 *
  19 * Changes:
  20 *
  21 */
  22
  23#include <linux/module.h>
  24#include <linux/init.h>
  25#include <linux/types.h>
  26#include <linux/fs.h>
  27#include <linux/sysctl.h>
  28#include <linux/proc_fs.h>
  29#include <linux/timer.h>
  30#include <linux/swap.h>
  31#include <linux/proc_fs.h>
  32
  33#include <linux/netfilter.h>
  34#include <linux/netfilter_ipv4.h>
  35
  36#include <net/ip.h>
  37#include <net/sock.h>
  38
  39#include <asm/uaccess.h>
  40
  41#include <net/ip_vs.h>
  42
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DECLARE_MUTEX(__ip_vs_mutex);

/*
 * lock for service table; read-locked by ip_vs_service_get() and
 * write-locked (bh-safe) while a service's destination list is changed
 */
rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;

/* lock for table with the real services (ip_vs_rtable) */
static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;

/*
 * lock for state and timeout tables; held for writing while
 * update_defense_level() calls ip_vs_secure_tcp_set()
 */
static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;

/* lock for drop entry handling */
static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;

/* lock for drop packet handling */
static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;

/*
 * 1/rate drop and drop-entry variables; all three are recomputed by
 * update_defense_level().  ip_vs_dropentry is a 0/1 flag tested by the
 * defense timer before it calls ip_vs_random_dropentry().
 */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/*
 * sysctl variables
 *
 * drop_entry, drop_packet and secure_tcp are strategy modes (0..3)
 * interpreted, and for modes 1/2 rewritten, by update_defense_level().
 * amemthresh is the threshold that free + buffer memory is compared
 * against; am_droprate is the drop rate used in drop_packet mode 3.
 * The remaining non-static variables are not referenced in this part of
 * the file.
 */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
static int sysctl_ip_vs_amemthresh = 2048;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold = 3;
int sysctl_ip_vs_nat_icmp_send = 0;
  80
  81#ifdef CONFIG_IP_VS_DEBUG
  82static int sysctl_ip_vs_debug_level = 0;
  83
  84int ip_vs_get_debug_level(void)
  85{
  86        return sysctl_ip_vs_debug_level;
  87}
  88#endif
  89
  90/*
  91 *      update_defense_level is called from timer bh and from sysctl.
  92 */
  93static void update_defense_level(void)
  94{
  95        struct sysinfo i;
  96        int availmem;
  97        int nomem;
  98
  99        /* we only count free and buffered memory (in pages) */
 100        si_meminfo(&i);
 101        availmem = i.freeram + i.bufferram;
 102
 103        nomem = (availmem < sysctl_ip_vs_amemthresh);
 104
 105        /* drop_entry */
 106        spin_lock(&__ip_vs_dropentry_lock);
 107        switch (sysctl_ip_vs_drop_entry) {
 108        case 0:
 109                atomic_set(&ip_vs_dropentry, 0);
 110                break;
 111        case 1:
 112                if (nomem) {
 113                        atomic_set(&ip_vs_dropentry, 1);
 114                        sysctl_ip_vs_drop_entry = 2;
 115                } else {
 116                        atomic_set(&ip_vs_dropentry, 0);
 117                }
 118                break;
 119        case 2:
 120                if (nomem) {
 121                        atomic_set(&ip_vs_dropentry, 1);
 122                } else {
 123                        atomic_set(&ip_vs_dropentry, 0);
 124                        sysctl_ip_vs_drop_entry = 1;
 125                };
 126                break;
 127        case 3:
 128                atomic_set(&ip_vs_dropentry, 1);
 129                break;
 130        }
 131        spin_unlock(&__ip_vs_dropentry_lock);
 132
 133        /* drop_packet */
 134        spin_lock(&__ip_vs_droppacket_lock);
 135        switch (sysctl_ip_vs_drop_packet) {
 136        case 0:
 137                ip_vs_drop_rate = 0;
 138                break;
 139        case 1:
 140                if (nomem) {
 141                        ip_vs_drop_rate = ip_vs_drop_counter
 142                                = sysctl_ip_vs_amemthresh /
 143                                (sysctl_ip_vs_amemthresh - availmem);
 144                        sysctl_ip_vs_drop_packet = 2;
 145                } else {
 146                        ip_vs_drop_rate = 0;
 147                }
 148                break;
 149        case 2:
 150                if (nomem) {
 151                        ip_vs_drop_rate = ip_vs_drop_counter
 152                                = sysctl_ip_vs_amemthresh /
 153                                (sysctl_ip_vs_amemthresh - availmem);
 154                } else {
 155                        ip_vs_drop_rate = 0;
 156                        sysctl_ip_vs_drop_packet = 1;
 157                }
 158                break;
 159        case 3:
 160                ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
 161                break;
 162        }
 163        spin_unlock(&__ip_vs_droppacket_lock);
 164
 165        /* secure_tcp */
 166        write_lock(&__ip_vs_securetcp_lock);
 167        switch (sysctl_ip_vs_secure_tcp) {
 168        case 0:
 169                ip_vs_secure_tcp_set(0);
 170                break;
 171        case 1:
 172                if (nomem) {
 173                        ip_vs_secure_tcp_set(1);
 174                        sysctl_ip_vs_secure_tcp = 2;
 175                } else {
 176                        ip_vs_secure_tcp_set(0);
 177                }
 178                break;
 179        case 2:
 180                if (nomem) {
 181                        ip_vs_secure_tcp_set(1);
 182                } else {
 183                        ip_vs_secure_tcp_set(0);
 184                        sysctl_ip_vs_secure_tcp = 1;
 185                }
 186                break;
 187        case 3:
 188                ip_vs_secure_tcp_set(1);
 189                break;
 190        }
 191        write_unlock(&__ip_vs_securetcp_lock);
 192}
 193
 194
 195/*
 196 *      Timer for checking the defense
 197 */
 198static struct timer_list defense_timer;
 199#define DEFENSE_TIMER_PERIOD    1*HZ
 200
 201static void defense_timer_handler(unsigned long data)
 202{
 203        update_defense_level();
 204        if (atomic_read(&ip_vs_dropentry))
 205                ip_vs_random_dropentry();
 206
 207        mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
 208}
 209
 210
/*
 *  Hash table: for virtual service lookups
 *  (1 << 8 = 256 buckets; the mask selects the bucket index)
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *  Hash table: for real service lookups
 *  (1 << 4 = 16 buckets)
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

/* real services (destinations) linked through their d_list member */
static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 * Trash for destinations
 * (destinations are linked here through their n_list member)
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 * FTP & NULL virtual service counters
 * ip_vs_service_get() only tries its FTP and port-zero fallback lookups
 * while the matching counter is non-zero.  Where the counters are
 * updated is not visible in this part of the file.
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
 242
 243
 244/*
 245 *  Returns hash value for virtual service
 246 */
 247static __inline__ unsigned
 248ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
 249{
 250        register unsigned porth = ntohs(port);
 251
 252        return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 253                & IP_VS_SVC_TAB_MASK;
 254}
 255
 256/*
 257 *  Returns hash value of fwmark for virtual service lookup
 258 */
 259static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
 260{
 261        return fwmark & IP_VS_SVC_TAB_MASK;
 262}
 263
 264/*
 265 *  Hashes ip_vs_service in the ip_vs_svc_table by <proto,addr,port>
 266 *  or in the ip_vs_svc_fwm_table by fwmark.
 267 *  Should be called with locked tables.
 268 *  Returns bool success.
 269 */
 270static int ip_vs_svc_hash(struct ip_vs_service *svc)
 271{
 272        unsigned hash;
 273
 274        if (svc->flags & IP_VS_SVC_F_HASHED) {
 275                IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
 276                          "called from %p\n", __builtin_return_address(0));
 277                return 0;
 278        }
 279
 280        if (svc->fwmark == 0) {
 281                /*
 282                 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
 283                 */
 284                hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
 285                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 286        } else {
 287                /*
 288                 *  Hash it by fwmark in ip_vs_svc_fwm_table
 289                 */
 290                hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
 291                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 292        }
 293
 294        svc->flags |= IP_VS_SVC_F_HASHED;
 295        /* increase its refcnt because it is referenced by the svc table */
 296        atomic_inc(&svc->refcnt);
 297        return 1;
 298}
 299
 300
 301/*
 302 *  Unhashes ip_vs_service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 303 *  Should be called with locked tables.
 304 *  Returns bool success.
 305 */
 306static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 307{
 308        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 309                IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
 310                          "called from %p\n", __builtin_return_address(0));
 311                return 0;
 312        }
 313
 314        if (svc->fwmark == 0) {
 315                /*
 316                 * Remove it from the ip_vs_svc_table table.
 317                 */
 318                list_del(&svc->s_list);
 319        } else {
 320                /*
 321                 * Remove it from the ip_vs_svc_fwm_table table.
 322                 */
 323                list_del(&svc->f_list);
 324        }
 325
 326        svc->flags &= ~IP_VS_SVC_F_HASHED;
 327        atomic_dec(&svc->refcnt);
 328        return 1;
 329}
 330
 331
 332/*
 333 *  Get service by {proto,addr,port} in the service table.
 334 */
 335static __inline__ struct ip_vs_service *
 336__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
 337{
 338        unsigned hash;
 339        struct ip_vs_service *svc;
 340        struct list_head *l,*e;
 341
 342        /*
 343         *      Check for "full" addressed entries
 344         */
 345        hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
 346
 347        l = &ip_vs_svc_table[hash];
 348        for (e=l->next; e!=l; e=e->next) {
 349                svc = list_entry(e, struct ip_vs_service, s_list);
 350                if ((svc->addr == vaddr)
 351                    && (svc->port == vport)
 352                    && (svc->protocol == protocol)) {
 353                        /* HIT */
 354                        atomic_inc(&svc->usecnt);
 355                        return svc;
 356                }
 357        }
 358
 359        return NULL;
 360}
 361
 362
 363/*
 364 *  Get service by {fwmark} in the service table.
 365 */
 366static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
 367{
 368        unsigned hash;
 369        struct ip_vs_service *svc;
 370        struct list_head *l,*e;
 371
 372        /*
 373         *      Check for "full" addressed entries
 374         */
 375        hash = ip_vs_svc_fwm_hashkey(fwmark);
 376
 377        l = &ip_vs_svc_fwm_table[hash];
 378        for (e=l->next; e!=l; e=e->next) {
 379                svc = list_entry(e, struct ip_vs_service, f_list);
 380                if (svc->fwmark == fwmark) {
 381                        /* HIT */
 382                        atomic_inc(&svc->usecnt);
 383                        return svc;
 384                }
 385        }
 386
 387        return NULL;
 388}
 389
 390struct ip_vs_service *
 391ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
 392{
 393        struct ip_vs_service *svc;
 394
 395        read_lock(&__ip_vs_svc_lock);
 396
 397        /*
 398         *      Check the table hashed by fwmark first
 399         */
 400        if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
 401                goto out;
 402
 403        /*
 404         *      Check the table hashed by <protocol,addr,port>
 405         *      for "full" addressed entries
 406         */
 407        svc = __ip_vs_service_get(protocol, vaddr, vport);
 408
 409        if (svc == NULL
 410            && protocol == IPPROTO_TCP
 411            && atomic_read(&ip_vs_ftpsvc_counter)
 412            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 413                /*
 414                 * Check if ftp service entry exists, the packet
 415                 * might belong to FTP data connections.
 416                 */
 417                svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
 418        }
 419
 420        if (svc == NULL
 421            && atomic_read(&ip_vs_nullsvc_counter)) {
 422                /*
 423                 * Check if the catch-all port (port zero) exists
 424                 */
 425                svc = __ip_vs_service_get(protocol, vaddr, 0);
 426        }
 427
 428  out:
 429        read_unlock(&__ip_vs_svc_lock);
 430
 431        IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
 432                  fwmark, ip_vs_proto_name(protocol),
 433                  NIPQUAD(vaddr), ntohs(vport),
 434                  svc?"hit":"not hit");
 435
 436        return svc;
 437}
 438
 439
 440static inline void
 441__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 442{
 443        atomic_inc(&svc->refcnt);
 444        dest->svc = svc;
 445}
 446
 447static inline void
 448__ip_vs_unbind_svc(struct ip_vs_dest *dest)
 449{
 450        struct ip_vs_service *svc = dest->svc;
 451
 452        dest->svc = NULL;
 453        if (atomic_dec_and_test(&svc->refcnt))
 454                kfree(svc);
 455}
 456
 457/*
 458 *  Returns hash value for real service
 459 */
 460static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
 461{
 462        register unsigned porth = ntohs(port);
 463
 464        return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
 465                & IP_VS_RTAB_MASK;
 466}
 467
 468/*
 469 *  Hashes ip_vs_dest in ip_vs_rtable by proto,addr,port.
 470 *  should be called with locked tables.
 471 *  returns bool success.
 472 */
 473static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 474{
 475        unsigned hash;
 476
 477        if (!list_empty(&dest->d_list)) {
 478                return 0;
 479        }
 480
 481        /*
 482         *      Hash by proto,addr,port,
 483         *      which are the parameters of the real service.
 484         */
 485        hash = ip_vs_rs_hashkey(dest->addr, dest->port);
 486        list_add(&dest->d_list, &ip_vs_rtable[hash]);
 487
 488        return 1;
 489}
 490
 491/*
 492 *  UNhashes ip_vs_dest from ip_vs_rtable.
 493 *  should be called with locked tables.
 494 *  returns bool success.
 495 */
 496static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 497{
 498        /*
 499         * Remove it from the ip_vs_rtable table.
 500         */
 501        if (!list_empty(&dest->d_list)) {
 502                list_del(&dest->d_list);
 503                INIT_LIST_HEAD(&dest->d_list);
 504        }
 505
 506        return 1;
 507}
 508
 509/*
 510 *  Lookup real service by {proto,addr,port} in the real service table.
 511 */
 512struct ip_vs_dest *
 513ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
 514{
 515        unsigned hash;
 516        struct ip_vs_dest *dest;
 517        struct list_head *l,*e;
 518
 519        /*
 520         *      Check for "full" addressed entries
 521         *      Return the first found entry
 522         */
 523        hash = ip_vs_rs_hashkey(daddr, dport);
 524
 525        l = &ip_vs_rtable[hash];
 526
 527        read_lock(&__ip_vs_rs_lock);
 528        for (e=l->next; e!=l; e=e->next) {
 529                dest = list_entry(e, struct ip_vs_dest, d_list);
 530                if ((dest->addr == daddr)
 531                    && (dest->port == dport)
 532                    && ((dest->protocol == protocol) ||
 533                        dest->vfwmark)) {
 534                        /* HIT */
 535                        read_unlock(&__ip_vs_rs_lock);
 536                        return dest;
 537                }
 538        }
 539        read_unlock(&__ip_vs_rs_lock);
 540
 541        return NULL;
 542}
 543
 544/*
 545 *  Lookup destination by {addr,port} in the given service
 546 */
 547static struct ip_vs_dest *
 548ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
 549{
 550        struct ip_vs_dest *dest;
 551        struct list_head *l, *e;
 552
 553        /*
 554         * Find the destination for the given service
 555         */
 556        l = &svc->destinations;
 557        for (e=l->next; e!=l; e=e->next) {
 558                dest = list_entry(e, struct ip_vs_dest, n_list);
 559                if ((dest->addr == daddr) && (dest->port == dport)) {
 560                        /* HIT */
 561                        return dest;
 562                }
 563        }
 564
 565        return NULL;
 566}
 567
 568
 569/*
 570 *  Lookup dest by {svc,addr,port} in the destination trash.
 571 *  The destination trash is used to hold the destinations that are removed
 572 *  from the service table but are still referenced by some conn entries.
 573 *  The reason to add the destination trash is when the dest is temporary
 574 *  down (either by administrator or by monitor program), the dest can be
 575 *  picked back from the trash, the remaining connections to the dest can
 576 *  continue, and the counting information of the dest is also useful for
 577 *  scheduling.
 578 */
 579static struct ip_vs_dest *
 580ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
 581{
 582        struct ip_vs_dest *dest;
 583        struct list_head *l, *e;
 584
 585        /*
 586         * Find the destination in trash
 587         */
 588        l = &ip_vs_dest_trash;
 589
 590        for (e=l->next; e!=l; e=e->next) {
 591                dest = list_entry(e, struct ip_vs_dest, n_list);
 592                IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
 593                          "refcnt=%d\n",
 594                          dest->vfwmark,
 595                          NIPQUAD(dest->addr), ntohs(dest->port),
 596                          atomic_read(&dest->refcnt));
 597                if (dest->addr == daddr &&
 598                    dest->port == dport &&
 599                    dest->vfwmark == svc->fwmark &&
 600                    dest->protocol == svc->protocol &&
 601                    (svc->fwmark ||
 602                     (dest->vaddr == svc->addr &&
 603                      dest->vport == svc->port))) {
 604                        /* HIT */
 605                        return dest;
 606                }
 607
 608                /*
 609                 * Try to purge the destination from trash if not referenced
 610                 */
 611                if (atomic_read(&dest->refcnt) == 1) {
 612                        IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
 613                                  "from trash\n",
 614                                  dest->vfwmark,
 615                                  NIPQUAD(dest->addr), ntohs(dest->port));
 616                        e = e->prev;
 617                        list_del(&dest->n_list);
 618                        __ip_vs_dst_reset(dest);
 619                        __ip_vs_unbind_svc(dest);
 620                        kfree(dest);
 621                }
 622        }
 623
 624        return NULL;
 625}
 626
 627
 628/*
 629 *  Clean up all the destinations in the trash
 630 *  Called by the ip_vs_control_cleanup()
 631 *
 632 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 633 *  the service tables must have been flushed and all the connections
 634 *  are expired, and the refcnt of each destination in the trash must
 635 *  be 1, so we simply release them here.
 636 */
 637static void ip_vs_trash_cleanup(void)
 638{
 639        struct ip_vs_dest *dest;
 640        struct list_head *l;
 641
 642        l = &ip_vs_dest_trash;
 643
 644        while (l->next != l) {
 645                dest = list_entry(l->next, struct ip_vs_dest, n_list);
 646                list_del(&dest->n_list);
 647                __ip_vs_dst_reset(dest);
 648                __ip_vs_unbind_svc(dest);
 649                kfree(dest);
 650        }
 651}
 652
 653
 654static inline void
 655__ip_vs_zero_stats(struct ip_vs_stats *stats)
 656{
 657        spin_lock_bh(&stats->lock);
 658        memset(stats, 0, (char *)&stats->lock - (char *)stats);
 659        spin_unlock_bh(&stats->lock);
 660        ip_vs_zero_estimator(stats);
 661}
 662
 663/*
 664 *  Update a destination in the given service
 665 */
 666static void __ip_vs_update_dest(struct ip_vs_service *svc,
 667                                struct ip_vs_dest *dest,
 668                                struct ip_vs_rule_user *ur)
 669{
 670        int conn_flags;
 671
 672        /*
 673         *    Set the weight and the flags
 674         */
 675        atomic_set(&dest->weight, ur->weight);
 676
 677        conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
 678
 679        /*
 680         *    Check if local node and update the flags
 681         */
 682        if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
 683                conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 684                        | IP_VS_CONN_F_LOCALNODE;
 685        }
 686
 687        /*
 688         *    Set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading
 689         */
 690        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
 691                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 692        } else {
 693                /*
 694                 *    Put the real service in ip_vs_rtable if not present.
 695                 *    For now only for NAT!
 696                 */
 697                write_lock_bh(&__ip_vs_rs_lock);
 698                ip_vs_rs_hash(dest);
 699                write_unlock_bh(&__ip_vs_rs_lock);
 700        }
 701        atomic_set(&dest->conn_flags, conn_flags);
 702
 703        /* bind the service */
 704        if (!dest->svc) {
 705                __ip_vs_bind_svc(dest, svc);
 706        } else {
 707                if (dest->svc != svc) {
 708                        __ip_vs_unbind_svc(dest);
 709                        __ip_vs_zero_stats(&dest->stats);
 710                        __ip_vs_bind_svc(dest, svc);
 711                }
 712        }
 713
 714        /* set the dest status flags */
 715        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 716}
 717
 718
 719/*
 720 *  Create a destination for the given service
 721 */
 722static int
 723ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
 724               struct ip_vs_dest **destp)
 725{
 726        struct ip_vs_dest *dest;
 727        unsigned atype;
 728
 729        EnterFunction(2);
 730
 731        atype = inet_addr_type(ur->daddr);
 732        if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 733                return -EINVAL;
 734
 735        *destp = dest = (struct ip_vs_dest*)
 736                kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
 737        if (dest == NULL) {
 738                IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
 739                return -ENOMEM;
 740        }
 741        memset(dest, 0, sizeof(struct ip_vs_dest));
 742
 743        dest->protocol = svc->protocol;
 744        dest->vaddr = svc->addr;
 745        dest->vport = svc->port;
 746        dest->vfwmark = svc->fwmark;
 747        dest->addr = ur->daddr;
 748        dest->port = ur->dport;
 749
 750        atomic_set(&dest->activeconns, 0);
 751        atomic_set(&dest->inactconns, 0);
 752        atomic_set(&dest->refcnt, 0);
 753
 754        INIT_LIST_HEAD(&dest->d_list);
 755        dest->dst_lock = SPIN_LOCK_UNLOCKED;
 756        dest->stats.lock = SPIN_LOCK_UNLOCKED;
 757        __ip_vs_update_dest(svc, dest, ur);
 758        ip_vs_new_estimator(&dest->stats);
 759
 760        LeaveFunction(2);
 761        return 0;
 762}
 763
 764
 765/*
 766 *  Add a destination into an existing service
 767 */
 768static int ip_vs_add_dest(struct ip_vs_service *svc,
 769                          struct ip_vs_rule_user *ur)
 770{
 771        struct ip_vs_dest *dest;
 772        __u32 daddr = ur->daddr;
 773        __u16 dport = ur->dport;
 774        int ret;
 775
 776        EnterFunction(2);
 777
 778        if (ur->weight < 0) {
 779                IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
 780                return -ERANGE;
 781        }
 782
 783        /*
 784         * Check if the dest already exists in the list
 785         */
 786        dest = ip_vs_lookup_dest(svc, daddr, dport);
 787        if (dest != NULL) {
 788                IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
 789                return -EEXIST;
 790        }
 791
 792        /*
 793         * Check if the dest already exists in the trash and
 794         * is from the same service
 795         */
 796        dest = ip_vs_trash_get_dest(svc, daddr, dport);
 797        if (dest != NULL) {
 798                IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
 799                          "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
 800                          NIPQUAD(daddr), ntohs(dport),
 801                          atomic_read(&dest->refcnt),
 802                          dest->vfwmark,
 803                          NIPQUAD(dest->vaddr),
 804                          ntohs(dest->vport));
 805                __ip_vs_update_dest(svc, dest, ur);
 806
 807                /*
 808                 * Get the destination from the trash
 809                 */
 810                list_del(&dest->n_list);
 811
 812                ip_vs_new_estimator(&dest->stats);
 813
 814                write_lock_bh(&__ip_vs_svc_lock);
 815
 816                /*
 817                 * Wait until all other svc users go away.
 818                 */
 819                while (atomic_read(&svc->usecnt) > 1) {};
 820
 821                list_add(&dest->n_list, &svc->destinations);
 822                svc->num_dests++;
 823
 824                /* call the update_service function of its scheduler */
 825                svc->scheduler->update_service(svc);
 826
 827                write_unlock_bh(&__ip_vs_svc_lock);
 828                return 0;
 829        }
 830
 831        /*
 832         * Allocate and initialize the dest structure
 833         */
 834        ret = ip_vs_new_dest(svc, ur, &dest);
 835        if (ret) {
 836                return ret;
 837        }
 838
 839        /*
 840         * Add the dest entry into the list
 841         */
 842        atomic_inc(&dest->refcnt);
 843
 844        write_lock_bh(&__ip_vs_svc_lock);
 845
 846        /*
 847         * Wait until all other svc users go away.
 848         */
 849        while (atomic_read(&svc->usecnt) > 1) {};
 850
 851        list_add(&dest->n_list, &svc->destinations);
 852        svc->num_dests++;
 853
 854        /* call the update_service function of its scheduler */
 855        svc->scheduler->update_service(svc);
 856
 857        write_unlock_bh(&__ip_vs_svc_lock);
 858
 859        LeaveFunction(2);
 860
 861        return 0;
 862}
 863
 864
 865/*
 866 *  Edit a destination in the given service
 867 */
 868static int ip_vs_edit_dest(struct ip_vs_service *svc,
 869                           struct ip_vs_rule_user *ur)
 870{
 871        struct ip_vs_dest *dest;
 872        __u32 daddr = ur->daddr;
 873        __u16 dport = ur->dport;
 874
 875        EnterFunction(2);
 876
 877        if (ur->weight < 0) {
 878                IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
 879                return -ERANGE;
 880        }
 881
 882        /*
 883         *  Lookup the destination list
 884         */
 885        dest = ip_vs_lookup_dest(svc, daddr, dport);
 886        if (dest == NULL) {
 887                IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
 888                return -ENOENT;
 889        }
 890
 891        __ip_vs_update_dest(svc, dest, ur);
 892
 893        write_lock_bh(&__ip_vs_svc_lock);
 894
 895        /* Wait until all other svc users go away */
 896        while (atomic_read(&svc->usecnt) > 1) {};
 897
 898        /* call the update_service, because server weight may be changed */
 899        svc->scheduler->update_service(svc);
 900
 901        write_unlock_bh(&__ip_vs_svc_lock);
 902
 903        LeaveFunction(2);
 904
 905        return 0;
 906}
 907
 908
 909/*
 910 *  Delete a destination (must be already unlinked from the service)
 911 */
 912static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 913{
 914        ip_vs_kill_estimator(&dest->stats);
 915
 916        /*
 917         *  Remove it from the d-linked list with the real services.
 918         */
 919        write_lock_bh(&__ip_vs_rs_lock);
 920        ip_vs_rs_unhash(dest);
 921        write_unlock_bh(&__ip_vs_rs_lock);
 922
 923        /*
 924         *  Decrease the refcnt of the dest, and free the dest
 925         *  if nobody refers to it (refcnt=0). Otherwise, throw
 926         *  the destination into the trash.
 927         */
 928        if (atomic_dec_and_test(&dest->refcnt)) {
 929                __ip_vs_dst_reset(dest);
 930                /* simply decrease svc->refcnt here, let the caller check
 931                   and release the service if nobody refers to it.
 932                   Only user context can release destination and service,
 933                   and only one user context can update virtual service at a
 934                   time, so the operation here is OK */
 935                atomic_dec(&dest->svc->refcnt);
 936                kfree(dest);
 937        } else {
 938                IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
 939                          NIPQUAD(dest->addr), ntohs(dest->port),
 940                          atomic_read(&dest->refcnt));
 941                list_add(&dest->n_list, &ip_vs_dest_trash);
 942                atomic_inc(&dest->refcnt);
 943        }
 944}
 945
 946
 947/*
 948 *  Unlink a destination from the given service
 949 */
 950static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 951                                struct ip_vs_dest *dest,
 952                                int svcupd)
 953{
 954        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
 955
 956        /*
 957         *  Remove it from the d-linked destination list.
 958         */
 959        list_del(&dest->n_list);
 960        svc->num_dests--;
 961        if (svcupd) {
 962                /*
 963                 *  Call the update_service function of its scheduler
 964                 */
 965                svc->scheduler->update_service(svc);
 966        }
 967}
 968
 969
 970/*
 971 *  Delete a destination server in the given service
 972 */
 973static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
 974{
 975        struct ip_vs_dest *dest;
 976        __u32 daddr = ur->daddr;
 977        __u16 dport = ur->dport;
 978
 979        EnterFunction(2);
 980
 981        dest = ip_vs_lookup_dest(svc, daddr, dport);
 982        if (dest == NULL) {
 983                IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
 984                return -ENOENT;
 985        }
 986
 987        write_lock_bh(&__ip_vs_svc_lock);
 988
 989        /*
 990         *      Wait until all other svc users go away.
 991         */
 992        while (atomic_read(&svc->usecnt) > 1) {};
 993
 994        /*
 995         *      Unlink dest from the service
 996         */
 997        __ip_vs_unlink_dest(svc, dest, 1);
 998
 999        write_unlock_bh(&__ip_vs_svc_lock);
1000
1001        /*
1002         *      Delete the destination
1003         */
1004        __ip_vs_del_dest(dest);
1005
1006        LeaveFunction(2);
1007
1008        return 0;
1009}
1010
1011
1012/*
1013 *  Add a service into the service hash table
1014 */
1015static int
1016ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
1017{
1018        int ret = 0;
1019        struct ip_vs_scheduler *sched;
1020        struct ip_vs_service *svc = NULL;
1021
1022        MOD_INC_USE_COUNT;
1023
1024        /*
1025         * Lookup the scheduler, by 'ur->sched_name'
1026         */
1027        sched = ip_vs_scheduler_get(ur->sched_name);
1028        if (sched == NULL) {
1029                IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1030                           ur->sched_name);
1031                ret = -ENOENT;
1032                goto out_mod_dec;
1033        }
1034
1035        svc = (struct ip_vs_service*)
1036                kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1037        if (svc == NULL) {
1038                IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1039                ret = -ENOMEM;
1040                goto out_err;
1041        }
1042        memset(svc, 0, sizeof(struct ip_vs_service));
1043
1044        svc->protocol = ur->protocol;
1045        svc->addr = ur->vaddr;
1046        svc->port = ur->vport;
1047        svc->fwmark = ur->vfwmark;
1048        svc->flags = ur->vs_flags;
1049        svc->timeout = ur->timeout * HZ;
1050        svc->netmask = ur->netmask;
1051
1052        INIT_LIST_HEAD(&svc->destinations);
1053        svc->sched_lock = RW_LOCK_UNLOCKED;
1054        svc->stats.lock = SPIN_LOCK_UNLOCKED;
1055
1056        /*
1057         *    Bind the scheduler
1058         */
1059        ret = ip_vs_bind_scheduler(svc, sched);
1060        if (ret) {
1061                goto out_err;
1062        }
1063
1064        /*
1065         *    Update the virtual service counters
1066         */
1067        if (svc->port == FTPPORT)
1068                atomic_inc(&ip_vs_ftpsvc_counter);
1069        else if (svc->port == 0)
1070                atomic_inc(&ip_vs_nullsvc_counter);
1071
1072        /*
1073         *    I'm the first user of the service
1074         */
1075        atomic_set(&svc->usecnt, 1);
1076        atomic_set(&svc->refcnt, 0);
1077
1078        ip_vs_new_estimator(&svc->stats);
1079        ip_vs_num_services++;
1080
1081        /*
1082         *    Hash the service into the service table
1083         */
1084        write_lock_bh(&__ip_vs_svc_lock);
1085        ip_vs_svc_hash(svc);
1086        write_unlock_bh(&__ip_vs_svc_lock);
1087
1088        *svc_p = svc;
1089        return 0;
1090
1091  out_err:
1092        if (svc)
1093                kfree(svc);
1094        ip_vs_scheduler_put(sched);
1095  out_mod_dec:
1096        MOD_DEC_USE_COUNT;
1097        return ret;
1098}
1099
1100
1101/*
1102 *      Edit a service and bind it with a new scheduler
1103 */
1104static int ip_vs_edit_service(struct ip_vs_service *svc,
1105                              struct ip_vs_rule_user *ur)
1106{
1107        struct ip_vs_scheduler *sched, *old_sched;
1108        int ret = 0;
1109
1110        /*
1111         * Lookup the scheduler, by 'ur->sched_name'
1112         */
1113        sched = ip_vs_scheduler_get(ur->sched_name);
1114        if (sched == NULL) {
1115                IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1116                           ur->sched_name);
1117                return -ENOENT;
1118        }
1119
1120        write_lock_bh(&__ip_vs_svc_lock);
1121
1122        /*
1123         * Wait until all other svc users go away.
1124         */
1125        while (atomic_read(&svc->usecnt) > 1) {};
1126
1127        /*
1128         * Set the flags and timeout value
1129         */
1130        svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
1131        svc->timeout = ur->timeout * HZ;
1132        svc->netmask = ur->netmask;
1133
1134        old_sched = svc->scheduler;
1135        if (sched != old_sched) {
1136                /*
1137                 * Unbind the old scheduler
1138                 */
1139                if ((ret = ip_vs_unbind_scheduler(svc))) {
1140                        old_sched = sched;
1141                        goto out;
1142                }
1143
1144                /*
1145                 * Bind the new scheduler
1146                 */
1147                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1148                        /*
1149                         * If ip_vs_bind_scheduler fails, restore the old
1150                         * scheduler.
1151                         * The main reason of failure is out of memory.
1152                         *
1153                         * The question is if the old scheduler can be
1154                         * restored all the time. TODO: if it cannot be
1155                         * restored some time, we must delete the service,
1156                         * otherwise the system may crash.
1157                         */
1158                        ip_vs_bind_scheduler(svc, old_sched);
1159                        old_sched = sched;
1160                }
1161        }
1162
1163  out:
1164        write_unlock_bh(&__ip_vs_svc_lock);
1165
1166        if (old_sched)
1167                ip_vs_scheduler_put(old_sched);
1168
1169        return ret;
1170}
1171
1172
1173/*
1174 *  Delete a service from the service list
1175 *  The service must be unlinked, unlocked and not referenced!
1176 */
1177static void __ip_vs_del_service(struct ip_vs_service *svc)
1178{
1179        struct list_head *l;
1180        struct ip_vs_dest *dest;
1181        struct ip_vs_scheduler *old_sched;
1182
1183        ip_vs_num_services--;
1184        ip_vs_kill_estimator(&svc->stats);
1185
1186        /*
1187         *    Unbind scheduler
1188         */
1189        old_sched = svc->scheduler;
1190        ip_vs_unbind_scheduler(svc);
1191        if (old_sched && old_sched->module)
1192                __MOD_DEC_USE_COUNT(old_sched->module);
1193
1194        /*
1195         *    Unlink the whole destination list
1196         */
1197        l = &svc->destinations;
1198        while (l->next != l) {
1199                dest = list_entry(l->next, struct ip_vs_dest, n_list);
1200                __ip_vs_unlink_dest(svc, dest, 0);
1201                __ip_vs_del_dest(dest);
1202        }
1203
1204        /*
1205         *    Update the virtual service counters
1206         */
1207        if (svc->port == FTPPORT)
1208                atomic_dec(&ip_vs_ftpsvc_counter);
1209        else if (svc->port == 0)
1210                atomic_dec(&ip_vs_nullsvc_counter);
1211
1212        /*
1213         *    Free the service if nobody refers to it
1214         */
1215        if (atomic_read(&svc->refcnt) == 0)
1216                kfree(svc);
1217        MOD_DEC_USE_COUNT;
1218}
1219
1220/*
1221 *  Delete a service from the service list
1222 */
1223static int ip_vs_del_service(struct ip_vs_service *svc)
1224{
1225        if (svc == NULL)
1226                return -EEXIST;
1227
1228        /*
1229         * Unhash it from the service table
1230         */
1231        write_lock_bh(&__ip_vs_svc_lock);
1232
1233        ip_vs_svc_unhash(svc);
1234
1235        /*
1236         * Wait until all the svc users go away.
1237         */
1238        while (atomic_read(&svc->usecnt) > 1) {};
1239
1240        __ip_vs_del_service(svc);
1241
1242        write_unlock_bh(&__ip_vs_svc_lock);
1243
1244        return 0;
1245}
1246
1247
1248/*
1249 *  Flush all the virtual services
1250 */
1251static int ip_vs_flush(void)
1252{
1253        int idx;
1254        struct ip_vs_service *svc;
1255        struct list_head *l;
1256
1257        /*
1258         * Flush the service table hashed by <protocol,addr,port>
1259         */
1260        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261                l = &ip_vs_svc_table[idx];
1262                while (l->next != l) {
1263                        svc = list_entry(l->next,struct ip_vs_service,s_list);
1264                        write_lock_bh(&__ip_vs_svc_lock);
1265                        ip_vs_svc_unhash(svc);
1266                        /*
1267                         * Wait until all the svc users go away.
1268                         */
1269                        while (atomic_read(&svc->usecnt) > 0) {};
1270                        __ip_vs_del_service(svc);
1271                        write_unlock_bh(&__ip_vs_svc_lock);
1272                }
1273        }
1274
1275        /*
1276         * Flush the service table hashed by fwmark
1277         */
1278        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279                l = &ip_vs_svc_fwm_table[idx];
1280                while (l->next != l) {
1281                        svc = list_entry(l->next,struct ip_vs_service,f_list);
1282                        write_lock_bh(&__ip_vs_svc_lock);
1283                        ip_vs_svc_unhash(svc);
1284                        /*
1285                         * Wait until all the svc users go away.
1286                         */
1287                        while (atomic_read(&svc->usecnt) > 0) {};
1288                        __ip_vs_del_service(svc);
1289                        write_unlock_bh(&__ip_vs_svc_lock);
1290                }
1291        }
1292
1293        return 0;
1294}
1295
1296
1297/*
1298 *  Zero counters in a service or all services
1299 */
1300static int ip_vs_zero_service(struct ip_vs_service *svc)
1301{
1302        struct list_head *l;
1303        struct ip_vs_dest *dest;
1304
1305        write_lock_bh(&__ip_vs_svc_lock);
1306        list_for_each (l, &svc->destinations) {
1307                dest = list_entry(l, struct ip_vs_dest, n_list);
1308                __ip_vs_zero_stats(&dest->stats);
1309        }
1310        __ip_vs_zero_stats(&svc->stats);
1311        write_unlock_bh(&__ip_vs_svc_lock);
1312        return 0;
1313}
1314
1315static int ip_vs_zero_all(void)
1316{
1317        int idx;
1318        struct list_head *l;
1319        struct ip_vs_service *svc;
1320
1321        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322                list_for_each (l, &ip_vs_svc_table[idx]) {
1323                        svc = list_entry(l, struct ip_vs_service, s_list);
1324                        ip_vs_zero_service(svc);
1325                }
1326        }
1327
1328        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329                list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330                        svc = list_entry(l, struct ip_vs_service, f_list);
1331                        ip_vs_zero_service(svc);
1332                }
1333        }
1334
1335        __ip_vs_zero_stats(&ip_vs_stats);
1336        return 0;
1337}
1338
1339
1340static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
1341        struct file * filp, void *buffer, size_t *lenp)
1342{
1343        int *valp = ctl->data;
1344        int val = *valp;
1345        int ret;
1346
1347        ret = proc_dointvec(ctl, write, filp, buffer, lenp);
1348        if (write && (*valp != val)) {
1349                if ((*valp < 0) || (*valp > 3)) {
1350                        /* Restore the correct value */
1351                        *valp = val;
1352                } else {
1353                        local_bh_disable();
1354                        update_defense_level();
1355                        local_bh_enable();
1356                }
1357        }
1358        return ret;
1359}
1360
1361
1362/*
1363 *      IPVS sysctl table
1364 */
1365struct ip_vs_sysctl_table {
1366        struct ctl_table_header *sysctl_header; /* NULL in the static instance; presumably set at registration - confirm */
1367        ctl_table vs_vars[NET_IPV4_VS_LAST];    /* the individual IPVS variables, ended by a {0} entry */
1368        ctl_table vs_dir[2];                    /* "vs" directory entry plus {0} terminator */
1369        ctl_table ipv4_dir[2];                  /* "ipv4" directory entry plus {0} terminator */
1370        ctl_table root_dir[2];                  /* "net" directory entry plus {0} terminator */
1371};
1372
1373
/*
 *      The one instance of the IPVS sysctl table.  It is initialized
 *      positionally, in the member order of struct ip_vs_sysctl_table:
 *      the header pointer (NULL here), then vs_vars, vs_dir, ipv4_dir
 *      and root_dir.
 *
 *      Every vs_vars entry gives, in order: a NET_IPV4_VS_* id, the
 *      file name, the address of the backing variable, sizeof(int),
 *      mode 0644, a NULL sixth field and the handler function.
 *      drop_entry, drop_packet and secure_tcp are handled by
 *      ip_vs_sysctl_defense_mode(); the timeout_* entries point into
 *      vs_timeout_table_dos.timeout[] and use proc_dointvec_jiffies;
 *      all remaining entries use proc_dointvec.
 *
 *      The three directory entries chain "net" -> "ipv4" -> "vs" ->
 *      vs_vars through their sixth field, each with mode 0555.
 */
1374static struct ip_vs_sysctl_table ipv4_vs_table = {
1375        NULL,
1376        {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
1377          &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
1378          &proc_dointvec},
/* debug_level only exists when CONFIG_IP_VS_DEBUG is defined */
1379#ifdef CONFIG_IP_VS_DEBUG
1380         {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
1381          &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
1382          &proc_dointvec},
1383#endif
1384         {NET_IPV4_VS_AMDROPRATE, "am_droprate",
1385          &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
1386          &proc_dointvec},
/* the next three go through ip_vs_sysctl_defense_mode() */
1387         {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
1388          &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
1389          &ip_vs_sysctl_defense_mode},
1390         {NET_IPV4_VS_DROP_PACKET, "drop_packet",
1391          &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
1392          &ip_vs_sysctl_defense_mode},
1393         {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
1394          &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
1395          &ip_vs_sysctl_defense_mode},
/* per-state timeouts, all backed by vs_timeout_table_dos.timeout[] */
1396         {NET_IPV4_VS_TO_ES, "timeout_established",
1397          &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1398          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1399         {NET_IPV4_VS_TO_SS, "timeout_synsent",
1400          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1401          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1402         {NET_IPV4_VS_TO_SR, "timeout_synrecv",
1403          &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1404          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1405         {NET_IPV4_VS_TO_FW, "timeout_finwait",
1406          &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1407          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1408         {NET_IPV4_VS_TO_TW, "timeout_timewait",
1409          &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1410          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1411         {NET_IPV4_VS_TO_CL, "timeout_close",
1412          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1413          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1414         {NET_IPV4_VS_TO_CW, "timeout_closewait",
1415          &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1416          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1417         {NET_IPV4_VS_TO_LA, "timeout_lastack",
1418          &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1419          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1420         {NET_IPV4_VS_TO_LI, "timeout_listen",
1421          &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1422          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1423         {NET_IPV4_VS_TO_SA, "timeout_synack",
1424          &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1425          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1426         {NET_IPV4_VS_TO_UDP, "timeout_udp",
1427          &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1428          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1429         {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
1430          &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1431          sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1432         {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
1433          &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
1434          &proc_dointvec},
1435         {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
1436          &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
1437          &proc_dointvec},
1438         {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
1439          &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
1440          &proc_dointvec},
1441         {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
1442          &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
1443          &proc_dointvec},
1444         {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
1445          &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
1446          &proc_dointvec},
1447         {0}},
/* directory chain: "vs" holds vs_vars, "ipv4" holds "vs", "net" holds "ipv4" */
1448        {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
1449         {0}},
1450        {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
1451         {0}},
1452        {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
1453         {0}}
1454};
1455
1456
1457/*
1458 *      Write the contents of the VS rule table to a PROCfs file.
1459 *      (It is kept just for backward compatibility)
1460 */
1461static inline char *ip_vs_fwd_name(unsigned flags)
1462{
1463        char *fwd;
1464
1465        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466        case IP_VS_CONN_F_LOCALNODE:
1467                fwd = "Local";
1468                break;
1469        case IP_VS_CONN_F_TUNNEL:
1470                fwd = "Tunnel";
1471                break;
1472        case IP_VS_CONN_F_DROUTE:
1473                fwd = "Route";
1474                break;
1475        default:
1476                fwd = "Masq";
1477        }
1478        return fwd;
1479}
1480
1481static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
1482{
1483        int len=0;
1484        off_t pos=0;
1485        char temp[64], temp2[32];
1486        int idx;
1487        struct ip_vs_service *svc;
1488        struct ip_vs_dest *dest;
1489        struct list_head *l, *e, *p, *q;
1490
1491        /*
1492         * Note: since the length of the buffer is usually the multiple
1493         * of 512, it is good to use fixed record of the divisor of 512,
1494         * so that records won't be truncated at buffer boundary.
1495         */
1496        pos = 192;
1497        if (pos > offset) {
1498                sprintf(temp,
1499                        "IP Virtual Server version %d.%d.%d (size=%d)",
1500                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1501                len += sprintf(buf+len, "%-63s\n", temp);
1502                len += sprintf(buf+len, "%-63s\n",
1503                               "Prot LocalAddress:Port Scheduler Flags");
1504                len += sprintf(buf+len, "%-63s\n",
1505                               "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
1506        }
1507
1508        read_lock_bh(&__ip_vs_svc_lock);
1509
1510        /* print the service table hashed by <protocol,addr,port> */
1511        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1512                l = &ip_vs_svc_table[idx];
1513                for (e=l->next; e!=l; e=e->next) {
1514                        svc = list_entry(e, struct ip_vs_service, s_list);
1515                        pos += 64;
1516                        if (pos > offset) {
1517                                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1518                                        sprintf(temp2, "persistent %d %08X",
1519                                                svc->timeout,
1520                                                ntohl(svc->netmask));
1521                                else
1522                                        temp2[0] = '\0';
1523
1524                                sprintf(temp, "%s  %08X:%04X %s %s",
1525                                        ip_vs_proto_name(svc->protocol),
1526                                        ntohl(svc->addr),
1527                                        ntohs(svc->port),
1528                                        svc->scheduler->name, temp2);
1529                                len += sprintf(buf+len, "%-63s\n", temp);
1530                                if (len >= length)
1531                                        goto done;
1532                        }
1533
1534                        p = &svc->destinations;
1535                        for (q=p->next; q!=p; q=q->next) {
1536                                dest = list_entry(q, struct ip_vs_dest, n_list);
1537                                pos += 64;
1538                                if (pos <= offset)
1539                                        continue;
1540                                sprintf(temp,
1541                                        "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
1542                                        ntohl(dest->addr),
1543                                        ntohs(dest->port),
1544                                        ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1545                                        atomic_read(&dest->weight),
1546                                        atomic_read(&dest->activeconns),
1547                                        atomic_read(&dest->inactconns));
1548                                len += sprintf(buf+len, "%-63s\n", temp);
1549                                if (len >= length)
1550                                        goto done;
1551                        }
1552                }
1553        }
1554
1555        /* print the service table hashed by fwmark */
1556        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1557                l = &ip_vs_svc_fwm_table[idx];
1558                for (e=l->next; e!=l; e=e->next) {
1559                        svc = list_entry(e, struct ip_vs_service, f_list);
1560                        pos += 64;
1561                        if (pos > offset) {
1562                                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1563                                        sprintf(temp2, "persistent %d %08X",
1564                                                svc->timeout,
1565                                                ntohl(svc->netmask));
1566                                else
1567                                        temp2[0] = '\0';
1568
1569                                sprintf(temp, "FWM  %08X %s %s",
1570                                        svc->fwmark,
1571                                        svc->scheduler->name, temp2);
1572                                len += sprintf(buf+len, "%-63s\n", temp);
1573                                if (len >= length)
1574                                        goto done;
1575                        }
1576
1577                        p = &svc->destinations;
1578                        for (q=p->next; q!=p; q=q->next) {
1579                                dest = list_entry(q, struct ip_vs_dest, n_list);
1580                                pos += 64;
1581                                if (pos <= offset)
1582                                        continue;
1583                                sprintf(temp,
1584                                        "  -> %08X:%04X      %-7s %-6d %-10d %-10d",
1585                                        ntohl(dest->addr),
1586                                        ntohs(dest->port),
1587                                        ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1588                                        atomic_read(&dest->weight),
1589                                        atomic_read(&dest->activeconns),
1590                                        atomic_read(&dest->inactconns));
1591                                len += sprintf(buf+len, "%-63s\n", temp);
1592                                if (len >= length)
1593                                        goto done;
1594                        }
1595                }
1596        }
1597
1598  done:
1599        read_unlock_bh(&__ip_vs_svc_lock);
1600
1601        *start = buf+len-(pos-offset);          /* Start of wanted data */
1602        len = pos-offset;
1603        if (len > length)
1604                len = length;
1605        if (len < 0)
1606                len = 0;
1607        return len;
1608}
1609
1610
/*
 *      Global statistics block.  Its counters are printed by
 *      ip_vs_stats_get_info() while holding ip_vs_stats.lock, and it is
 *      reset by ip_vs_zero_all().
 */
1611struct ip_vs_stats ip_vs_stats;
1612
1613static int
1614ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1615{
1616        int len=0;
1617        off_t pos=0;
1618        char temp[64];
1619
1620        pos += 320;
1621        if (pos > offset) {
1622                len += sprintf(buf+len, "%-63s\n%-63s\n",
1623/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1624                               "   Total Incoming Outgoing         Incoming         Outgoing",
1625                               "   Conns  Packets  Packets            Bytes            Bytes");
1626
1627                spin_lock_bh(&ip_vs_stats.lock);
1628                sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1629                        ip_vs_stats.conns,
1630                        ip_vs_stats.inpkts,
1631                        ip_vs_stats.outpkts,
1632                        (__u32)(ip_vs_stats.inbytes>>32),
1633                        (__u32)ip_vs_stats.inbytes,
1634                        (__u32)(ip_vs_stats.outbytes>>32),
1635                        (__u32)ip_vs_stats.outbytes);
1636                len += sprintf(buf+len, "%-62s\n\n", temp);
1637
1638                len += sprintf(buf+len, "%-63s\n",
1639/*                              01234567 01234567 01234567 0123456701234567 0123456701234567 */
1640                               " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s");
1641                sprintf(temp, "%8X %8X %8X %16X %16X",
1642                        ip_vs_stats.cps,
1643                        ip_vs_stats.inpps,
1644                        ip_vs_stats.outpps,
1645                        ip_vs_stats.inbps,
1646                        ip_vs_stats.outbps);
1647                len += sprintf(buf+len, "%-63s\n", temp);
1648
1649                spin_unlock_bh(&ip_vs_stats.lock);
1650        }
1651
1652        *start = buf+len-(pos-offset);          /* Start of wanted data */
1653        len = pos-offset;
1654        if (len > length)
1655                len = length;
1656        if (len < 0)
1657                len = 0;
1658        return len;
1659}
1660
1661
1662/*
1663 * Set timeout values for tcp tcpfin udp in the vs_timeout_table.
1664 */
1665static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1666{
1667        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1668                  u->tcp_timeout,
1669                  u->tcp_fin_timeout,
1670                  u->udp_timeout);
1671
1672        if (u->tcp_timeout) {
1673                vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674                        = u->tcp_timeout * HZ;
1675        }
1676
1677        if (u->tcp_fin_timeout) {
1678                vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679                        = u->tcp_fin_timeout * HZ;
1680        }
1681
1682        if (u->udp_timeout) {
1683                vs_timeout_table.timeout[IP_VS_S_UDP]
1684                        = u->udp_timeout * HZ;
1685        }
1686        return 0;
1687}
1688
1689
1690static int
1691do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1692{
1693        int ret;
1694        struct ip_vs_rule_user *urule;
1695        struct ip_vs_service *svc = NULL;
1696
1697        if (!capable(CAP_NET_ADMIN))
1698                return -EPERM;
1699
1700        /*
1701         * Check the size of mm, no overflow...
1702         * len > 128000 is a sanity check.
1703         */
1704        if (len < sizeof(struct ip_vs_rule_user)) {
1705                IP_VS_ERR("set_ctl: len %u < %Zu\n",
1706                          len, sizeof(struct ip_vs_rule_user));
1707                return -EINVAL;
1708        } else if (len > 128000) {
1709                IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1710                return -EINVAL;
1711        } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1712                IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1713                return -ENOMEM;
1714        } else if (copy_from_user(urule, user, len) != 0) {
1715                ret = -EFAULT;
1716                goto out_free;
1717        }
1718
1719        MOD_INC_USE_COUNT;
1720        if (down_interruptible(&__ip_vs_mutex)) {
1721                ret = -ERESTARTSYS;
1722                goto out_dec;
1723        }
1724
1725        if (cmd == IP_VS_SO_SET_FLUSH) {
1726                /* Flush the virtual service */
1727                ret = ip_vs_flush();
1728                goto out_unlock;
1729        } else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1730                /* Set timeout values for (tcp tcpfin udp) */
1731                ret = ip_vs_set_timeouts(urule);
1732                goto out_unlock;
1733        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1734                ret = start_sync_thread(urule->state, urule->mcast_ifn,
1735                                        urule->syncid);
1736                goto out_unlock;
1737        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1738                ret = stop_sync_thread(urule->state);
1739                goto out_unlock;
1740        } else if (cmd == IP_VS_SO_SET_ZERO) {
1741                /* if no service address is set, zero counters in all */
1742                if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1743                        ret = ip_vs_zero_all();
1744                        goto out_unlock;
1745                }
1746        }
1747
1748        /*
1749         * Check for valid protocol: TCP or UDP. Even for fwmark!=0
1750         */
1751        if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1752                IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1753                          urule->protocol, NIPQUAD(urule->vaddr),
1754                          ntohs(urule->vport), urule->sched_name);
1755                ret = -EFAULT;
1756                goto out_unlock;
1757        }
1758
1759        /*
1760         * Lookup the exact service by <protocol, vaddr, vport> or fwmark
1761         */
1762        if (urule->vfwmark == 0)
1763                svc = __ip_vs_service_get(urule->protocol,
1764                                          urule->vaddr, urule->vport);
1765        else
1766                svc = __ip_vs_svc_fwm_get(urule->vfwmark);
1767
1768        if (cmd != IP_VS_SO_SET_ADD
1769            && (svc == NULL || svc->protocol != urule->protocol)) {
1770                ret = -ESRCH;
1771                goto out_unlock;
1772        }
1773
1774        switch (cmd) {
1775        case IP_VS_SO_SET_ADD:
1776                if (svc != NULL)
1777                        ret = -EEXIST;
1778                else
1779                        ret = ip_vs_add_service(urule, &svc);
1780                break;
1781        case IP_VS_SO_SET_EDIT:
1782                ret = ip_vs_edit_service(svc, urule);
1783                break;
1784        case IP_VS_SO_SET_DEL:
1785                ret = ip_vs_del_service(svc);
1786                if (!ret)
1787                        goto out_unlock;
1788                break;
1789        case IP_VS_SO_SET_ADDDEST:
1790                ret = ip_vs_add_dest(svc, urule);
1791                break;
1792        case IP_VS_SO_SET_EDITDEST:
1793                ret = ip_vs_edit_dest(svc, urule);
1794                break;
1795        case IP_VS_SO_SET_DELDEST:
1796                ret = ip_vs_del_dest(svc, urule);
1797                break;
1798        case IP_VS_SO_SET_ZERO:
1799                ret = ip_vs_zero_service(svc);
1800                break;
1801        default:
1802                ret = -EINVAL;
1803        }
1804
1805        if (svc)
1806                ip_vs_service_put(svc);
1807
1808  out_unlock:
1809        up(&__ip_vs_mutex);
1810  out_dec:
1811        MOD_DEC_USE_COUNT;
1812  out_free:
1813        kfree(urule);
1814        return ret;
1815}
1816
1817
1818static inline void
1819__ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1820{
1821        spin_lock_bh(&src->lock);
1822        memcpy(dst, src, (char*)&src->lock - (char*)src);
1823        spin_unlock_bh(&src->lock);
1824}
1825
1826static inline int
1827__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828                            struct ip_vs_get_services *uptr)
1829{
1830        int idx, count=0;
1831        struct ip_vs_service *svc;
1832        struct list_head *l;
1833        struct ip_vs_service_user entry;
1834        int ret = 0;
1835
1836        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837                list_for_each (l, &ip_vs_svc_table[idx]) {
1838                        if (count >= get->num_services)
1839                                goto out;
1840                        svc = list_entry(l, struct ip_vs_service, s_list);
1841                        entry.protocol = svc->protocol;
1842                        entry.addr = svc->addr;
1843                        entry.port = svc->port;
1844                        entry.fwmark = svc->fwmark;
1845                        strcpy(entry.sched_name, svc->scheduler->name);
1846                        entry.flags = svc->flags;
1847                        entry.timeout = svc->timeout / HZ;
1848                        entry.netmask = svc->netmask;
1849                        entry.num_dests = svc->num_dests;
1850                        __ip_vs_copy_stats(&entry.stats, &svc->stats);
1851                        if (copy_to_user(&uptr->entrytable[count],
1852                                         &entry, sizeof(entry))) {
1853                                ret = -EFAULT;
1854                                goto out;
1855                        }
1856                        count++;
1857                }
1858        }
1859
1860        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1861                list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1862                        if (count >= get->num_services)
1863                                goto out;
1864                        svc = list_entry(l, struct ip_vs_service, f_list);
1865                        entry.protocol = svc->protocol;
1866                        entry.addr = svc->addr;
1867                        entry.port = svc->port;
1868                        entry.fwmark = svc->fwmark;
1869                        strcpy(entry.sched_name, svc->scheduler->name);
1870                        entry.flags = svc->flags;
1871                        entry.timeout = svc->timeout / HZ;
1872                        entry.netmask = svc->netmask;
1873                        entry.num_dests = svc->num_dests;
1874                        __ip_vs_copy_stats(&entry.stats, &svc->stats);
1875                        if (copy_to_user(&uptr->entrytable[count],
1876                                         &entry, sizeof(entry))) {
1877                                ret = -EFAULT;
1878                                goto out;
1879                        }
1880                        count++;
1881                }
1882        }
1883 out:
1884        return ret;
1885}
1886
1887static inline int
1888__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1889                         struct ip_vs_get_dests *uptr)
1890{
1891        struct ip_vs_service *svc;
1892        int ret = 0;
1893
1894        if (get->fwmark)
1895                svc = __ip_vs_svc_fwm_get(get->fwmark);
1896        else
1897                svc = __ip_vs_service_get(get->protocol,
1898                                          get->addr, get->port);
1899        if (svc) {
1900                int count = 0;
1901                struct ip_vs_dest *dest;
1902                struct list_head *l, *e;
1903                struct ip_vs_dest_user entry;
1904
1905                l = &svc->destinations;
1906                for (e=l->next; e!=l; e=e->next) {
1907                        if (count >= get->num_dests)
1908                                break;
1909                        dest = list_entry(e, struct ip_vs_dest, n_list);
1910                        entry.addr = dest->addr;
1911                        entry.port = dest->port;
1912                        entry.flags = atomic_read(&dest->conn_flags);
1913                        entry.weight = atomic_read(&dest->weight);
1914                        entry.activeconns = atomic_read(&dest->activeconns);
1915                        entry.inactconns = atomic_read(&dest->inactconns);
1916                        __ip_vs_copy_stats(&entry.stats, &dest->stats);
1917                        if (copy_to_user(&uptr->entrytable[count],
1918                                         &entry, sizeof(entry))) {
1919                                ret = -EFAULT;
1920                                break;
1921                        }
1922                        count++;
1923                }
1924                ip_vs_service_put(svc);
1925        } else
1926                ret = -ESRCH;
1927        return ret;
1928}
1929
1930static inline void
1931__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1932{
1933        u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1934        u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1935        u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1936}
1937
1938static int
1939do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1940{
1941        int ret = 0;
1942
1943        if (!capable(CAP_NET_ADMIN))
1944                return -EPERM;
1945
1946        if (down_interruptible(&__ip_vs_mutex))
1947                return -ERESTARTSYS;
1948
1949        switch (cmd) {
1950        case IP_VS_SO_GET_VERSION:
1951        {
1952                char buf[64];
1953
1954                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1955                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1956                if (*len < strlen(buf)+1) {
1957                        ret = -EINVAL;
1958                        goto out;
1959                }
1960                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1961                        ret = -EFAULT;
1962                        goto out;
1963                }
1964                *len = strlen(buf)+1;
1965        }
1966        break;
1967
1968        case IP_VS_SO_GET_INFO:
1969        {
1970                struct ip_vs_getinfo info;
1971                info.version = IP_VS_VERSION_CODE;
1972                info.size = IP_VS_CONN_TAB_SIZE;
1973                info.num_services = ip_vs_num_services;
1974                if (copy_to_user(user, &info, sizeof(info)) != 0)
1975                        ret = -EFAULT;
1976        }
1977        break;
1978
1979        case IP_VS_SO_GET_SERVICES:
1980        {
1981                struct ip_vs_get_services get;
1982
1983                if (*len < sizeof(get)) {
1984                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1985                        ret = -EINVAL;
1986                        goto out;
1987                }
1988                if (copy_from_user(&get, user, sizeof(get))) {
1989                        ret = -EFAULT;
1990                        goto out;
1991                }
1992                if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1993                        IP_VS_ERR("length: %u != %Zu\n", *len,
1994                                  sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
1995                        ret = -EINVAL;
1996                        goto out;
1997                }
1998                ret = __ip_vs_get_service_entries(&get, user);
1999        }
2000        break;
2001
2002        case IP_VS_SO_GET_SERVICE:
2003        {
2004                struct ip_vs_service_user get;
2005                struct ip_vs_service *svc;
2006
2007                if (*len != sizeof(get)) {
2008                        IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2009                        ret = -EINVAL;
2010                        goto out;
2011                }
2012                if (copy_from_user(&get, user, sizeof(get))) {
2013                        ret = -EFAULT;
2014                        goto out;
2015                }
2016
2017                if (get.fwmark)
2018                        svc = __ip_vs_svc_fwm_get(get.fwmark);
2019                else
2020                        svc = __ip_vs_service_get(get.protocol,
2021                                                  get.addr, get.port);
2022                if (svc) {
2023                        strcpy(get.sched_name, svc->scheduler->name);
2024                        get.flags = svc->flags;
2025                        get.timeout = svc->timeout / HZ;
2026                        get.netmask = svc->netmask;
2027                        get.num_dests = svc->num_dests;
2028                        __ip_vs_copy_stats(&get.stats, &svc->stats);
2029                        if (copy_to_user(user, &get, *len) != 0)
2030                                ret = -EFAULT;
2031                        ip_vs_service_put(svc);
2032                } else
2033                        ret = -ESRCH;
2034        }
2035        break;
2036
2037        case IP_VS_SO_GET_DESTS:
2038        {
2039                struct ip_vs_get_dests get;
2040
2041                if (*len < sizeof(get)) {
2042                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2043                        ret = -EINVAL;
2044                        goto out;
2045                }
2046                if (copy_from_user(&get, user, sizeof(get))) {
2047                        ret = -EFAULT;
2048                        goto out;
2049                }
2050                if (*len != (sizeof(get) +
2051                             sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2052                        IP_VS_ERR("length: %u != %Zu\n", *len,
2053                                  sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2054                        ret = -EINVAL;
2055                        goto out;
2056                }
2057                ret = __ip_vs_get_dest_entries(&get, user);
2058        }
2059        break;
2060
2061        case IP_VS_SO_GET_TIMEOUTS:
2062        {
2063                struct ip_vs_timeout_user u;
2064
2065                if (*len < sizeof(u)) {
2066                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2067                        ret = -EINVAL;
2068                        goto out;
2069                }
2070                __ip_vs_get_timeouts(&u);
2071                if (copy_to_user(user, &u, sizeof(u)) != 0)
2072                        ret = -EFAULT;
2073        }
2074        break;
2075
2076        case IP_VS_SO_GET_DAEMON:
2077        {
2078                struct ip_vs_daemon_user u;
2079
2080                if (*len < sizeof(u)) {
2081                        IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2082                        ret = -EINVAL;
2083                        goto out;
2084                }
2085                u.state = ip_vs_sync_state;
2086                if (ip_vs_sync_state & IP_VS_STATE_MASTER)
2087                        strcpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn);
2088                if (ip_vs_sync_state & IP_VS_STATE_BACKUP)
2089                        strcpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn);
2090                if (copy_to_user(user, &u, sizeof(u)) != 0)
2091                        ret = -EFAULT;
2092        }
2093        break;
2094
2095        default:
2096                ret = -EINVAL;
2097        }
2098
2099  out:
2100        up(&__ip_vs_mutex);
2101        return ret;
2102}
2103
2104
2105static struct nf_sockopt_ops ip_vs_sockopts = {
2106        { NULL, NULL }, PF_INET,
2107        IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
2108        IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
2109};
2110
2111
2112int ip_vs_control_init(void)
2113{
2114        int ret;
2115        int idx;
2116
2117        EnterFunction(2);
2118
2119        ret = nf_register_sockopt(&ip_vs_sockopts);
2120        if (ret) {
2121                IP_VS_ERR("cannot register sockopt.\n");
2122                return ret;
2123        }
2124
2125        proc_net_create("ip_vs", 0, ip_vs_get_info);
2126        proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2127
2128        ipv4_vs_table.sysctl_header =
2129                register_sysctl_table(ipv4_vs_table.root_dir, 0);
2130        /*
2131         * Initilize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable,
2132         * ip_vs_schedulers.
2133         */
2134        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
2135                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2136                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2137        }
2138        for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
2139                INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2140        }
2141
2142        memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2143        ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2144        ip_vs_new_estimator(&ip_vs_stats);
2145
2146        /* Hook the defense timer */
2147        init_timer(&defense_timer);
2148        defense_timer.function = defense_timer_handler;
2149        defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2150        add_timer(&defense_timer);
2151
2152        LeaveFunction(2);
2153        return 0;
2154}
2155
2156void ip_vs_control_cleanup(void)
2157{
2158        EnterFunction(2);
2159        ip_vs_trash_cleanup();
2160        del_timer_sync(&defense_timer);
2161        ip_vs_kill_estimator(&ip_vs_stats);
2162        unregister_sysctl_table(ipv4_vs_table.sysctl_header);
2163        proc_net_remove("ip_vs_stats");
2164        proc_net_remove("ip_vs");
2165        nf_unregister_sockopt(&ip_vs_sockopts);
2166        LeaveFunction(2);
2167}
2168
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.