linux/net/ipv4/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Version:     $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
   9 *
  10 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  11 *              Peter Kese <peter.kese@ijs.si>
  12 *              Julian Anastasov <ja@ssi.bg>
  13 *
  14 *              This program is free software; you can redistribute it and/or
  15 *              modify it under the terms of the GNU General Public License
  16 *              as published by the Free Software Foundation; either version
  17 *              2 of the License, or (at your option) any later version.
  18 *
  19 * Changes:
  20 *
  21 */
  22
  23#include <linux/module.h>
  24#include <linux/init.h>
  25#include <linux/types.h>
  26#include <linux/capability.h>
  27#include <linux/fs.h>
  28#include <linux/sysctl.h>
  29#include <linux/proc_fs.h>
  30#include <linux/workqueue.h>
  31#include <linux/swap.h>
  32#include <linux/seq_file.h>
  33
  34#include <linux/netfilter.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/mutex.h>
  37
  38#include <net/net_namespace.h>
  39#include <net/ip.h>
  40#include <net/route.h>
  41#include <net/sock.h>
  42
  43#include <asm/uaccess.h>
  44
  45#include <net/ip_vs.h>
  46
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/*
 * lock for service table
 * (read-locked by ip_vs_service_get(), write-locked while a service's
 * destination list or scheduler state is being changed)
 */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services (ip_vs_rtable) */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/*
 * lock for state and timeout tables
 * (write-locked by update_defense_level() around the secure_tcp switch)
 */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/*
 * 1/rate drop and drop-entry variables
 * ip_vs_drop_rate and ip_vs_drop_counter are recomputed by
 * update_defense_level() under __ip_vs_droppacket_lock; a rate of 0
 * means the drop_packet defense is off.  ip_vs_dropentry is the on/off
 * flag that makes defense_work_handler() call ip_vs_random_dropentry().
 */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/*
 * sysctl variables
 *
 * drop_entry, drop_packet and secure_tcp are mode selectors interpreted
 * by update_defense_level(): 0 = off, 1 = automatic (currently off),
 * 2 = automatic (currently on), 3 = always on.  Modes 1 and 2 are
 * switched between by update_defense_level() itself.
 */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (in pages) below which memory counts as short */
static int sysctl_ip_vs_amemthresh = 1024;
/* drop rate used when drop_packet is in mode 3 */
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
  84
  85
  86#ifdef CONFIG_IP_VS_DEBUG
  87static int sysctl_ip_vs_debug_level = 0;
  88
  89int ip_vs_get_debug_level(void)
  90{
  91        return sysctl_ip_vs_debug_level;
  92}
  93#endif
  94
  95/*
  96 *      update_defense_level is called from keventd and from sysctl,
  97 *      so it needs to protect itself from softirqs
  98 */
  99static void update_defense_level(void)
 100{
 101        struct sysinfo i;
 102        static int old_secure_tcp = 0;
 103        int availmem;
 104        int nomem;
 105        int to_change = -1;
 106
 107        /* we only count free and buffered memory (in pages) */
 108        si_meminfo(&i);
 109        availmem = i.freeram + i.bufferram;
 110        /* however in linux 2.5 the i.bufferram is total page cache size,
 111           we need adjust it */
 112        /* si_swapinfo(&i); */
 113        /* availmem = availmem - (i.totalswap - i.freeswap); */
 114
 115        nomem = (availmem < sysctl_ip_vs_amemthresh);
 116
 117        local_bh_disable();
 118
 119        /* drop_entry */
 120        spin_lock(&__ip_vs_dropentry_lock);
 121        switch (sysctl_ip_vs_drop_entry) {
 122        case 0:
 123                atomic_set(&ip_vs_dropentry, 0);
 124                break;
 125        case 1:
 126                if (nomem) {
 127                        atomic_set(&ip_vs_dropentry, 1);
 128                        sysctl_ip_vs_drop_entry = 2;
 129                } else {
 130                        atomic_set(&ip_vs_dropentry, 0);
 131                }
 132                break;
 133        case 2:
 134                if (nomem) {
 135                        atomic_set(&ip_vs_dropentry, 1);
 136                } else {
 137                        atomic_set(&ip_vs_dropentry, 0);
 138                        sysctl_ip_vs_drop_entry = 1;
 139                };
 140                break;
 141        case 3:
 142                atomic_set(&ip_vs_dropentry, 1);
 143                break;
 144        }
 145        spin_unlock(&__ip_vs_dropentry_lock);
 146
 147        /* drop_packet */
 148        spin_lock(&__ip_vs_droppacket_lock);
 149        switch (sysctl_ip_vs_drop_packet) {
 150        case 0:
 151                ip_vs_drop_rate = 0;
 152                break;
 153        case 1:
 154                if (nomem) {
 155                        ip_vs_drop_rate = ip_vs_drop_counter
 156                                = sysctl_ip_vs_amemthresh /
 157                                (sysctl_ip_vs_amemthresh-availmem);
 158                        sysctl_ip_vs_drop_packet = 2;
 159                } else {
 160                        ip_vs_drop_rate = 0;
 161                }
 162                break;
 163        case 2:
 164                if (nomem) {
 165                        ip_vs_drop_rate = ip_vs_drop_counter
 166                                = sysctl_ip_vs_amemthresh /
 167                                (sysctl_ip_vs_amemthresh-availmem);
 168                } else {
 169                        ip_vs_drop_rate = 0;
 170                        sysctl_ip_vs_drop_packet = 1;
 171                }
 172                break;
 173        case 3:
 174                ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
 175                break;
 176        }
 177        spin_unlock(&__ip_vs_droppacket_lock);
 178
 179        /* secure_tcp */
 180        write_lock(&__ip_vs_securetcp_lock);
 181        switch (sysctl_ip_vs_secure_tcp) {
 182        case 0:
 183                if (old_secure_tcp >= 2)
 184                        to_change = 0;
 185                break;
 186        case 1:
 187                if (nomem) {
 188                        if (old_secure_tcp < 2)
 189                                to_change = 1;
 190                        sysctl_ip_vs_secure_tcp = 2;
 191                } else {
 192                        if (old_secure_tcp >= 2)
 193                                to_change = 0;
 194                }
 195                break;
 196        case 2:
 197                if (nomem) {
 198                        if (old_secure_tcp < 2)
 199                                to_change = 1;
 200                } else {
 201                        if (old_secure_tcp >= 2)
 202                                to_change = 0;
 203                        sysctl_ip_vs_secure_tcp = 1;
 204                }
 205                break;
 206        case 3:
 207                if (old_secure_tcp < 2)
 208                        to_change = 1;
 209                break;
 210        }
 211        old_secure_tcp = sysctl_ip_vs_secure_tcp;
 212        if (to_change >= 0)
 213                ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
 214        write_unlock(&__ip_vs_securetcp_lock);
 215
 216        local_bh_enable();
 217}
 218
 219
 220/*
 221 *      Timer for checking the defense
 222 */
 223#define DEFENSE_TIMER_PERIOD    1*HZ
 224static void defense_work_handler(struct work_struct *work);
 225static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 226
 227static void defense_work_handler(struct work_struct *work)
 228{
 229        update_defense_level();
 230        if (atomic_read(&ip_vs_dropentry))
 231                ip_vs_random_dropentry();
 232
 233        schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
 234}
 235
 236int
 237ip_vs_use_count_inc(void)
 238{
 239        return try_module_get(THIS_MODULE);
 240}
 241
 242void
 243ip_vs_use_count_dec(void)
 244{
 245        module_put(THIS_MODULE);
 246}
 247
 248
/*
 *      Hash table: for virtual service lookups
 *      (2^IP_VS_SVC_TAB_BITS = 256 buckets; the mask reduces a hash
 *      value to a bucket index)
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port>; entries link via s_list */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark; entries link via f_list */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *      Hash table: for real service lookups
 *      (2^IP_VS_RTAB_BITS = 16 buckets; entries link via d_list)
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *      Trash for destinations
 *      (destinations unlinked from their service but still referenced;
 *      entries link via n_list)
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *      FTP & NULL virtual service counters
 *      ip_vs_service_get() only tries its FTP and port-zero fallback
 *      lookups while the matching counter is non-zero.
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
 280
 281
 282/*
 283 *      Returns hash value for virtual service
 284 */
 285static __inline__ unsigned
 286ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port)
 287{
 288        register unsigned porth = ntohs(port);
 289
 290        return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 291                & IP_VS_SVC_TAB_MASK;
 292}
 293
 294/*
 295 *      Returns hash value of fwmark for virtual service lookup
 296 */
 297static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
 298{
 299        return fwmark & IP_VS_SVC_TAB_MASK;
 300}
 301
 302/*
 303 *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
 304 *      or in the ip_vs_svc_fwm_table by fwmark.
 305 *      Should be called with locked tables.
 306 */
 307static int ip_vs_svc_hash(struct ip_vs_service *svc)
 308{
 309        unsigned hash;
 310
 311        if (svc->flags & IP_VS_SVC_F_HASHED) {
 312                IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
 313                          "called from %p\n", __builtin_return_address(0));
 314                return 0;
 315        }
 316
 317        if (svc->fwmark == 0) {
 318                /*
 319                 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
 320                 */
 321                hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
 322                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 323        } else {
 324                /*
 325                 *  Hash it by fwmark in ip_vs_svc_fwm_table
 326                 */
 327                hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
 328                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 329        }
 330
 331        svc->flags |= IP_VS_SVC_F_HASHED;
 332        /* increase its refcnt because it is referenced by the svc table */
 333        atomic_inc(&svc->refcnt);
 334        return 1;
 335}
 336
 337
 338/*
 339 *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 340 *      Should be called with locked tables.
 341 */
 342static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 343{
 344        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 345                IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
 346                          "called from %p\n", __builtin_return_address(0));
 347                return 0;
 348        }
 349
 350        if (svc->fwmark == 0) {
 351                /* Remove it from the ip_vs_svc_table table */
 352                list_del(&svc->s_list);
 353        } else {
 354                /* Remove it from the ip_vs_svc_fwm_table table */
 355                list_del(&svc->f_list);
 356        }
 357
 358        svc->flags &= ~IP_VS_SVC_F_HASHED;
 359        atomic_dec(&svc->refcnt);
 360        return 1;
 361}
 362
 363
 364/*
 365 *      Get service by {proto,addr,port} in the service table.
 366 */
 367static __inline__ struct ip_vs_service *
 368__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport)
 369{
 370        unsigned hash;
 371        struct ip_vs_service *svc;
 372
 373        /* Check for "full" addressed entries */
 374        hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
 375
 376        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 377                if ((svc->addr == vaddr)
 378                    && (svc->port == vport)
 379                    && (svc->protocol == protocol)) {
 380                        /* HIT */
 381                        atomic_inc(&svc->usecnt);
 382                        return svc;
 383                }
 384        }
 385
 386        return NULL;
 387}
 388
 389
 390/*
 391 *      Get service by {fwmark} in the service table.
 392 */
 393static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
 394{
 395        unsigned hash;
 396        struct ip_vs_service *svc;
 397
 398        /* Check for fwmark addressed entries */
 399        hash = ip_vs_svc_fwm_hashkey(fwmark);
 400
 401        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 402                if (svc->fwmark == fwmark) {
 403                        /* HIT */
 404                        atomic_inc(&svc->usecnt);
 405                        return svc;
 406                }
 407        }
 408
 409        return NULL;
 410}
 411
 412struct ip_vs_service *
 413ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
 414{
 415        struct ip_vs_service *svc;
 416
 417        read_lock(&__ip_vs_svc_lock);
 418
 419        /*
 420         *      Check the table hashed by fwmark first
 421         */
 422        if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
 423                goto out;
 424
 425        /*
 426         *      Check the table hashed by <protocol,addr,port>
 427         *      for "full" addressed entries
 428         */
 429        svc = __ip_vs_service_get(protocol, vaddr, vport);
 430
 431        if (svc == NULL
 432            && protocol == IPPROTO_TCP
 433            && atomic_read(&ip_vs_ftpsvc_counter)
 434            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 435                /*
 436                 * Check if ftp service entry exists, the packet
 437                 * might belong to FTP data connections.
 438                 */
 439                svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
 440        }
 441
 442        if (svc == NULL
 443            && atomic_read(&ip_vs_nullsvc_counter)) {
 444                /*
 445                 * Check if the catch-all port (port zero) exists
 446                 */
 447                svc = __ip_vs_service_get(protocol, vaddr, 0);
 448        }
 449
 450  out:
 451        read_unlock(&__ip_vs_svc_lock);
 452
 453        IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
 454                  fwmark, ip_vs_proto_name(protocol),
 455                  NIPQUAD(vaddr), ntohs(vport),
 456                  svc?"hit":"not hit");
 457
 458        return svc;
 459}
 460
 461
 462static inline void
 463__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 464{
 465        atomic_inc(&svc->refcnt);
 466        dest->svc = svc;
 467}
 468
 469static inline void
 470__ip_vs_unbind_svc(struct ip_vs_dest *dest)
 471{
 472        struct ip_vs_service *svc = dest->svc;
 473
 474        dest->svc = NULL;
 475        if (atomic_dec_and_test(&svc->refcnt))
 476                kfree(svc);
 477}
 478
 479
 480/*
 481 *      Returns hash value for real service
 482 */
 483static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
 484{
 485        register unsigned porth = ntohs(port);
 486
 487        return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
 488                & IP_VS_RTAB_MASK;
 489}
 490
 491/*
 492 *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
 493 *      should be called with locked tables.
 494 */
 495static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 496{
 497        unsigned hash;
 498
 499        if (!list_empty(&dest->d_list)) {
 500                return 0;
 501        }
 502
 503        /*
 504         *      Hash by proto,addr,port,
 505         *      which are the parameters of the real service.
 506         */
 507        hash = ip_vs_rs_hashkey(dest->addr, dest->port);
 508        list_add(&dest->d_list, &ip_vs_rtable[hash]);
 509
 510        return 1;
 511}
 512
 513/*
 514 *      UNhashes ip_vs_dest from ip_vs_rtable.
 515 *      should be called with locked tables.
 516 */
 517static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 518{
 519        /*
 520         * Remove it from the ip_vs_rtable table.
 521         */
 522        if (!list_empty(&dest->d_list)) {
 523                list_del(&dest->d_list);
 524                INIT_LIST_HEAD(&dest->d_list);
 525        }
 526
 527        return 1;
 528}
 529
 530/*
 531 *      Lookup real service by <proto,addr,port> in the real service table.
 532 */
 533struct ip_vs_dest *
 534ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
 535{
 536        unsigned hash;
 537        struct ip_vs_dest *dest;
 538
 539        /*
 540         *      Check for "full" addressed entries
 541         *      Return the first found entry
 542         */
 543        hash = ip_vs_rs_hashkey(daddr, dport);
 544
 545        read_lock(&__ip_vs_rs_lock);
 546        list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
 547                if ((dest->addr == daddr)
 548                    && (dest->port == dport)
 549                    && ((dest->protocol == protocol) ||
 550                        dest->vfwmark)) {
 551                        /* HIT */
 552                        read_unlock(&__ip_vs_rs_lock);
 553                        return dest;
 554                }
 555        }
 556        read_unlock(&__ip_vs_rs_lock);
 557
 558        return NULL;
 559}
 560
 561/*
 562 *      Lookup destination by {addr,port} in the given service
 563 */
 564static struct ip_vs_dest *
 565ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 566{
 567        struct ip_vs_dest *dest;
 568
 569        /*
 570         * Find the destination for the given service
 571         */
 572        list_for_each_entry(dest, &svc->destinations, n_list) {
 573                if ((dest->addr == daddr) && (dest->port == dport)) {
 574                        /* HIT */
 575                        return dest;
 576                }
 577        }
 578
 579        return NULL;
 580}
 581
 582/*
 583 * Find destination by {daddr,dport,vaddr,protocol}
 584 * Cretaed to be used in ip_vs_process_message() in
 585 * the backup synchronization daemon. It finds the
 586 * destination to be bound to the received connection
 587 * on the backup.
 588 *
 589 * ip_vs_lookup_real_service() looked promissing, but
 590 * seems not working as expected.
 591 */
 592struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
 593                                    __be32 vaddr, __be16 vport, __u16 protocol)
 594{
 595        struct ip_vs_dest *dest;
 596        struct ip_vs_service *svc;
 597
 598        svc = ip_vs_service_get(0, protocol, vaddr, vport);
 599        if (!svc)
 600                return NULL;
 601        dest = ip_vs_lookup_dest(svc, daddr, dport);
 602        if (dest)
 603                atomic_inc(&dest->refcnt);
 604        ip_vs_service_put(svc);
 605        return dest;
 606}
 607
 608/*
 609 *  Lookup dest by {svc,addr,port} in the destination trash.
 610 *  The destination trash is used to hold the destinations that are removed
 611 *  from the service table but are still referenced by some conn entries.
 612 *  The reason to add the destination trash is when the dest is temporary
 613 *  down (either by administrator or by monitor program), the dest can be
 614 *  picked back from the trash, the remaining connections to the dest can
 615 *  continue, and the counting information of the dest is also useful for
 616 *  scheduling.
 617 */
 618static struct ip_vs_dest *
 619ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
 620{
 621        struct ip_vs_dest *dest, *nxt;
 622
 623        /*
 624         * Find the destination in trash
 625         */
 626        list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 627                IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
 628                          "dest->refcnt=%d\n",
 629                          dest->vfwmark,
 630                          NIPQUAD(dest->addr), ntohs(dest->port),
 631                          atomic_read(&dest->refcnt));
 632                if (dest->addr == daddr &&
 633                    dest->port == dport &&
 634                    dest->vfwmark == svc->fwmark &&
 635                    dest->protocol == svc->protocol &&
 636                    (svc->fwmark ||
 637                     (dest->vaddr == svc->addr &&
 638                      dest->vport == svc->port))) {
 639                        /* HIT */
 640                        return dest;
 641                }
 642
 643                /*
 644                 * Try to purge the destination from trash if not referenced
 645                 */
 646                if (atomic_read(&dest->refcnt) == 1) {
 647                        IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
 648                                  "from trash\n",
 649                                  dest->vfwmark,
 650                                  NIPQUAD(dest->addr), ntohs(dest->port));
 651                        list_del(&dest->n_list);
 652                        ip_vs_dst_reset(dest);
 653                        __ip_vs_unbind_svc(dest);
 654                        kfree(dest);
 655                }
 656        }
 657
 658        return NULL;
 659}
 660
 661
 662/*
 663 *  Clean up all the destinations in the trash
 664 *  Called by the ip_vs_control_cleanup()
 665 *
 666 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 667 *  the service tables must have been flushed and all the connections
 668 *  are expired, and the refcnt of each destination in the trash must
 669 *  be 1, so we simply release them here.
 670 */
 671static void ip_vs_trash_cleanup(void)
 672{
 673        struct ip_vs_dest *dest, *nxt;
 674
 675        list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 676                list_del(&dest->n_list);
 677                ip_vs_dst_reset(dest);
 678                __ip_vs_unbind_svc(dest);
 679                kfree(dest);
 680        }
 681}
 682
 683
 684static void
 685ip_vs_zero_stats(struct ip_vs_stats *stats)
 686{
 687        spin_lock_bh(&stats->lock);
 688        memset(stats, 0, (char *)&stats->lock - (char *)stats);
 689        spin_unlock_bh(&stats->lock);
 690        ip_vs_zero_estimator(stats);
 691}
 692
 693/*
 694 *      Update a destination in the given service
 695 */
 696static void
 697__ip_vs_update_dest(struct ip_vs_service *svc,
 698                    struct ip_vs_dest *dest, struct ip_vs_dest_user *udest)
 699{
 700        int conn_flags;
 701
 702        /* set the weight and the flags */
 703        atomic_set(&dest->weight, udest->weight);
 704        conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 705
 706        /* check if local node and update the flags */
 707        if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
 708                conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 709                        | IP_VS_CONN_F_LOCALNODE;
 710        }
 711
 712        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 713        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
 714                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 715        } else {
 716                /*
 717                 *    Put the real service in ip_vs_rtable if not present.
 718                 *    For now only for NAT!
 719                 */
 720                write_lock_bh(&__ip_vs_rs_lock);
 721                ip_vs_rs_hash(dest);
 722                write_unlock_bh(&__ip_vs_rs_lock);
 723        }
 724        atomic_set(&dest->conn_flags, conn_flags);
 725
 726        /* bind the service */
 727        if (!dest->svc) {
 728                __ip_vs_bind_svc(dest, svc);
 729        } else {
 730                if (dest->svc != svc) {
 731                        __ip_vs_unbind_svc(dest);
 732                        ip_vs_zero_stats(&dest->stats);
 733                        __ip_vs_bind_svc(dest, svc);
 734                }
 735        }
 736
 737        /* set the dest status flags */
 738        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 739
 740        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
 741                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 742        dest->u_threshold = udest->u_threshold;
 743        dest->l_threshold = udest->l_threshold;
 744}
 745
 746
 747/*
 748 *      Create a destination for the given service
 749 */
 750static int
 751ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest,
 752               struct ip_vs_dest **dest_p)
 753{
 754        struct ip_vs_dest *dest;
 755        unsigned atype;
 756
 757        EnterFunction(2);
 758
 759        atype = inet_addr_type(&init_net, udest->addr);
 760        if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 761                return -EINVAL;
 762
 763        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
 764        if (dest == NULL) {
 765                IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
 766                return -ENOMEM;
 767        }
 768
 769        dest->protocol = svc->protocol;
 770        dest->vaddr = svc->addr;
 771        dest->vport = svc->port;
 772        dest->vfwmark = svc->fwmark;
 773        dest->addr = udest->addr;
 774        dest->port = udest->port;
 775
 776        atomic_set(&dest->activeconns, 0);
 777        atomic_set(&dest->inactconns, 0);
 778        atomic_set(&dest->persistconns, 0);
 779        atomic_set(&dest->refcnt, 0);
 780
 781        INIT_LIST_HEAD(&dest->d_list);
 782        spin_lock_init(&dest->dst_lock);
 783        spin_lock_init(&dest->stats.lock);
 784        __ip_vs_update_dest(svc, dest, udest);
 785        ip_vs_new_estimator(&dest->stats);
 786
 787        *dest_p = dest;
 788
 789        LeaveFunction(2);
 790        return 0;
 791}
 792
 793
 794/*
 795 *      Add a destination into an existing service
 796 */
 797static int
 798ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 799{
 800        struct ip_vs_dest *dest;
 801        __be32 daddr = udest->addr;
 802        __be16 dport = udest->port;
 803        int ret;
 804
 805        EnterFunction(2);
 806
 807        if (udest->weight < 0) {
 808                IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
 809                return -ERANGE;
 810        }
 811
 812        if (udest->l_threshold > udest->u_threshold) {
 813                IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
 814                          "upper threshold\n");
 815                return -ERANGE;
 816        }
 817
 818        /*
 819         * Check if the dest already exists in the list
 820         */
 821        dest = ip_vs_lookup_dest(svc, daddr, dport);
 822        if (dest != NULL) {
 823                IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
 824                return -EEXIST;
 825        }
 826
 827        /*
 828         * Check if the dest already exists in the trash and
 829         * is from the same service
 830         */
 831        dest = ip_vs_trash_get_dest(svc, daddr, dport);
 832        if (dest != NULL) {
 833                IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
 834                          "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
 835                          NIPQUAD(daddr), ntohs(dport),
 836                          atomic_read(&dest->refcnt),
 837                          dest->vfwmark,
 838                          NIPQUAD(dest->vaddr),
 839                          ntohs(dest->vport));
 840                __ip_vs_update_dest(svc, dest, udest);
 841
 842                /*
 843                 * Get the destination from the trash
 844                 */
 845                list_del(&dest->n_list);
 846
 847                ip_vs_new_estimator(&dest->stats);
 848
 849                write_lock_bh(&__ip_vs_svc_lock);
 850
 851                /*
 852                 * Wait until all other svc users go away.
 853                 */
 854                IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 855
 856                list_add(&dest->n_list, &svc->destinations);
 857                svc->num_dests++;
 858
 859                /* call the update_service function of its scheduler */
 860                svc->scheduler->update_service(svc);
 861
 862                write_unlock_bh(&__ip_vs_svc_lock);
 863                return 0;
 864        }
 865
 866        /*
 867         * Allocate and initialize the dest structure
 868         */
 869        ret = ip_vs_new_dest(svc, udest, &dest);
 870        if (ret) {
 871                return ret;
 872        }
 873
 874        /*
 875         * Add the dest entry into the list
 876         */
 877        atomic_inc(&dest->refcnt);
 878
 879        write_lock_bh(&__ip_vs_svc_lock);
 880
 881        /*
 882         * Wait until all other svc users go away.
 883         */
 884        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 885
 886        list_add(&dest->n_list, &svc->destinations);
 887        svc->num_dests++;
 888
 889        /* call the update_service function of its scheduler */
 890        svc->scheduler->update_service(svc);
 891
 892        write_unlock_bh(&__ip_vs_svc_lock);
 893
 894        LeaveFunction(2);
 895
 896        return 0;
 897}
 898
 899
 900/*
 901 *      Edit a destination in the given service
 902 */
 903static int
 904ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 905{
 906        struct ip_vs_dest *dest;
 907        __be32 daddr = udest->addr;
 908        __be16 dport = udest->port;
 909
 910        EnterFunction(2);
 911
 912        if (udest->weight < 0) {
 913                IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
 914                return -ERANGE;
 915        }
 916
 917        if (udest->l_threshold > udest->u_threshold) {
 918                IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
 919                          "upper threshold\n");
 920                return -ERANGE;
 921        }
 922
 923        /*
 924         *  Lookup the destination list
 925         */
 926        dest = ip_vs_lookup_dest(svc, daddr, dport);
 927        if (dest == NULL) {
 928                IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
 929                return -ENOENT;
 930        }
 931
 932        __ip_vs_update_dest(svc, dest, udest);
 933
 934        write_lock_bh(&__ip_vs_svc_lock);
 935
 936        /* Wait until all other svc users go away */
 937        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 938
 939        /* call the update_service, because server weight may be changed */
 940        svc->scheduler->update_service(svc);
 941
 942        write_unlock_bh(&__ip_vs_svc_lock);
 943
 944        LeaveFunction(2);
 945
 946        return 0;
 947}
 948
 949
 950/*
 951 *      Delete a destination (must be already unlinked from the service)
 952 */
 953static void __ip_vs_del_dest(struct ip_vs_dest *dest)
 954{
 955        ip_vs_kill_estimator(&dest->stats);
 956
 957        /*
 958         *  Remove it from the d-linked list with the real services.
 959         */
 960        write_lock_bh(&__ip_vs_rs_lock);
 961        ip_vs_rs_unhash(dest);
 962        write_unlock_bh(&__ip_vs_rs_lock);
 963
 964        /*
 965         *  Decrease the refcnt of the dest, and free the dest
 966         *  if nobody refers to it (refcnt=0). Otherwise, throw
 967         *  the destination into the trash.
 968         */
 969        if (atomic_dec_and_test(&dest->refcnt)) {
 970                ip_vs_dst_reset(dest);
 971                /* simply decrease svc->refcnt here, let the caller check
 972                   and release the service if nobody refers to it.
 973                   Only user context can release destination and service,
 974                   and only one user context can update virtual service at a
 975                   time, so the operation here is OK */
 976                atomic_dec(&dest->svc->refcnt);
 977                kfree(dest);
 978        } else {
 979                IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
 980                          "dest->refcnt=%d\n",
 981                          NIPQUAD(dest->addr), ntohs(dest->port),
 982                          atomic_read(&dest->refcnt));
 983                list_add(&dest->n_list, &ip_vs_dest_trash);
 984                atomic_inc(&dest->refcnt);
 985        }
 986}
 987
 988
 989/*
 990 *      Unlink a destination from the given service
 991 */
 992static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 993                                struct ip_vs_dest *dest,
 994                                int svcupd)
 995{
 996        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
 997
 998        /*
 999         *  Remove it from the d-linked destination list.
1000         */
1001        list_del(&dest->n_list);
1002        svc->num_dests--;
1003        if (svcupd) {
1004                /*
1005                 *  Call the update_service function of its scheduler
1006                 */
1007                svc->scheduler->update_service(svc);
1008        }
1009}
1010
1011
1012/*
1013 *      Delete a destination server in the given service
1014 */
1015static int
1016ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest)
1017{
1018        struct ip_vs_dest *dest;
1019        __be32 daddr = udest->addr;
1020        __be16 dport = udest->port;
1021
1022        EnterFunction(2);
1023
1024        dest = ip_vs_lookup_dest(svc, daddr, dport);
1025        if (dest == NULL) {
1026                IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1027                return -ENOENT;
1028        }
1029
1030        write_lock_bh(&__ip_vs_svc_lock);
1031
1032        /*
1033         *      Wait until all other svc users go away.
1034         */
1035        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1036
1037        /*
1038         *      Unlink dest from the service
1039         */
1040        __ip_vs_unlink_dest(svc, dest, 1);
1041
1042        write_unlock_bh(&__ip_vs_svc_lock);
1043
1044        /*
1045         *      Delete the destination
1046         */
1047        __ip_vs_del_dest(dest);
1048
1049        LeaveFunction(2);
1050
1051        return 0;
1052}
1053
1054
1055/*
1056 *      Add a service into the service hash table
1057 */
1058static int
1059ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p)
1060{
1061        int ret = 0;
1062        struct ip_vs_scheduler *sched = NULL;
1063        struct ip_vs_service *svc = NULL;
1064
1065        /* increase the module use count */
1066        ip_vs_use_count_inc();
1067
1068        /* Lookup the scheduler by 'u->sched_name' */
1069        sched = ip_vs_scheduler_get(u->sched_name);
1070        if (sched == NULL) {
1071                IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1072                           u->sched_name);
1073                ret = -ENOENT;
1074                goto out_mod_dec;
1075        }
1076
1077        svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1078        if (svc == NULL) {
1079                IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1080                ret = -ENOMEM;
1081                goto out_err;
1082        }
1083
1084        /* I'm the first user of the service */
1085        atomic_set(&svc->usecnt, 1);
1086        atomic_set(&svc->refcnt, 0);
1087
1088        svc->protocol = u->protocol;
1089        svc->addr = u->addr;
1090        svc->port = u->port;
1091        svc->fwmark = u->fwmark;
1092        svc->flags = u->flags;
1093        svc->timeout = u->timeout * HZ;
1094        svc->netmask = u->netmask;
1095
1096        INIT_LIST_HEAD(&svc->destinations);
1097        rwlock_init(&svc->sched_lock);
1098        spin_lock_init(&svc->stats.lock);
1099
1100        /* Bind the scheduler */
1101        ret = ip_vs_bind_scheduler(svc, sched);
1102        if (ret)
1103                goto out_err;
1104        sched = NULL;
1105
1106        /* Update the virtual service counters */
1107        if (svc->port == FTPPORT)
1108                atomic_inc(&ip_vs_ftpsvc_counter);
1109        else if (svc->port == 0)
1110                atomic_inc(&ip_vs_nullsvc_counter);
1111
1112        ip_vs_new_estimator(&svc->stats);
1113        ip_vs_num_services++;
1114
1115        /* Hash the service into the service table */
1116        write_lock_bh(&__ip_vs_svc_lock);
1117        ip_vs_svc_hash(svc);
1118        write_unlock_bh(&__ip_vs_svc_lock);
1119
1120        *svc_p = svc;
1121        return 0;
1122
1123  out_err:
1124        if (svc != NULL) {
1125                if (svc->scheduler)
1126                        ip_vs_unbind_scheduler(svc);
1127                if (svc->inc) {
1128                        local_bh_disable();
1129                        ip_vs_app_inc_put(svc->inc);
1130                        local_bh_enable();
1131                }
1132                kfree(svc);
1133        }
1134        ip_vs_scheduler_put(sched);
1135
1136  out_mod_dec:
1137        /* decrease the module use count */
1138        ip_vs_use_count_dec();
1139
1140        return ret;
1141}
1142
1143
1144/*
1145 *      Edit a service and bind it with a new scheduler
1146 */
1147static int
1148ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u)
1149{
1150        struct ip_vs_scheduler *sched, *old_sched;
1151        int ret = 0;
1152
1153        /*
1154         * Lookup the scheduler, by 'u->sched_name'
1155         */
1156        sched = ip_vs_scheduler_get(u->sched_name);
1157        if (sched == NULL) {
1158                IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1159                           u->sched_name);
1160                return -ENOENT;
1161        }
1162        old_sched = sched;
1163
1164        write_lock_bh(&__ip_vs_svc_lock);
1165
1166        /*
1167         * Wait until all other svc users go away.
1168         */
1169        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1170
1171        /*
1172         * Set the flags and timeout value
1173         */
1174        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1175        svc->timeout = u->timeout * HZ;
1176        svc->netmask = u->netmask;
1177
1178        old_sched = svc->scheduler;
1179        if (sched != old_sched) {
1180                /*
1181                 * Unbind the old scheduler
1182                 */
1183                if ((ret = ip_vs_unbind_scheduler(svc))) {
1184                        old_sched = sched;
1185                        goto out;
1186                }
1187
1188                /*
1189                 * Bind the new scheduler
1190                 */
1191                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1192                        /*
1193                         * If ip_vs_bind_scheduler fails, restore the old
1194                         * scheduler.
1195                         * The main reason of failure is out of memory.
1196                         *
1197                         * The question is if the old scheduler can be
1198                         * restored all the time. TODO: if it cannot be
1199                         * restored some time, we must delete the service,
1200                         * otherwise the system may crash.
1201                         */
1202                        ip_vs_bind_scheduler(svc, old_sched);
1203                        old_sched = sched;
1204                        goto out;
1205                }
1206        }
1207
1208  out:
1209        write_unlock_bh(&__ip_vs_svc_lock);
1210
1211        if (old_sched)
1212                ip_vs_scheduler_put(old_sched);
1213
1214        return ret;
1215}
1216
1217
1218/*
1219 *      Delete a service from the service list
1220 *      - The service must be unlinked, unlocked and not referenced!
1221 *      - We are called under _bh lock
1222 */
1223static void __ip_vs_del_service(struct ip_vs_service *svc)
1224{
1225        struct ip_vs_dest *dest, *nxt;
1226        struct ip_vs_scheduler *old_sched;
1227
1228        ip_vs_num_services--;
1229        ip_vs_kill_estimator(&svc->stats);
1230
1231        /* Unbind scheduler */
1232        old_sched = svc->scheduler;
1233        ip_vs_unbind_scheduler(svc);
1234        if (old_sched)
1235                ip_vs_scheduler_put(old_sched);
1236
1237        /* Unbind app inc */
1238        if (svc->inc) {
1239                ip_vs_app_inc_put(svc->inc);
1240                svc->inc = NULL;
1241        }
1242
1243        /*
1244         *    Unlink the whole destination list
1245         */
1246        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1247                __ip_vs_unlink_dest(svc, dest, 0);
1248                __ip_vs_del_dest(dest);
1249        }
1250
1251        /*
1252         *    Update the virtual service counters
1253         */
1254        if (svc->port == FTPPORT)
1255                atomic_dec(&ip_vs_ftpsvc_counter);
1256        else if (svc->port == 0)
1257                atomic_dec(&ip_vs_nullsvc_counter);
1258
1259        /*
1260         *    Free the service if nobody refers to it
1261         */
1262        if (atomic_read(&svc->refcnt) == 0)
1263                kfree(svc);
1264
1265        /* decrease the module use count */
1266        ip_vs_use_count_dec();
1267}
1268
1269/*
1270 *      Delete a service from the service list
1271 */
1272static int ip_vs_del_service(struct ip_vs_service *svc)
1273{
1274        if (svc == NULL)
1275                return -EEXIST;
1276
1277        /*
1278         * Unhash it from the service table
1279         */
1280        write_lock_bh(&__ip_vs_svc_lock);
1281
1282        ip_vs_svc_unhash(svc);
1283
1284        /*
1285         * Wait until all the svc users go away.
1286         */
1287        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1288
1289        __ip_vs_del_service(svc);
1290
1291        write_unlock_bh(&__ip_vs_svc_lock);
1292
1293        return 0;
1294}
1295
1296
1297/*
1298 *      Flush all the virtual services
1299 */
1300static int ip_vs_flush(void)
1301{
1302        int idx;
1303        struct ip_vs_service *svc, *nxt;
1304
1305        /*
1306         * Flush the service table hashed by <protocol,addr,port>
1307         */
1308        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1309                list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1310                        write_lock_bh(&__ip_vs_svc_lock);
1311                        ip_vs_svc_unhash(svc);
1312                        /*
1313                         * Wait until all the svc users go away.
1314                         */
1315                        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1316                        __ip_vs_del_service(svc);
1317                        write_unlock_bh(&__ip_vs_svc_lock);
1318                }
1319        }
1320
1321        /*
1322         * Flush the service table hashed by fwmark
1323         */
1324        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1325                list_for_each_entry_safe(svc, nxt,
1326                                         &ip_vs_svc_fwm_table[idx], f_list) {
1327                        write_lock_bh(&__ip_vs_svc_lock);
1328                        ip_vs_svc_unhash(svc);
1329                        /*
1330                         * Wait until all the svc users go away.
1331                         */
1332                        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1333                        __ip_vs_del_service(svc);
1334                        write_unlock_bh(&__ip_vs_svc_lock);
1335                }
1336        }
1337
1338        return 0;
1339}
1340
1341
1342/*
1343 *      Zero counters in a service or all services
1344 */
1345static int ip_vs_zero_service(struct ip_vs_service *svc)
1346{
1347        struct ip_vs_dest *dest;
1348
1349        write_lock_bh(&__ip_vs_svc_lock);
1350        list_for_each_entry(dest, &svc->destinations, n_list) {
1351                ip_vs_zero_stats(&dest->stats);
1352        }
1353        ip_vs_zero_stats(&svc->stats);
1354        write_unlock_bh(&__ip_vs_svc_lock);
1355        return 0;
1356}
1357
1358static int ip_vs_zero_all(void)
1359{
1360        int idx;
1361        struct ip_vs_service *svc;
1362
1363        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1364                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1365                        ip_vs_zero_service(svc);
1366                }
1367        }
1368
1369        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1370                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1371                        ip_vs_zero_service(svc);
1372                }
1373        }
1374
1375        ip_vs_zero_stats(&ip_vs_stats);
1376        return 0;
1377}
1378
1379
1380static int
1381proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1382                     void __user *buffer, size_t *lenp, loff_t *ppos)
1383{
1384        int *valp = table->data;
1385        int val = *valp;
1386        int rc;
1387
1388        rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1389        if (write && (*valp != val)) {
1390                if ((*valp < 0) || (*valp > 3)) {
1391                        /* Restore the correct value */
1392                        *valp = val;
1393                } else {
1394                        update_defense_level();
1395                }
1396        }
1397        return rc;
1398}
1399
1400
1401static int
1402proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1403                       void __user *buffer, size_t *lenp, loff_t *ppos)
1404{
1405        int *valp = table->data;
1406        int val[2];
1407        int rc;
1408
1409        /* backup the value first */
1410        memcpy(val, valp, sizeof(val));
1411
1412        rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1413        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1414                /* Restore the correct value */
1415                memcpy(valp, val, sizeof(val));
1416        }
1417        return rc;
1418}
1419
1420
1421/*
1422 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1423 */
1424
1425static struct ctl_table vs_vars[] = {
1426        {
1427                .procname       = "amemthresh",
1428                .data           = &sysctl_ip_vs_amemthresh,
1429                .maxlen         = sizeof(int),
1430                .mode           = 0644,
1431                .proc_handler   = &proc_dointvec,
1432        },
1433#ifdef CONFIG_IP_VS_DEBUG
1434        {
1435                .procname       = "debug_level",
1436                .data           = &sysctl_ip_vs_debug_level,
1437                .maxlen         = sizeof(int),
1438                .mode           = 0644,
1439                .proc_handler   = &proc_dointvec,
1440        },
1441#endif
1442        {
1443                .procname       = "am_droprate",
1444                .data           = &sysctl_ip_vs_am_droprate,
1445                .maxlen         = sizeof(int),
1446                .mode           = 0644,
1447                .proc_handler   = &proc_dointvec,
1448        },
1449        {
1450                .procname       = "drop_entry",
1451                .data           = &sysctl_ip_vs_drop_entry,
1452                .maxlen         = sizeof(int),
1453                .mode           = 0644,
1454                .proc_handler   = &proc_do_defense_mode,
1455        },
1456        {
1457                .procname       = "drop_packet",
1458                .data           = &sysctl_ip_vs_drop_packet,
1459                .maxlen         = sizeof(int),
1460                .mode           = 0644,
1461                .proc_handler   = &proc_do_defense_mode,
1462        },
1463        {
1464                .procname       = "secure_tcp",
1465                .data           = &sysctl_ip_vs_secure_tcp,
1466                .maxlen         = sizeof(int),
1467                .mode           = 0644,
1468                .proc_handler   = &proc_do_defense_mode,
1469        },
1470#if 0
1471        {
1472                .procname       = "timeout_established",
1473                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1474                .maxlen         = sizeof(int),
1475                .mode           = 0644,
1476                .proc_handler   = &proc_dointvec_jiffies,
1477        },
1478        {
1479                .procname       = "timeout_synsent",
1480                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1481                .maxlen         = sizeof(int),
1482                .mode           = 0644,
1483                .proc_handler   = &proc_dointvec_jiffies,
1484        },
1485        {
1486                .procname       = "timeout_synrecv",
1487                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1488                .maxlen         = sizeof(int),
1489                .mode           = 0644,
1490                .proc_handler   = &proc_dointvec_jiffies,
1491        },
1492        {
1493                .procname       = "timeout_finwait",
1494                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1495                .maxlen         = sizeof(int),
1496                .mode           = 0644,
1497                .proc_handler   = &proc_dointvec_jiffies,
1498        },
1499        {
1500                .procname       = "timeout_timewait",
1501                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1502                .maxlen         = sizeof(int),
1503                .mode           = 0644,
1504                .proc_handler   = &proc_dointvec_jiffies,
1505        },
1506        {
1507                .procname       = "timeout_close",
1508                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1509                .maxlen         = sizeof(int),
1510                .mode           = 0644,
1511                .proc_handler   = &proc_dointvec_jiffies,
1512        },
1513        {
1514                .procname       = "timeout_closewait",
1515                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1516                .maxlen         = sizeof(int),
1517                .mode           = 0644,
1518                .proc_handler   = &proc_dointvec_jiffies,
1519        },
1520        {
1521                .procname       = "timeout_lastack",
1522                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1523                .maxlen         = sizeof(int),
1524                .mode           = 0644,
1525                .proc_handler   = &proc_dointvec_jiffies,
1526        },
1527        {
1528                .procname       = "timeout_listen",
1529                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1530                .maxlen         = sizeof(int),
1531                .mode           = 0644,
1532                .proc_handler   = &proc_dointvec_jiffies,
1533        },
1534        {
1535                .procname       = "timeout_synack",
1536                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1537                .maxlen         = sizeof(int),
1538                .mode           = 0644,
1539                .proc_handler   = &proc_dointvec_jiffies,
1540        },
1541        {
1542                .procname       = "timeout_udp",
1543                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1544                .maxlen         = sizeof(int),
1545                .mode           = 0644,
1546                .proc_handler   = &proc_dointvec_jiffies,
1547        },
1548        {
1549                .procname       = "timeout_icmp",
1550                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1551                .maxlen         = sizeof(int),
1552                .mode           = 0644,
1553                .proc_handler   = &proc_dointvec_jiffies,
1554        },
1555#endif
1556        {
1557                .procname       = "cache_bypass",
1558                .data           = &sysctl_ip_vs_cache_bypass,
1559                .maxlen         = sizeof(int),
1560                .mode           = 0644,
1561                .proc_handler   = &proc_dointvec,
1562        },
1563        {
1564                .procname       = "expire_nodest_conn",
1565                .data           = &sysctl_ip_vs_expire_nodest_conn,
1566                .maxlen         = sizeof(int),
1567                .mode           = 0644,
1568                .proc_handler   = &proc_dointvec,
1569        },
1570        {
1571                .procname       = "expire_quiescent_template",
1572                .data           = &sysctl_ip_vs_expire_quiescent_template,
1573                .maxlen         = sizeof(int),
1574                .mode           = 0644,
1575                .proc_handler   = &proc_dointvec,
1576        },
1577        {
1578                .procname       = "sync_threshold",
1579                .data           = &sysctl_ip_vs_sync_threshold,
1580                .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1581                .mode           = 0644,
1582                .proc_handler   = &proc_do_sync_threshold,
1583        },
1584        {
1585                .procname       = "nat_icmp_send",
1586                .data           = &sysctl_ip_vs_nat_icmp_send,
1587                .maxlen         = sizeof(int),
1588                .mode           = 0644,
1589                .proc_handler   = &proc_dointvec,
1590        },
1591        { .ctl_name = 0 }
1592};
1593
1594struct ctl_path net_vs_ctl_path[] = {
1595        { .procname = "net", .ctl_name = CTL_NET, },
1596        { .procname = "ipv4", .ctl_name = NET_IPV4, },
1597        { .procname = "vs", },
1598        { }
1599};
1600EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1601
1602static struct ctl_table_header * sysctl_header;
1603
1604#ifdef CONFIG_PROC_FS
1605
/*
 *      Per-open iterator state of the service listing in procfs.
 */
struct ip_vs_iter {
        /* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
        struct list_head *table;
        /* index of the hash bucket holding the current service */
        int bucket;
};
1610
1611/*
1612 *      Write the contents of the VS rule table to a PROCfs file.
1613 *      (It is kept just for backward compatibility)
1614 */
1615static inline const char *ip_vs_fwd_name(unsigned flags)
1616{
1617        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1618        case IP_VS_CONN_F_LOCALNODE:
1619                return "Local";
1620        case IP_VS_CONN_F_TUNNEL:
1621                return "Tunnel";
1622        case IP_VS_CONN_F_DROUTE:
1623                return "Route";
1624        default:
1625                return "Masq";
1626        }
1627}
1628
1629
1630/* Get the Nth entry in the two lists */
1631static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1632{
1633        struct ip_vs_iter *iter = seq->private;
1634        int idx;
1635        struct ip_vs_service *svc;
1636
1637        /* look in hash by protocol */
1638        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1639                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1640                        if (pos-- == 0){
1641                                iter->table = ip_vs_svc_table;
1642                                iter->bucket = idx;
1643                                return svc;
1644                        }
1645                }
1646        }
1647
1648        /* keep looking in fwmark */
1649        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1650                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1651                        if (pos-- == 0) {
1652                                iter->table = ip_vs_svc_fwm_table;
1653                                iter->bucket = idx;
1654                                return svc;
1655                        }
1656                }
1657        }
1658
1659        return NULL;
1660}
1661
1662static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1663{
1664
1665        read_lock_bh(&__ip_vs_svc_lock);
1666        return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1667}
1668
1669
1670static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1671{
1672        struct list_head *e;
1673        struct ip_vs_iter *iter;
1674        struct ip_vs_service *svc;
1675
1676        ++*pos;
1677        if (v == SEQ_START_TOKEN)
1678                return ip_vs_info_array(seq,0);
1679
1680        svc = v;
1681        iter = seq->private;
1682
1683        if (iter->table == ip_vs_svc_table) {
1684                /* next service in table hashed by protocol */
1685                if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1686                        return list_entry(e, struct ip_vs_service, s_list);
1687
1688
1689                while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1690                        list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1691                                            s_list) {
1692                                return svc;
1693                        }
1694                }
1695
1696                iter->table = ip_vs_svc_fwm_table;
1697                iter->bucket = -1;
1698                goto scan_fwmark;
1699        }
1700
1701        /* next service in hashed by fwmark */
1702        if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1703                return list_entry(e, struct ip_vs_service, f_list);
1704
1705 scan_fwmark:
1706        while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1707                list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1708                                    f_list)
1709                        return svc;
1710        }
1711
1712        return NULL;
1713}
1714
1715static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1716{
1717        read_unlock_bh(&__ip_vs_svc_lock);
1718}
1719
1720
1721static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1722{
1723        if (v == SEQ_START_TOKEN) {
1724                seq_printf(seq,
1725                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
1726                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1727                seq_puts(seq,
1728                         "Prot LocalAddress:Port Scheduler Flags\n");
1729                seq_puts(seq,
1730                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1731        } else {
1732                const struct ip_vs_service *svc = v;
1733                const struct ip_vs_iter *iter = seq->private;
1734                const struct ip_vs_dest *dest;
1735
1736                if (iter->table == ip_vs_svc_table)
1737                        seq_printf(seq, "%s  %08X:%04X %s ",
1738                                   ip_vs_proto_name(svc->protocol),
1739                                   ntohl(svc->addr),
1740                                   ntohs(svc->port),
1741                                   svc->scheduler->name);
1742                else
1743                        seq_printf(seq, "FWM  %08X %s ",
1744                                   svc->fwmark, svc->scheduler->name);
1745
1746                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1747                        seq_printf(seq, "persistent %d %08X\n",
1748                                svc->timeout,
1749                                ntohl(svc->netmask));
1750                else
1751                        seq_putc(seq, '\n');
1752
1753                list_for_each_entry(dest, &svc->destinations, n_list) {
1754                        seq_printf(seq,
1755                                   "  -> %08X:%04X      %-7s %-6d %-10d %-10d\n",
1756                                   ntohl(dest->addr), ntohs(dest->port),
1757                                   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1758                                   atomic_read(&dest->weight),
1759                                   atomic_read(&dest->activeconns),
1760                                   atomic_read(&dest->inactconns));
1761                }
1762        }
1763        return 0;
1764}
1765
1766static const struct seq_operations ip_vs_info_seq_ops = {
1767        .start = ip_vs_info_seq_start,
1768        .next  = ip_vs_info_seq_next,
1769        .stop  = ip_vs_info_seq_stop,
1770        .show  = ip_vs_info_seq_show,
1771};
1772
1773static int ip_vs_info_open(struct inode *inode, struct file *file)
1774{
1775        return seq_open_private(file, &ip_vs_info_seq_ops,
1776                        sizeof(struct ip_vs_iter));
1777}
1778
1779static const struct file_operations ip_vs_info_fops = {
1780        .owner   = THIS_MODULE,
1781        .open    = ip_vs_info_open,
1782        .read    = seq_read,
1783        .llseek  = seq_lseek,
1784        .release = seq_release_private,
1785};
1786
1787#endif
1788
1789struct ip_vs_stats ip_vs_stats;
1790
1791#ifdef CONFIG_PROC_FS
1792static int ip_vs_stats_show(struct seq_file *seq, void *v)
1793{
1794
1795/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1796        seq_puts(seq,
1797                 "   Total Incoming Outgoing         Incoming         Outgoing\n");
1798        seq_printf(seq,
1799                   "   Conns  Packets  Packets            Bytes            Bytes\n");
1800
1801        spin_lock_bh(&ip_vs_stats.lock);
1802        seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1803                   ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1804                   (unsigned long long) ip_vs_stats.inbytes,
1805                   (unsigned long long) ip_vs_stats.outbytes);
1806
1807/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1808        seq_puts(seq,
1809                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1810        seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1811                        ip_vs_stats.cps,
1812                        ip_vs_stats.inpps,
1813                        ip_vs_stats.outpps,
1814                        ip_vs_stats.inbps,
1815                        ip_vs_stats.outbps);
1816        spin_unlock_bh(&ip_vs_stats.lock);
1817
1818        return 0;
1819}
1820
1821static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1822{
1823        return single_open(file, ip_vs_stats_show, NULL);
1824}
1825
1826static const struct file_operations ip_vs_stats_fops = {
1827        .owner = THIS_MODULE,
1828        .open = ip_vs_stats_seq_open,
1829        .read = seq_read,
1830        .llseek = seq_lseek,
1831        .release = single_release,
1832};
1833
1834#endif
1835
1836/*
1837 *      Set timeout values for tcp tcpfin udp in the timeout_table.
1838 */
1839static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1840{
1841        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1842                  u->tcp_timeout,
1843                  u->tcp_fin_timeout,
1844                  u->udp_timeout);
1845
1846#ifdef CONFIG_IP_VS_PROTO_TCP
1847        if (u->tcp_timeout) {
1848                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1849                        = u->tcp_timeout * HZ;
1850        }
1851
1852        if (u->tcp_fin_timeout) {
1853                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1854                        = u->tcp_fin_timeout * HZ;
1855        }
1856#endif
1857
1858#ifdef CONFIG_IP_VS_PROTO_UDP
1859        if (u->udp_timeout) {
1860                ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1861                        = u->udp_timeout * HZ;
1862        }
1863#endif
1864        return 0;
1865}
1866
1867
/*
 *	Helpers describing the IP_VS_SO_SET_* commands:
 *	  SET_CMDID()   - index of a command relative to IP_VS_BASE_CTL
 *	  *_ARG_LEN     - size of the argument block of each command family
 *	  MAX_ARG_LEN   - largest of the argument sizes above (service
 *	                  followed by destination)
 *	The macro argument is parenthesized so that any expression can be
 *	passed as the command.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
1875
1876static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1877        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
1878        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
1879        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
1880        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
1881        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
1882        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
1883        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
1884        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
1885        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
1886        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
1887        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
1888};
1889
1890static int
1891do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1892{
1893        int ret;
1894        unsigned char arg[MAX_ARG_LEN];
1895        struct ip_vs_service_user *usvc;
1896        struct ip_vs_service *svc;
1897        struct ip_vs_dest_user *udest;
1898
1899        if (!capable(CAP_NET_ADMIN))
1900                return -EPERM;
1901
1902        if (len != set_arglen[SET_CMDID(cmd)]) {
1903                IP_VS_ERR("set_ctl: len %u != %u\n",
1904                          len, set_arglen[SET_CMDID(cmd)]);
1905                return -EINVAL;
1906        }
1907
1908        if (copy_from_user(arg, user, len) != 0)
1909                return -EFAULT;
1910
1911        /* increase the module use count */
1912        ip_vs_use_count_inc();
1913
1914        if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1915                ret = -ERESTARTSYS;
1916                goto out_dec;
1917        }
1918
1919        if (cmd == IP_VS_SO_SET_FLUSH) {
1920                /* Flush the virtual service */
1921                ret = ip_vs_flush();
1922                goto out_unlock;
1923        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1924                /* Set timeout values for (tcp tcpfin udp) */
1925                ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1926                goto out_unlock;
1927        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1928                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1929                ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1930                goto out_unlock;
1931        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1932                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1933                ret = stop_sync_thread(dm->state);
1934                goto out_unlock;
1935        }
1936
1937        usvc = (struct ip_vs_service_user *)arg;
1938        udest = (struct ip_vs_dest_user *)(usvc + 1);
1939
1940        if (cmd == IP_VS_SO_SET_ZERO) {
1941                /* if no service address is set, zero counters in all */
1942                if (!usvc->fwmark && !usvc->addr && !usvc->port) {
1943                        ret = ip_vs_zero_all();
1944                        goto out_unlock;
1945                }
1946        }
1947
1948        /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
1949        if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) {
1950                IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
1951                          usvc->protocol, NIPQUAD(usvc->addr),
1952                          ntohs(usvc->port), usvc->sched_name);
1953                ret = -EFAULT;
1954                goto out_unlock;
1955        }
1956
1957        /* Lookup the exact service by <protocol, addr, port> or fwmark */
1958        if (usvc->fwmark == 0)
1959                svc = __ip_vs_service_get(usvc->protocol,
1960                                          usvc->addr, usvc->port);
1961        else
1962                svc = __ip_vs_svc_fwm_get(usvc->fwmark);
1963
1964        if (cmd != IP_VS_SO_SET_ADD
1965            && (svc == NULL || svc->protocol != usvc->protocol)) {
1966                ret = -ESRCH;
1967                goto out_unlock;
1968        }
1969
1970        switch (cmd) {
1971        case IP_VS_SO_SET_ADD:
1972                if (svc != NULL)
1973                        ret = -EEXIST;
1974                else
1975                        ret = ip_vs_add_service(usvc, &svc);
1976                break;
1977        case IP_VS_SO_SET_EDIT:
1978                ret = ip_vs_edit_service(svc, usvc);
1979                break;
1980        case IP_VS_SO_SET_DEL:
1981                ret = ip_vs_del_service(svc);
1982                if (!ret)
1983                        goto out_unlock;
1984                break;
1985        case IP_VS_SO_SET_ZERO:
1986                ret = ip_vs_zero_service(svc);
1987                break;
1988        case IP_VS_SO_SET_ADDDEST:
1989                ret = ip_vs_add_dest(svc, udest);
1990                break;
1991        case IP_VS_SO_SET_EDITDEST:
1992                ret = ip_vs_edit_dest(svc, udest);
1993                break;
1994        case IP_VS_SO_SET_DELDEST:
1995                ret = ip_vs_del_dest(svc, udest);
1996                break;
1997        default:
1998                ret = -EINVAL;
1999        }
2000
2001        if (svc)
2002                ip_vs_service_put(svc);
2003
2004  out_unlock:
2005        mutex_unlock(&__ip_vs_mutex);
2006  out_dec:
2007        /* decrease the module use count */
2008        ip_vs_use_count_dec();
2009
2010        return ret;
2011}
2012
2013
2014static void
2015ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2016{
2017        spin_lock_bh(&src->lock);
2018        memcpy(dst, src, (char*)&src->lock - (char*)src);
2019        spin_unlock_bh(&src->lock);
2020}
2021
2022static void
2023ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2024{
2025        dst->protocol = src->protocol;
2026        dst->addr = src->addr;
2027        dst->port = src->port;
2028        dst->fwmark = src->fwmark;
2029        strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2030        dst->flags = src->flags;
2031        dst->timeout = src->timeout / HZ;
2032        dst->netmask = src->netmask;
2033        dst->num_dests = src->num_dests;
2034        ip_vs_copy_stats(&dst->stats, &src->stats);
2035}
2036
2037static inline int
2038__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2039                            struct ip_vs_get_services __user *uptr)
2040{
2041        int idx, count=0;
2042        struct ip_vs_service *svc;
2043        struct ip_vs_service_entry entry;
2044        int ret = 0;
2045
2046        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2047                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2048                        if (count >= get->num_services)
2049                                goto out;
2050                        memset(&entry, 0, sizeof(entry));
2051                        ip_vs_copy_service(&entry, svc);
2052                        if (copy_to_user(&uptr->entrytable[count],
2053                                         &entry, sizeof(entry))) {
2054                                ret = -EFAULT;
2055                                goto out;
2056                        }
2057                        count++;
2058                }
2059        }
2060
2061        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2062                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2063                        if (count >= get->num_services)
2064                                goto out;
2065                        memset(&entry, 0, sizeof(entry));
2066                        ip_vs_copy_service(&entry, svc);
2067                        if (copy_to_user(&uptr->entrytable[count],
2068                                         &entry, sizeof(entry))) {
2069                                ret = -EFAULT;
2070                                goto out;
2071                        }
2072                        count++;
2073                }
2074        }
2075  out:
2076        return ret;
2077}
2078
2079static inline int
2080__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2081                         struct ip_vs_get_dests __user *uptr)
2082{
2083        struct ip_vs_service *svc;
2084        int ret = 0;
2085
2086        if (get->fwmark)
2087                svc = __ip_vs_svc_fwm_get(get->fwmark);
2088        else
2089                svc = __ip_vs_service_get(get->protocol,
2090                                          get->addr, get->port);
2091        if (svc) {
2092                int count = 0;
2093                struct ip_vs_dest *dest;
2094                struct ip_vs_dest_entry entry;
2095
2096                list_for_each_entry(dest, &svc->destinations, n_list) {
2097                        if (count >= get->num_dests)
2098                                break;
2099
2100                        entry.addr = dest->addr;
2101                        entry.port = dest->port;
2102                        entry.conn_flags = atomic_read(&dest->conn_flags);
2103                        entry.weight = atomic_read(&dest->weight);
2104                        entry.u_threshold = dest->u_threshold;
2105                        entry.l_threshold = dest->l_threshold;
2106                        entry.activeconns = atomic_read(&dest->activeconns);
2107                        entry.inactconns = atomic_read(&dest->inactconns);
2108                        entry.persistconns = atomic_read(&dest->persistconns);
2109                        ip_vs_copy_stats(&entry.stats, &dest->stats);
2110                        if (copy_to_user(&uptr->entrytable[count],
2111                                         &entry, sizeof(entry))) {
2112                                ret = -EFAULT;
2113                                break;
2114                        }
2115                        count++;
2116                }
2117                ip_vs_service_put(svc);
2118        } else
2119                ret = -ESRCH;
2120        return ret;
2121}
2122
2123static inline void
2124__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2125{
2126#ifdef CONFIG_IP_VS_PROTO_TCP
2127        u->tcp_timeout =
2128                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2129        u->tcp_fin_timeout =
2130                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2131#endif
2132#ifdef CONFIG_IP_VS_PROTO_UDP
2133        u->udp_timeout =
2134                ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2135#endif
2136}
2137
2138
/*
 *	Helpers describing the IP_VS_SO_GET_* commands:
 *	  GET_CMDID()  - index of a command relative to IP_VS_BASE_CTL
 *	  GET_*_LEN    - size of the fixed part of each command's argument
 *	The macro argument is parenthesized so that any expression can be
 *	passed as the command.
 */
#define GET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)
2146
2147static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2148        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2149        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2150        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2151        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2152        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2153        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2154        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2155};
2156
2157static int
2158do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2159{
2160        unsigned char arg[128];
2161        int ret = 0;
2162
2163        if (!capable(CAP_NET_ADMIN))
2164                return -EPERM;
2165
2166        if (*len < get_arglen[GET_CMDID(cmd)]) {
2167                IP_VS_ERR("get_ctl: len %u < %u\n",
2168                          *len, get_arglen[GET_CMDID(cmd)]);
2169                return -EINVAL;
2170        }
2171
2172        if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2173                return -EFAULT;
2174
2175        if (mutex_lock_interruptible(&__ip_vs_mutex))
2176                return -ERESTARTSYS;
2177
2178        switch (cmd) {
2179        case IP_VS_SO_GET_VERSION:
2180        {
2181                char buf[64];
2182
2183                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2184                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2185                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2186                        ret = -EFAULT;
2187                        goto out;
2188                }
2189                *len = strlen(buf)+1;
2190        }
2191        break;
2192
2193        case IP_VS_SO_GET_INFO:
2194        {
2195                struct ip_vs_getinfo info;
2196                info.version = IP_VS_VERSION_CODE;
2197                info.size = IP_VS_CONN_TAB_SIZE;
2198                info.num_services = ip_vs_num_services;
2199                if (copy_to_user(user, &info, sizeof(info)) != 0)
2200                        ret = -EFAULT;
2201        }
2202        break;
2203
2204        case IP_VS_SO_GET_SERVICES:
2205        {
2206                struct ip_vs_get_services *get;
2207                int size;
2208
2209                get = (struct ip_vs_get_services *)arg;
2210                size = sizeof(*get) +
2211                        sizeof(struct ip_vs_service_entry) * get->num_services;
2212                if (*len != size) {
2213                        IP_VS_ERR("length: %u != %u\n", *len, size);
2214                        ret = -EINVAL;
2215                        goto out;
2216                }
2217                ret = __ip_vs_get_service_entries(get, user);
2218        }
2219        break;
2220
2221        case IP_VS_SO_GET_SERVICE:
2222        {
2223                struct ip_vs_service_entry *entry;
2224                struct ip_vs_service *svc;
2225
2226                entry = (struct ip_vs_service_entry *)arg;
2227                if (entry->fwmark)
2228                        svc = __ip_vs_svc_fwm_get(entry->fwmark);
2229                else
2230                        svc = __ip_vs_service_get(entry->protocol,
2231                                                  entry->addr, entry->port);
2232                if (svc) {
2233                        ip_vs_copy_service(entry, svc);
2234                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2235                                ret = -EFAULT;
2236                        ip_vs_service_put(svc);
2237                } else
2238                        ret = -ESRCH;
2239        }
2240        break;
2241
2242        case IP_VS_SO_GET_DESTS:
2243        {
2244                struct ip_vs_get_dests *get;
2245                int size;
2246
2247                get = (struct ip_vs_get_dests *)arg;
2248                size = sizeof(*get) +
2249                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
2250                if (*len != size) {
2251                        IP_VS_ERR("length: %u != %u\n", *len, size);
2252                        ret = -EINVAL;
2253                        goto out;
2254                }
2255                ret = __ip_vs_get_dest_entries(get, user);
2256        }
2257        break;
2258
2259        case IP_VS_SO_GET_TIMEOUT:
2260        {
2261                struct ip_vs_timeout_user t;
2262
2263                __ip_vs_get_timeouts(&t);
2264                if (copy_to_user(user, &t, sizeof(t)) != 0)
2265                        ret = -EFAULT;
2266        }
2267        break;
2268
2269        case IP_VS_SO_GET_DAEMON:
2270        {
2271                struct ip_vs_daemon_user d[2];
2272
2273                memset(&d, 0, sizeof(d));
2274                if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2275                        d[0].state = IP_VS_STATE_MASTER;
2276                        strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2277                        d[0].syncid = ip_vs_master_syncid;
2278                }
2279                if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2280                        d[1].state = IP_VS_STATE_BACKUP;
2281                        strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2282                        d[1].syncid = ip_vs_backup_syncid;
2283                }
2284                if (copy_to_user(user, &d, sizeof(d)) != 0)
2285                        ret = -EFAULT;
2286        }
2287        break;
2288
2289        default:
2290                ret = -EINVAL;
2291        }
2292
2293  out:
2294        mutex_unlock(&__ip_vs_mutex);
2295        return ret;
2296}
2297
2298
2299static struct nf_sockopt_ops ip_vs_sockopts = {
2300        .pf             = PF_INET,
2301        .set_optmin     = IP_VS_BASE_CTL,
2302        .set_optmax     = IP_VS_SO_SET_MAX+1,
2303        .set            = do_ip_vs_set_ctl,
2304        .get_optmin     = IP_VS_BASE_CTL,
2305        .get_optmax     = IP_VS_SO_GET_MAX+1,
2306        .get            = do_ip_vs_get_ctl,
2307        .owner          = THIS_MODULE,
2308};
2309
2310
2311int ip_vs_control_init(void)
2312{
2313        int ret;
2314        int idx;
2315
2316        EnterFunction(2);
2317
2318        ret = nf_register_sockopt(&ip_vs_sockopts);
2319        if (ret) {
2320                IP_VS_ERR("cannot register sockopt.\n");
2321                return ret;
2322        }
2323
2324        proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
2325        proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
2326
2327        sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
2328
2329        /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
2330        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
2331                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2332                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2333        }
2334        for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
2335                INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2336        }
2337
2338        memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2339        spin_lock_init(&ip_vs_stats.lock);
2340        ip_vs_new_estimator(&ip_vs_stats);
2341
2342        /* Hook the defense timer */
2343        schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
2344
2345        LeaveFunction(2);
2346        return 0;
2347}
2348
2349
2350void ip_vs_control_cleanup(void)
2351{
2352        EnterFunction(2);
2353        ip_vs_trash_cleanup();
2354        cancel_rearming_delayed_work(&defense_work);
2355        cancel_work_sync(&defense_work.work);
2356        ip_vs_kill_estimator(&ip_vs_stats);
2357        unregister_sysctl_table(sysctl_header);
2358        proc_net_remove(&init_net, "ip_vs_stats");
2359        proc_net_remove(&init_net, "ip_vs");
2360        nf_unregister_sockopt(&ip_vs_sockopts);
2361        LeaveFunction(2);
2362}
2363
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.