linux/net/netfilter/ipvs/ip_vs_ctl.c
<<
>>
Prefs
   1/*
   2 * IPVS         An implementation of the IP virtual server support for the
   3 *              LINUX operating system.  IPVS is now implemented as a module
   4 *              over the NetFilter framework. IPVS can be used to build a
   5 *              high-performance and highly available server based on a
   6 *              cluster of servers.
   7 *
   8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   9 *              Peter Kese <peter.kese@ijs.si>
  10 *              Julian Anastasov <ja@ssi.bg>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 *
  17 * Changes:
  18 *
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/init.h>
  23#include <linux/types.h>
  24#include <linux/capability.h>
  25#include <linux/fs.h>
  26#include <linux/sysctl.h>
  27#include <linux/proc_fs.h>
  28#include <linux/workqueue.h>
  29#include <linux/swap.h>
  30#include <linux/seq_file.h>
  31
  32#include <linux/netfilter.h>
  33#include <linux/netfilter_ipv4.h>
  34#include <linux/mutex.h>
  35
  36#include <net/net_namespace.h>
  37#include <net/ip.h>
  38#ifdef CONFIG_IP_VS_IPV6
  39#include <net/ipv6.h>
  40#include <net/ip6_route.h>
  41#endif
  42#include <net/route.h>
  43#include <net/sock.h>
  44#include <net/genetlink.h>
  45
  46#include <asm/uaccess.h>
  47
  48#include <net/ip_vs.h>
  49
  50/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
  51static DEFINE_MUTEX(__ip_vs_mutex);
  52
  53/* lock for service table */
  54static DEFINE_RWLOCK(__ip_vs_svc_lock);
  55
  56/* lock for table with the real services */
  57static DEFINE_RWLOCK(__ip_vs_rs_lock);
  58
  59/* lock for state and timeout tables */
  60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
  61
  62/* lock for drop entry handling */
  63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
  64
  65/* lock for drop packet handling */
  66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
  67
  68/* 1/rate drop and drop-entry variables */
  69int ip_vs_drop_rate = 0;
  70int ip_vs_drop_counter = 0;
  71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
  72
  73/* number of virtual services */
  74static int ip_vs_num_services = 0;
  75
  76/* sysctl variables */
  77static int sysctl_ip_vs_drop_entry = 0;
  78static int sysctl_ip_vs_drop_packet = 0;
  79static int sysctl_ip_vs_secure_tcp = 0;
  80static int sysctl_ip_vs_amemthresh = 1024;
  81static int sysctl_ip_vs_am_droprate = 10;
  82int sysctl_ip_vs_cache_bypass = 0;
  83int sysctl_ip_vs_expire_nodest_conn = 0;
  84int sysctl_ip_vs_expire_quiescent_template = 0;
  85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
  86int sysctl_ip_vs_nat_icmp_send = 0;
  87
  88
  89#ifdef CONFIG_IP_VS_DEBUG
  90static int sysctl_ip_vs_debug_level = 0;
  91
  92int ip_vs_get_debug_level(void)
  93{
  94        return sysctl_ip_vs_debug_level;
  95}
  96#endif
  97
  98#ifdef CONFIG_IP_VS_IPV6
  99/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 100static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
 101{
 102        struct rt6_info *rt;
 103        struct flowi fl = {
 104                .oif = 0,
 105                .nl_u = {
 106                        .ip6_u = {
 107                                .daddr = *addr,
 108                                .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
 109        };
 110
 111        rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
 112        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
 113                        return 1;
 114
 115        return 0;
 116}
 117#endif
 118/*
 119 *      update_defense_level is called from keventd and from sysctl,
 120 *      so it needs to protect itself from softirqs
 121 */
 122static void update_defense_level(void)
 123{
 124        struct sysinfo i;
 125        static int old_secure_tcp = 0;
 126        int availmem;
 127        int nomem;
 128        int to_change = -1;
 129
 130        /* we only count free and buffered memory (in pages) */
 131        si_meminfo(&i);
 132        availmem = i.freeram + i.bufferram;
 133        /* however in linux 2.5 the i.bufferram is total page cache size,
 134           we need adjust it */
 135        /* si_swapinfo(&i); */
 136        /* availmem = availmem - (i.totalswap - i.freeswap); */
 137
 138        nomem = (availmem < sysctl_ip_vs_amemthresh);
 139
 140        local_bh_disable();
 141
 142        /* drop_entry */
 143        spin_lock(&__ip_vs_dropentry_lock);
 144        switch (sysctl_ip_vs_drop_entry) {
 145        case 0:
 146                atomic_set(&ip_vs_dropentry, 0);
 147                break;
 148        case 1:
 149                if (nomem) {
 150                        atomic_set(&ip_vs_dropentry, 1);
 151                        sysctl_ip_vs_drop_entry = 2;
 152                } else {
 153                        atomic_set(&ip_vs_dropentry, 0);
 154                }
 155                break;
 156        case 2:
 157                if (nomem) {
 158                        atomic_set(&ip_vs_dropentry, 1);
 159                } else {
 160                        atomic_set(&ip_vs_dropentry, 0);
 161                        sysctl_ip_vs_drop_entry = 1;
 162                };
 163                break;
 164        case 3:
 165                atomic_set(&ip_vs_dropentry, 1);
 166                break;
 167        }
 168        spin_unlock(&__ip_vs_dropentry_lock);
 169
 170        /* drop_packet */
 171        spin_lock(&__ip_vs_droppacket_lock);
 172        switch (sysctl_ip_vs_drop_packet) {
 173        case 0:
 174                ip_vs_drop_rate = 0;
 175                break;
 176        case 1:
 177                if (nomem) {
 178                        ip_vs_drop_rate = ip_vs_drop_counter
 179                                = sysctl_ip_vs_amemthresh /
 180                                (sysctl_ip_vs_amemthresh-availmem);
 181                        sysctl_ip_vs_drop_packet = 2;
 182                } else {
 183                        ip_vs_drop_rate = 0;
 184                }
 185                break;
 186        case 2:
 187                if (nomem) {
 188                        ip_vs_drop_rate = ip_vs_drop_counter
 189                                = sysctl_ip_vs_amemthresh /
 190                                (sysctl_ip_vs_amemthresh-availmem);
 191                } else {
 192                        ip_vs_drop_rate = 0;
 193                        sysctl_ip_vs_drop_packet = 1;
 194                }
 195                break;
 196        case 3:
 197                ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
 198                break;
 199        }
 200        spin_unlock(&__ip_vs_droppacket_lock);
 201
 202        /* secure_tcp */
 203        write_lock(&__ip_vs_securetcp_lock);
 204        switch (sysctl_ip_vs_secure_tcp) {
 205        case 0:
 206                if (old_secure_tcp >= 2)
 207                        to_change = 0;
 208                break;
 209        case 1:
 210                if (nomem) {
 211                        if (old_secure_tcp < 2)
 212                                to_change = 1;
 213                        sysctl_ip_vs_secure_tcp = 2;
 214                } else {
 215                        if (old_secure_tcp >= 2)
 216                                to_change = 0;
 217                }
 218                break;
 219        case 2:
 220                if (nomem) {
 221                        if (old_secure_tcp < 2)
 222                                to_change = 1;
 223                } else {
 224                        if (old_secure_tcp >= 2)
 225                                to_change = 0;
 226                        sysctl_ip_vs_secure_tcp = 1;
 227                }
 228                break;
 229        case 3:
 230                if (old_secure_tcp < 2)
 231                        to_change = 1;
 232                break;
 233        }
 234        old_secure_tcp = sysctl_ip_vs_secure_tcp;
 235        if (to_change >= 0)
 236                ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
 237        write_unlock(&__ip_vs_securetcp_lock);
 238
 239        local_bh_enable();
 240}
 241
 242
 243/*
 244 *      Timer for checking the defense
 245 */
 246#define DEFENSE_TIMER_PERIOD    1*HZ
 247static void defense_work_handler(struct work_struct *work);
 248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 249
 250static void defense_work_handler(struct work_struct *work)
 251{
 252        update_defense_level();
 253        if (atomic_read(&ip_vs_dropentry))
 254                ip_vs_random_dropentry();
 255
 256        schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
 257}
 258
 259int
 260ip_vs_use_count_inc(void)
 261{
 262        return try_module_get(THIS_MODULE);
 263}
 264
 265void
 266ip_vs_use_count_dec(void)
 267{
 268        module_put(THIS_MODULE);
 269}
 270
 271
/*
 *      Hash table: for virtual service lookups
 *      (1 << 8 = 256 buckets; the mask selects a bucket index)
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *      Hash table: for real service lookups
 *      (1 << 4 = 16 buckets; the mask selects a bucket index)
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *      Trash for destinations: holds destinations removed from a service
 *      that are still referenced by connection entries
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *      FTP & NULL virtual service counters; ip_vs_service_get() only tries
 *      the FTP-port and port-zero fallback lookups while these are non-zero
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
 303
 304
 305/*
 306 *      Returns hash value for virtual service
 307 */
 308static __inline__ unsigned
 309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 310                  __be16 port)
 311{
 312        register unsigned porth = ntohs(port);
 313        __be32 addr_fold = addr->ip;
 314
 315#ifdef CONFIG_IP_VS_IPV6
 316        if (af == AF_INET6)
 317                addr_fold = addr->ip6[0]^addr->ip6[1]^
 318                            addr->ip6[2]^addr->ip6[3];
 319#endif
 320
 321        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
 322                & IP_VS_SVC_TAB_MASK;
 323}
 324
 325/*
 326 *      Returns hash value of fwmark for virtual service lookup
 327 */
 328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
 329{
 330        return fwmark & IP_VS_SVC_TAB_MASK;
 331}
 332
 333/*
 334 *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
 335 *      or in the ip_vs_svc_fwm_table by fwmark.
 336 *      Should be called with locked tables.
 337 */
 338static int ip_vs_svc_hash(struct ip_vs_service *svc)
 339{
 340        unsigned hash;
 341
 342        if (svc->flags & IP_VS_SVC_F_HASHED) {
 343                IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
 344                          "called from %p\n", __builtin_return_address(0));
 345                return 0;
 346        }
 347
 348        if (svc->fwmark == 0) {
 349                /*
 350                 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
 351                 */
 352                hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
 353                                         svc->port);
 354                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
 355        } else {
 356                /*
 357                 *  Hash it by fwmark in ip_vs_svc_fwm_table
 358                 */
 359                hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
 360                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
 361        }
 362
 363        svc->flags |= IP_VS_SVC_F_HASHED;
 364        /* increase its refcnt because it is referenced by the svc table */
 365        atomic_inc(&svc->refcnt);
 366        return 1;
 367}
 368
 369
 370/*
 371 *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 372 *      Should be called with locked tables.
 373 */
 374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 375{
 376        if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
 377                IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
 378                          "called from %p\n", __builtin_return_address(0));
 379                return 0;
 380        }
 381
 382        if (svc->fwmark == 0) {
 383                /* Remove it from the ip_vs_svc_table table */
 384                list_del(&svc->s_list);
 385        } else {
 386                /* Remove it from the ip_vs_svc_fwm_table table */
 387                list_del(&svc->f_list);
 388        }
 389
 390        svc->flags &= ~IP_VS_SVC_F_HASHED;
 391        atomic_dec(&svc->refcnt);
 392        return 1;
 393}
 394
 395
 396/*
 397 *      Get service by {proto,addr,port} in the service table.
 398 */
 399static inline struct ip_vs_service *
 400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
 401                    __be16 vport)
 402{
 403        unsigned hash;
 404        struct ip_vs_service *svc;
 405
 406        /* Check for "full" addressed entries */
 407        hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
 408
 409        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
 410                if ((svc->af == af)
 411                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
 412                    && (svc->port == vport)
 413                    && (svc->protocol == protocol)) {
 414                        /* HIT */
 415                        atomic_inc(&svc->usecnt);
 416                        return svc;
 417                }
 418        }
 419
 420        return NULL;
 421}
 422
 423
 424/*
 425 *      Get service by {fwmark} in the service table.
 426 */
 427static inline struct ip_vs_service *
 428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
 429{
 430        unsigned hash;
 431        struct ip_vs_service *svc;
 432
 433        /* Check for fwmark addressed entries */
 434        hash = ip_vs_svc_fwm_hashkey(fwmark);
 435
 436        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
 437                if (svc->fwmark == fwmark && svc->af == af) {
 438                        /* HIT */
 439                        atomic_inc(&svc->usecnt);
 440                        return svc;
 441                }
 442        }
 443
 444        return NULL;
 445}
 446
 447struct ip_vs_service *
 448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
 449                  const union nf_inet_addr *vaddr, __be16 vport)
 450{
 451        struct ip_vs_service *svc;
 452
 453        read_lock(&__ip_vs_svc_lock);
 454
 455        /*
 456         *      Check the table hashed by fwmark first
 457         */
 458        if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
 459                goto out;
 460
 461        /*
 462         *      Check the table hashed by <protocol,addr,port>
 463         *      for "full" addressed entries
 464         */
 465        svc = __ip_vs_service_get(af, protocol, vaddr, vport);
 466
 467        if (svc == NULL
 468            && protocol == IPPROTO_TCP
 469            && atomic_read(&ip_vs_ftpsvc_counter)
 470            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
 471                /*
 472                 * Check if ftp service entry exists, the packet
 473                 * might belong to FTP data connections.
 474                 */
 475                svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
 476        }
 477
 478        if (svc == NULL
 479            && atomic_read(&ip_vs_nullsvc_counter)) {
 480                /*
 481                 * Check if the catch-all port (port zero) exists
 482                 */
 483                svc = __ip_vs_service_get(af, protocol, vaddr, 0);
 484        }
 485
 486  out:
 487        read_unlock(&__ip_vs_svc_lock);
 488
 489        IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
 490                      fwmark, ip_vs_proto_name(protocol),
 491                      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
 492                      svc ? "hit" : "not hit");
 493
 494        return svc;
 495}
 496
 497
 498static inline void
 499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 500{
 501        atomic_inc(&svc->refcnt);
 502        dest->svc = svc;
 503}
 504
 505static inline void
 506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
 507{
 508        struct ip_vs_service *svc = dest->svc;
 509
 510        dest->svc = NULL;
 511        if (atomic_dec_and_test(&svc->refcnt))
 512                kfree(svc);
 513}
 514
 515
 516/*
 517 *      Returns hash value for real service
 518 */
 519static inline unsigned ip_vs_rs_hashkey(int af,
 520                                            const union nf_inet_addr *addr,
 521                                            __be16 port)
 522{
 523        register unsigned porth = ntohs(port);
 524        __be32 addr_fold = addr->ip;
 525
 526#ifdef CONFIG_IP_VS_IPV6
 527        if (af == AF_INET6)
 528                addr_fold = addr->ip6[0]^addr->ip6[1]^
 529                            addr->ip6[2]^addr->ip6[3];
 530#endif
 531
 532        return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
 533                & IP_VS_RTAB_MASK;
 534}
 535
 536/*
 537 *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
 538 *      should be called with locked tables.
 539 */
 540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
 541{
 542        unsigned hash;
 543
 544        if (!list_empty(&dest->d_list)) {
 545                return 0;
 546        }
 547
 548        /*
 549         *      Hash by proto,addr,port,
 550         *      which are the parameters of the real service.
 551         */
 552        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 553
 554        list_add(&dest->d_list, &ip_vs_rtable[hash]);
 555
 556        return 1;
 557}
 558
 559/*
 560 *      UNhashes ip_vs_dest from ip_vs_rtable.
 561 *      should be called with locked tables.
 562 */
 563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 564{
 565        /*
 566         * Remove it from the ip_vs_rtable table.
 567         */
 568        if (!list_empty(&dest->d_list)) {
 569                list_del(&dest->d_list);
 570                INIT_LIST_HEAD(&dest->d_list);
 571        }
 572
 573        return 1;
 574}
 575
 576/*
 577 *      Lookup real service by <proto,addr,port> in the real service table.
 578 */
 579struct ip_vs_dest *
 580ip_vs_lookup_real_service(int af, __u16 protocol,
 581                          const union nf_inet_addr *daddr,
 582                          __be16 dport)
 583{
 584        unsigned hash;
 585        struct ip_vs_dest *dest;
 586
 587        /*
 588         *      Check for "full" addressed entries
 589         *      Return the first found entry
 590         */
 591        hash = ip_vs_rs_hashkey(af, daddr, dport);
 592
 593        read_lock(&__ip_vs_rs_lock);
 594        list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
 595                if ((dest->af == af)
 596                    && ip_vs_addr_equal(af, &dest->addr, daddr)
 597                    && (dest->port == dport)
 598                    && ((dest->protocol == protocol) ||
 599                        dest->vfwmark)) {
 600                        /* HIT */
 601                        read_unlock(&__ip_vs_rs_lock);
 602                        return dest;
 603                }
 604        }
 605        read_unlock(&__ip_vs_rs_lock);
 606
 607        return NULL;
 608}
 609
 610/*
 611 *      Lookup destination by {addr,port} in the given service
 612 */
 613static struct ip_vs_dest *
 614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 615                  __be16 dport)
 616{
 617        struct ip_vs_dest *dest;
 618
 619        /*
 620         * Find the destination for the given service
 621         */
 622        list_for_each_entry(dest, &svc->destinations, n_list) {
 623                if ((dest->af == svc->af)
 624                    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
 625                    && (dest->port == dport)) {
 626                        /* HIT */
 627                        return dest;
 628                }
 629        }
 630
 631        return NULL;
 632}
 633
 634/*
 635 * Find destination by {daddr,dport,vaddr,protocol}
 636 * Cretaed to be used in ip_vs_process_message() in
 637 * the backup synchronization daemon. It finds the
 638 * destination to be bound to the received connection
 639 * on the backup.
 640 *
 641 * ip_vs_lookup_real_service() looked promissing, but
 642 * seems not working as expected.
 643 */
 644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
 645                                   __be16 dport,
 646                                   const union nf_inet_addr *vaddr,
 647                                   __be16 vport, __u16 protocol)
 648{
 649        struct ip_vs_dest *dest;
 650        struct ip_vs_service *svc;
 651
 652        svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
 653        if (!svc)
 654                return NULL;
 655        dest = ip_vs_lookup_dest(svc, daddr, dport);
 656        if (dest)
 657                atomic_inc(&dest->refcnt);
 658        ip_vs_service_put(svc);
 659        return dest;
 660}
 661
 662/*
 663 *  Lookup dest by {svc,addr,port} in the destination trash.
 664 *  The destination trash is used to hold the destinations that are removed
 665 *  from the service table but are still referenced by some conn entries.
 666 *  The reason to add the destination trash is when the dest is temporary
 667 *  down (either by administrator or by monitor program), the dest can be
 668 *  picked back from the trash, the remaining connections to the dest can
 669 *  continue, and the counting information of the dest is also useful for
 670 *  scheduling.
 671 */
 672static struct ip_vs_dest *
 673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 674                     __be16 dport)
 675{
 676        struct ip_vs_dest *dest, *nxt;
 677
 678        /*
 679         * Find the destination in trash
 680         */
 681        list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 682                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
 683                              "dest->refcnt=%d\n",
 684                              dest->vfwmark,
 685                              IP_VS_DBG_ADDR(svc->af, &dest->addr),
 686                              ntohs(dest->port),
 687                              atomic_read(&dest->refcnt));
 688                if (dest->af == svc->af &&
 689                    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 690                    dest->port == dport &&
 691                    dest->vfwmark == svc->fwmark &&
 692                    dest->protocol == svc->protocol &&
 693                    (svc->fwmark ||
 694                     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
 695                      dest->vport == svc->port))) {
 696                        /* HIT */
 697                        return dest;
 698                }
 699
 700                /*
 701                 * Try to purge the destination from trash if not referenced
 702                 */
 703                if (atomic_read(&dest->refcnt) == 1) {
 704                        IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
 705                                      "from trash\n",
 706                                      dest->vfwmark,
 707                                      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 708                                      ntohs(dest->port));
 709                        list_del(&dest->n_list);
 710                        ip_vs_dst_reset(dest);
 711                        __ip_vs_unbind_svc(dest);
 712                        kfree(dest);
 713                }
 714        }
 715
 716        return NULL;
 717}
 718
 719
 720/*
 721 *  Clean up all the destinations in the trash
 722 *  Called by the ip_vs_control_cleanup()
 723 *
 724 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 725 *  the service tables must have been flushed and all the connections
 726 *  are expired, and the refcnt of each destination in the trash must
 727 *  be 1, so we simply release them here.
 728 */
 729static void ip_vs_trash_cleanup(void)
 730{
 731        struct ip_vs_dest *dest, *nxt;
 732
 733        list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
 734                list_del(&dest->n_list);
 735                ip_vs_dst_reset(dest);
 736                __ip_vs_unbind_svc(dest);
 737                kfree(dest);
 738        }
 739}
 740
 741
 742static void
 743ip_vs_zero_stats(struct ip_vs_stats *stats)
 744{
 745        spin_lock_bh(&stats->lock);
 746
 747        memset(&stats->ustats, 0, sizeof(stats->ustats));
 748        ip_vs_zero_estimator(stats);
 749
 750        spin_unlock_bh(&stats->lock);
 751}
 752
 753/*
 754 *      Update a destination in the given service
 755 */
 756static void
 757__ip_vs_update_dest(struct ip_vs_service *svc,
 758                    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
 759{
 760        int conn_flags;
 761
 762        /* set the weight and the flags */
 763        atomic_set(&dest->weight, udest->weight);
 764        conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
 765
 766        /* check if local node and update the flags */
 767#ifdef CONFIG_IP_VS_IPV6
 768        if (svc->af == AF_INET6) {
 769                if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
 770                        conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 771                                | IP_VS_CONN_F_LOCALNODE;
 772                }
 773        } else
 774#endif
 775                if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
 776                        conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
 777                                | IP_VS_CONN_F_LOCALNODE;
 778                }
 779
 780        /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
 781        if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
 782                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 783        } else {
 784                /*
 785                 *    Put the real service in ip_vs_rtable if not present.
 786                 *    For now only for NAT!
 787                 */
 788                write_lock_bh(&__ip_vs_rs_lock);
 789                ip_vs_rs_hash(dest);
 790                write_unlock_bh(&__ip_vs_rs_lock);
 791        }
 792        atomic_set(&dest->conn_flags, conn_flags);
 793
 794        /* bind the service */
 795        if (!dest->svc) {
 796                __ip_vs_bind_svc(dest, svc);
 797        } else {
 798                if (dest->svc != svc) {
 799                        __ip_vs_unbind_svc(dest);
 800                        ip_vs_zero_stats(&dest->stats);
 801                        __ip_vs_bind_svc(dest, svc);
 802                }
 803        }
 804
 805        /* set the dest status flags */
 806        dest->flags |= IP_VS_DEST_F_AVAILABLE;
 807
 808        if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
 809                dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 810        dest->u_threshold = udest->u_threshold;
 811        dest->l_threshold = udest->l_threshold;
 812}
 813
 814
 815/*
 816 *      Create a destination for the given service
 817 */
 818static int
 819ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 820               struct ip_vs_dest **dest_p)
 821{
 822        struct ip_vs_dest *dest;
 823        unsigned atype;
 824
 825        EnterFunction(2);
 826
 827#ifdef CONFIG_IP_VS_IPV6
 828        if (svc->af == AF_INET6) {
 829                atype = ipv6_addr_type(&udest->addr.in6);
 830                if ((!(atype & IPV6_ADDR_UNICAST) ||
 831                        atype & IPV6_ADDR_LINKLOCAL) &&
 832                        !__ip_vs_addr_is_local_v6(&udest->addr.in6))
 833                        return -EINVAL;
 834        } else
 835#endif
 836        {
 837                atype = inet_addr_type(&init_net, udest->addr.ip);
 838                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
 839                        return -EINVAL;
 840        }
 841
 842        dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
 843        if (dest == NULL) {
 844                IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
 845                return -ENOMEM;
 846        }
 847
 848        dest->af = svc->af;
 849        dest->protocol = svc->protocol;
 850        dest->vaddr = svc->addr;
 851        dest->vport = svc->port;
 852        dest->vfwmark = svc->fwmark;
 853        ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
 854        dest->port = udest->port;
 855
 856        atomic_set(&dest->activeconns, 0);
 857        atomic_set(&dest->inactconns, 0);
 858        atomic_set(&dest->persistconns, 0);
 859        atomic_set(&dest->refcnt, 0);
 860
 861        INIT_LIST_HEAD(&dest->d_list);
 862        spin_lock_init(&dest->dst_lock);
 863        spin_lock_init(&dest->stats.lock);
 864        __ip_vs_update_dest(svc, dest, udest);
 865        ip_vs_new_estimator(&dest->stats);
 866
 867        *dest_p = dest;
 868
 869        LeaveFunction(2);
 870        return 0;
 871}
 872
 873
 874/*
 875 *      Add a destination into an existing service
 876 */
 877static int
 878ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 879{
 880        struct ip_vs_dest *dest;
 881        union nf_inet_addr daddr;
 882        __be16 dport = udest->port;
 883        int ret;
 884
 885        EnterFunction(2);
 886
 887        if (udest->weight < 0) {
 888                IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
 889                return -ERANGE;
 890        }
 891
 892        if (udest->l_threshold > udest->u_threshold) {
 893                IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
 894                          "upper threshold\n");
 895                return -ERANGE;
 896        }
 897
 898        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
 899
 900        /*
 901         * Check if the dest already exists in the list
 902         */
 903        dest = ip_vs_lookup_dest(svc, &daddr, dport);
 904
 905        if (dest != NULL) {
 906                IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
 907                return -EEXIST;
 908        }
 909
 910        /*
 911         * Check if the dest already exists in the trash and
 912         * is from the same service
 913         */
 914        dest = ip_vs_trash_get_dest(svc, &daddr, dport);
 915
 916        if (dest != NULL) {
 917                IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
 918                              "dest->refcnt=%d, service %u/%s:%u\n",
 919                              IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
 920                              atomic_read(&dest->refcnt),
 921                              dest->vfwmark,
 922                              IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
 923                              ntohs(dest->vport));
 924
 925                __ip_vs_update_dest(svc, dest, udest);
 926
 927                /*
 928                 * Get the destination from the trash
 929                 */
 930                list_del(&dest->n_list);
 931
 932                ip_vs_new_estimator(&dest->stats);
 933
 934                write_lock_bh(&__ip_vs_svc_lock);
 935
 936                /*
 937                 * Wait until all other svc users go away.
 938                 */
 939                IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 940
 941                list_add(&dest->n_list, &svc->destinations);
 942                svc->num_dests++;
 943
 944                /* call the update_service function of its scheduler */
 945                if (svc->scheduler->update_service)
 946                        svc->scheduler->update_service(svc);
 947
 948                write_unlock_bh(&__ip_vs_svc_lock);
 949                return 0;
 950        }
 951
 952        /*
 953         * Allocate and initialize the dest structure
 954         */
 955        ret = ip_vs_new_dest(svc, udest, &dest);
 956        if (ret) {
 957                return ret;
 958        }
 959
 960        /*
 961         * Add the dest entry into the list
 962         */
 963        atomic_inc(&dest->refcnt);
 964
 965        write_lock_bh(&__ip_vs_svc_lock);
 966
 967        /*
 968         * Wait until all other svc users go away.
 969         */
 970        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 971
 972        list_add(&dest->n_list, &svc->destinations);
 973        svc->num_dests++;
 974
 975        /* call the update_service function of its scheduler */
 976        if (svc->scheduler->update_service)
 977                svc->scheduler->update_service(svc);
 978
 979        write_unlock_bh(&__ip_vs_svc_lock);
 980
 981        LeaveFunction(2);
 982
 983        return 0;
 984}
 985
 986
 987/*
 988 *      Edit a destination in the given service
 989 */
 990static int
 991ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 992{
 993        struct ip_vs_dest *dest;
 994        union nf_inet_addr daddr;
 995        __be16 dport = udest->port;
 996
 997        EnterFunction(2);
 998
 999        if (udest->weight < 0) {
1000                IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1001                return -ERANGE;
1002        }
1003
1004        if (udest->l_threshold > udest->u_threshold) {
1005                IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1006                          "upper threshold\n");
1007                return -ERANGE;
1008        }
1009
1010        ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1011
1012        /*
1013         *  Lookup the destination list
1014         */
1015        dest = ip_vs_lookup_dest(svc, &daddr, dport);
1016
1017        if (dest == NULL) {
1018                IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1019                return -ENOENT;
1020        }
1021
1022        __ip_vs_update_dest(svc, dest, udest);
1023
1024        write_lock_bh(&__ip_vs_svc_lock);
1025
1026        /* Wait until all other svc users go away */
1027        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1028
1029        /* call the update_service, because server weight may be changed */
1030        if (svc->scheduler->update_service)
1031                svc->scheduler->update_service(svc);
1032
1033        write_unlock_bh(&__ip_vs_svc_lock);
1034
1035        LeaveFunction(2);
1036
1037        return 0;
1038}
1039
1040
1041/*
1042 *      Delete a destination (must be already unlinked from the service)
1043 */
1044static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1045{
1046        ip_vs_kill_estimator(&dest->stats);
1047
1048        /*
1049         *  Remove it from the d-linked list with the real services.
1050         */
1051        write_lock_bh(&__ip_vs_rs_lock);
1052        ip_vs_rs_unhash(dest);
1053        write_unlock_bh(&__ip_vs_rs_lock);
1054
1055        /*
1056         *  Decrease the refcnt of the dest, and free the dest
1057         *  if nobody refers to it (refcnt=0). Otherwise, throw
1058         *  the destination into the trash.
1059         */
1060        if (atomic_dec_and_test(&dest->refcnt)) {
1061                ip_vs_dst_reset(dest);
1062                /* simply decrease svc->refcnt here, let the caller check
1063                   and release the service if nobody refers to it.
1064                   Only user context can release destination and service,
1065                   and only one user context can update virtual service at a
1066                   time, so the operation here is OK */
1067                atomic_dec(&dest->svc->refcnt);
1068                kfree(dest);
1069        } else {
1070                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1071                              "dest->refcnt=%d\n",
1072                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
1073                              ntohs(dest->port),
1074                              atomic_read(&dest->refcnt));
1075                list_add(&dest->n_list, &ip_vs_dest_trash);
1076                atomic_inc(&dest->refcnt);
1077        }
1078}
1079
1080
1081/*
1082 *      Unlink a destination from the given service
1083 */
1084static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1085                                struct ip_vs_dest *dest,
1086                                int svcupd)
1087{
1088        dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1089
1090        /*
1091         *  Remove it from the d-linked destination list.
1092         */
1093        list_del(&dest->n_list);
1094        svc->num_dests--;
1095
1096        /*
1097         *  Call the update_service function of its scheduler
1098         */
1099        if (svcupd && svc->scheduler->update_service)
1100                        svc->scheduler->update_service(svc);
1101}
1102
1103
1104/*
1105 *      Delete a destination server in the given service
1106 */
1107static int
1108ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1109{
1110        struct ip_vs_dest *dest;
1111        __be16 dport = udest->port;
1112
1113        EnterFunction(2);
1114
1115        dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1116
1117        if (dest == NULL) {
1118                IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1119                return -ENOENT;
1120        }
1121
1122        write_lock_bh(&__ip_vs_svc_lock);
1123
1124        /*
1125         *      Wait until all other svc users go away.
1126         */
1127        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1128
1129        /*
1130         *      Unlink dest from the service
1131         */
1132        __ip_vs_unlink_dest(svc, dest, 1);
1133
1134        write_unlock_bh(&__ip_vs_svc_lock);
1135
1136        /*
1137         *      Delete the destination
1138         */
1139        __ip_vs_del_dest(dest);
1140
1141        LeaveFunction(2);
1142
1143        return 0;
1144}
1145
1146
1147/*
1148 *      Add a service into the service hash table
1149 */
1150static int
1151ip_vs_add_service(struct ip_vs_service_user_kern *u,
1152                  struct ip_vs_service **svc_p)
1153{
1154        int ret = 0;
1155        struct ip_vs_scheduler *sched = NULL;
1156        struct ip_vs_service *svc = NULL;
1157
1158        /* increase the module use count */
1159        ip_vs_use_count_inc();
1160
1161        /* Lookup the scheduler by 'u->sched_name' */
1162        sched = ip_vs_scheduler_get(u->sched_name);
1163        if (sched == NULL) {
1164                IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1165                           u->sched_name);
1166                ret = -ENOENT;
1167                goto out_mod_dec;
1168        }
1169
1170#ifdef CONFIG_IP_VS_IPV6
1171        if (u->af == AF_INET6) {
1172                if (!sched->supports_ipv6) {
1173                        ret = -EAFNOSUPPORT;
1174                        goto out_err;
1175                }
1176                if ((u->netmask < 1) || (u->netmask > 128)) {
1177                        ret = -EINVAL;
1178                        goto out_err;
1179                }
1180        }
1181#endif
1182
1183        svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1184        if (svc == NULL) {
1185                IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1186                ret = -ENOMEM;
1187                goto out_err;
1188        }
1189
1190        /* I'm the first user of the service */
1191        atomic_set(&svc->usecnt, 1);
1192        atomic_set(&svc->refcnt, 0);
1193
1194        svc->af = u->af;
1195        svc->protocol = u->protocol;
1196        ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1197        svc->port = u->port;
1198        svc->fwmark = u->fwmark;
1199        svc->flags = u->flags;
1200        svc->timeout = u->timeout * HZ;
1201        svc->netmask = u->netmask;
1202
1203        INIT_LIST_HEAD(&svc->destinations);
1204        rwlock_init(&svc->sched_lock);
1205        spin_lock_init(&svc->stats.lock);
1206
1207        /* Bind the scheduler */
1208        ret = ip_vs_bind_scheduler(svc, sched);
1209        if (ret)
1210                goto out_err;
1211        sched = NULL;
1212
1213        /* Update the virtual service counters */
1214        if (svc->port == FTPPORT)
1215                atomic_inc(&ip_vs_ftpsvc_counter);
1216        else if (svc->port == 0)
1217                atomic_inc(&ip_vs_nullsvc_counter);
1218
1219        ip_vs_new_estimator(&svc->stats);
1220
1221        /* Count only IPv4 services for old get/setsockopt interface */
1222        if (svc->af == AF_INET)
1223                ip_vs_num_services++;
1224
1225        /* Hash the service into the service table */
1226        write_lock_bh(&__ip_vs_svc_lock);
1227        ip_vs_svc_hash(svc);
1228        write_unlock_bh(&__ip_vs_svc_lock);
1229
1230        *svc_p = svc;
1231        return 0;
1232
1233  out_err:
1234        if (svc != NULL) {
1235                if (svc->scheduler)
1236                        ip_vs_unbind_scheduler(svc);
1237                if (svc->inc) {
1238                        local_bh_disable();
1239                        ip_vs_app_inc_put(svc->inc);
1240                        local_bh_enable();
1241                }
1242                kfree(svc);
1243        }
1244        ip_vs_scheduler_put(sched);
1245
1246  out_mod_dec:
1247        /* decrease the module use count */
1248        ip_vs_use_count_dec();
1249
1250        return ret;
1251}
1252
1253
1254/*
1255 *      Edit a service and bind it with a new scheduler
1256 */
1257static int
1258ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1259{
1260        struct ip_vs_scheduler *sched, *old_sched;
1261        int ret = 0;
1262
1263        /*
1264         * Lookup the scheduler, by 'u->sched_name'
1265         */
1266        sched = ip_vs_scheduler_get(u->sched_name);
1267        if (sched == NULL) {
1268                IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1269                           u->sched_name);
1270                return -ENOENT;
1271        }
1272        old_sched = sched;
1273
1274#ifdef CONFIG_IP_VS_IPV6
1275        if (u->af == AF_INET6) {
1276                if (!sched->supports_ipv6) {
1277                        ret = -EAFNOSUPPORT;
1278                        goto out;
1279                }
1280                if ((u->netmask < 1) || (u->netmask > 128)) {
1281                        ret = -EINVAL;
1282                        goto out;
1283                }
1284        }
1285#endif
1286
1287        write_lock_bh(&__ip_vs_svc_lock);
1288
1289        /*
1290         * Wait until all other svc users go away.
1291         */
1292        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1293
1294        /*
1295         * Set the flags and timeout value
1296         */
1297        svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1298        svc->timeout = u->timeout * HZ;
1299        svc->netmask = u->netmask;
1300
1301        old_sched = svc->scheduler;
1302        if (sched != old_sched) {
1303                /*
1304                 * Unbind the old scheduler
1305                 */
1306                if ((ret = ip_vs_unbind_scheduler(svc))) {
1307                        old_sched = sched;
1308                        goto out_unlock;
1309                }
1310
1311                /*
1312                 * Bind the new scheduler
1313                 */
1314                if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1315                        /*
1316                         * If ip_vs_bind_scheduler fails, restore the old
1317                         * scheduler.
1318                         * The main reason of failure is out of memory.
1319                         *
1320                         * The question is if the old scheduler can be
1321                         * restored all the time. TODO: if it cannot be
1322                         * restored some time, we must delete the service,
1323                         * otherwise the system may crash.
1324                         */
1325                        ip_vs_bind_scheduler(svc, old_sched);
1326                        old_sched = sched;
1327                        goto out_unlock;
1328                }
1329        }
1330
1331  out_unlock:
1332        write_unlock_bh(&__ip_vs_svc_lock);
1333#ifdef CONFIG_IP_VS_IPV6
1334  out:
1335#endif
1336
1337        if (old_sched)
1338                ip_vs_scheduler_put(old_sched);
1339
1340        return ret;
1341}
1342
1343
1344/*
1345 *      Delete a service from the service list
1346 *      - The service must be unlinked, unlocked and not referenced!
1347 *      - We are called under _bh lock
1348 */
1349static void __ip_vs_del_service(struct ip_vs_service *svc)
1350{
1351        struct ip_vs_dest *dest, *nxt;
1352        struct ip_vs_scheduler *old_sched;
1353
1354        /* Count only IPv4 services for old get/setsockopt interface */
1355        if (svc->af == AF_INET)
1356                ip_vs_num_services--;
1357
1358        ip_vs_kill_estimator(&svc->stats);
1359
1360        /* Unbind scheduler */
1361        old_sched = svc->scheduler;
1362        ip_vs_unbind_scheduler(svc);
1363        if (old_sched)
1364                ip_vs_scheduler_put(old_sched);
1365
1366        /* Unbind app inc */
1367        if (svc->inc) {
1368                ip_vs_app_inc_put(svc->inc);
1369                svc->inc = NULL;
1370        }
1371
1372        /*
1373         *    Unlink the whole destination list
1374         */
1375        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1376                __ip_vs_unlink_dest(svc, dest, 0);
1377                __ip_vs_del_dest(dest);
1378        }
1379
1380        /*
1381         *    Update the virtual service counters
1382         */
1383        if (svc->port == FTPPORT)
1384                atomic_dec(&ip_vs_ftpsvc_counter);
1385        else if (svc->port == 0)
1386                atomic_dec(&ip_vs_nullsvc_counter);
1387
1388        /*
1389         *    Free the service if nobody refers to it
1390         */
1391        if (atomic_read(&svc->refcnt) == 0)
1392                kfree(svc);
1393
1394        /* decrease the module use count */
1395        ip_vs_use_count_dec();
1396}
1397
1398/*
1399 *      Delete a service from the service list
1400 */
1401static int ip_vs_del_service(struct ip_vs_service *svc)
1402{
1403        if (svc == NULL)
1404                return -EEXIST;
1405
1406        /*
1407         * Unhash it from the service table
1408         */
1409        write_lock_bh(&__ip_vs_svc_lock);
1410
1411        ip_vs_svc_unhash(svc);
1412
1413        /*
1414         * Wait until all the svc users go away.
1415         */
1416        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1417
1418        __ip_vs_del_service(svc);
1419
1420        write_unlock_bh(&__ip_vs_svc_lock);
1421
1422        return 0;
1423}
1424
1425
1426/*
1427 *      Flush all the virtual services
1428 */
1429static int ip_vs_flush(void)
1430{
1431        int idx;
1432        struct ip_vs_service *svc, *nxt;
1433
1434        /*
1435         * Flush the service table hashed by <protocol,addr,port>
1436         */
1437        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1438                list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1439                        write_lock_bh(&__ip_vs_svc_lock);
1440                        ip_vs_svc_unhash(svc);
1441                        /*
1442                         * Wait until all the svc users go away.
1443                         */
1444                        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1445                        __ip_vs_del_service(svc);
1446                        write_unlock_bh(&__ip_vs_svc_lock);
1447                }
1448        }
1449
1450        /*
1451         * Flush the service table hashed by fwmark
1452         */
1453        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1454                list_for_each_entry_safe(svc, nxt,
1455                                         &ip_vs_svc_fwm_table[idx], f_list) {
1456                        write_lock_bh(&__ip_vs_svc_lock);
1457                        ip_vs_svc_unhash(svc);
1458                        /*
1459                         * Wait until all the svc users go away.
1460                         */
1461                        IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1462                        __ip_vs_del_service(svc);
1463                        write_unlock_bh(&__ip_vs_svc_lock);
1464                }
1465        }
1466
1467        return 0;
1468}
1469
1470
1471/*
1472 *      Zero counters in a service or all services
1473 */
1474static int ip_vs_zero_service(struct ip_vs_service *svc)
1475{
1476        struct ip_vs_dest *dest;
1477
1478        write_lock_bh(&__ip_vs_svc_lock);
1479        list_for_each_entry(dest, &svc->destinations, n_list) {
1480                ip_vs_zero_stats(&dest->stats);
1481        }
1482        ip_vs_zero_stats(&svc->stats);
1483        write_unlock_bh(&__ip_vs_svc_lock);
1484        return 0;
1485}
1486
1487static int ip_vs_zero_all(void)
1488{
1489        int idx;
1490        struct ip_vs_service *svc;
1491
1492        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1493                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1494                        ip_vs_zero_service(svc);
1495                }
1496        }
1497
1498        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1499                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1500                        ip_vs_zero_service(svc);
1501                }
1502        }
1503
1504        ip_vs_zero_stats(&ip_vs_stats);
1505        return 0;
1506}
1507
1508
1509static int
1510proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1511                     void __user *buffer, size_t *lenp, loff_t *ppos)
1512{
1513        int *valp = table->data;
1514        int val = *valp;
1515        int rc;
1516
1517        rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1518        if (write && (*valp != val)) {
1519                if ((*valp < 0) || (*valp > 3)) {
1520                        /* Restore the correct value */
1521                        *valp = val;
1522                } else {
1523                        update_defense_level();
1524                }
1525        }
1526        return rc;
1527}
1528
1529
1530static int
1531proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1532                       void __user *buffer, size_t *lenp, loff_t *ppos)
1533{
1534        int *valp = table->data;
1535        int val[2];
1536        int rc;
1537
1538        /* backup the value first */
1539        memcpy(val, valp, sizeof(val));
1540
1541        rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1542        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1543                /* Restore the correct value */
1544                memcpy(valp, val, sizeof(val));
1545        }
1546        return rc;
1547}
1548
1549
1550/*
1551 *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1552 */
1553
1554static struct ctl_table vs_vars[] = {
1555        {
1556                .procname       = "amemthresh",
1557                .data           = &sysctl_ip_vs_amemthresh,
1558                .maxlen         = sizeof(int),
1559                .mode           = 0644,
1560                .proc_handler   = &proc_dointvec,
1561        },
1562#ifdef CONFIG_IP_VS_DEBUG
1563        {
1564                .procname       = "debug_level",
1565                .data           = &sysctl_ip_vs_debug_level,
1566                .maxlen         = sizeof(int),
1567                .mode           = 0644,
1568                .proc_handler   = &proc_dointvec,
1569        },
1570#endif
1571        {
1572                .procname       = "am_droprate",
1573                .data           = &sysctl_ip_vs_am_droprate,
1574                .maxlen         = sizeof(int),
1575                .mode           = 0644,
1576                .proc_handler   = &proc_dointvec,
1577        },
1578        {
1579                .procname       = "drop_entry",
1580                .data           = &sysctl_ip_vs_drop_entry,
1581                .maxlen         = sizeof(int),
1582                .mode           = 0644,
1583                .proc_handler   = &proc_do_defense_mode,
1584        },
1585        {
1586                .procname       = "drop_packet",
1587                .data           = &sysctl_ip_vs_drop_packet,
1588                .maxlen         = sizeof(int),
1589                .mode           = 0644,
1590                .proc_handler   = &proc_do_defense_mode,
1591        },
1592        {
1593                .procname       = "secure_tcp",
1594                .data           = &sysctl_ip_vs_secure_tcp,
1595                .maxlen         = sizeof(int),
1596                .mode           = 0644,
1597                .proc_handler   = &proc_do_defense_mode,
1598        },
1599#if 0
1600        {
1601                .procname       = "timeout_established",
1602                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1603                .maxlen         = sizeof(int),
1604                .mode           = 0644,
1605                .proc_handler   = &proc_dointvec_jiffies,
1606        },
1607        {
1608                .procname       = "timeout_synsent",
1609                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1610                .maxlen         = sizeof(int),
1611                .mode           = 0644,
1612                .proc_handler   = &proc_dointvec_jiffies,
1613        },
1614        {
1615                .procname       = "timeout_synrecv",
1616                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1617                .maxlen         = sizeof(int),
1618                .mode           = 0644,
1619                .proc_handler   = &proc_dointvec_jiffies,
1620        },
1621        {
1622                .procname       = "timeout_finwait",
1623                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1624                .maxlen         = sizeof(int),
1625                .mode           = 0644,
1626                .proc_handler   = &proc_dointvec_jiffies,
1627        },
1628        {
1629                .procname       = "timeout_timewait",
1630                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1631                .maxlen         = sizeof(int),
1632                .mode           = 0644,
1633                .proc_handler   = &proc_dointvec_jiffies,
1634        },
1635        {
1636                .procname       = "timeout_close",
1637                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1638                .maxlen         = sizeof(int),
1639                .mode           = 0644,
1640                .proc_handler   = &proc_dointvec_jiffies,
1641        },
1642        {
1643                .procname       = "timeout_closewait",
1644                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1645                .maxlen         = sizeof(int),
1646                .mode           = 0644,
1647                .proc_handler   = &proc_dointvec_jiffies,
1648        },
1649        {
1650                .procname       = "timeout_lastack",
1651                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1652                .maxlen         = sizeof(int),
1653                .mode           = 0644,
1654                .proc_handler   = &proc_dointvec_jiffies,
1655        },
1656        {
1657                .procname       = "timeout_listen",
1658                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1659                .maxlen         = sizeof(int),
1660                .mode           = 0644,
1661                .proc_handler   = &proc_dointvec_jiffies,
1662        },
1663        {
1664                .procname       = "timeout_synack",
1665                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1666                .maxlen         = sizeof(int),
1667                .mode           = 0644,
1668                .proc_handler   = &proc_dointvec_jiffies,
1669        },
1670        {
1671                .procname       = "timeout_udp",
1672                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1673                .maxlen         = sizeof(int),
1674                .mode           = 0644,
1675                .proc_handler   = &proc_dointvec_jiffies,
1676        },
1677        {
1678                .procname       = "timeout_icmp",
1679                .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1680                .maxlen         = sizeof(int),
1681                .mode           = 0644,
1682                .proc_handler   = &proc_dointvec_jiffies,
1683        },
1684#endif
1685        {
1686                .procname       = "cache_bypass",
1687                .data           = &sysctl_ip_vs_cache_bypass,
1688                .maxlen         = sizeof(int),
1689                .mode           = 0644,
1690                .proc_handler   = &proc_dointvec,
1691        },
1692        {
1693                .procname       = "expire_nodest_conn",
1694                .data           = &sysctl_ip_vs_expire_nodest_conn,
1695                .maxlen         = sizeof(int),
1696                .mode           = 0644,
1697                .proc_handler   = &proc_dointvec,
1698        },
1699        {
1700                .procname       = "expire_quiescent_template",
1701                .data           = &sysctl_ip_vs_expire_quiescent_template,
1702                .maxlen         = sizeof(int),
1703                .mode           = 0644,
1704                .proc_handler   = &proc_dointvec,
1705        },
1706        {
1707                .procname       = "sync_threshold",
1708                .data           = &sysctl_ip_vs_sync_threshold,
1709                .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1710                .mode           = 0644,
1711                .proc_handler   = &proc_do_sync_threshold,
1712        },
1713        {
1714                .procname       = "nat_icmp_send",
1715                .data           = &sysctl_ip_vs_nat_icmp_send,
1716                .maxlen         = sizeof(int),
1717                .mode           = 0644,
1718                .proc_handler   = &proc_dointvec,
1719        },
1720        { .ctl_name = 0 }
1721};
1722
1723const struct ctl_path net_vs_ctl_path[] = {
1724        { .procname = "net", .ctl_name = CTL_NET, },
1725        { .procname = "ipv4", .ctl_name = NET_IPV4, },
1726        { .procname = "vs", },
1727        { }
1728};
1729EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1730
1731static struct ctl_table_header * sysctl_header;
1732
1733#ifdef CONFIG_PROC_FS
1734
/*
 *      Cursor kept in seq->private while iterating over the services:
 *      which hash table the current entry came from and its bucket.
 */
struct ip_vs_iter {
        struct list_head *table;        /* ip_vs_svc_table or ip_vs_svc_fwm_table */
        int bucket;                     /* bucket index within that table */
};
1739
1740/*
1741 *      Write the contents of the VS rule table to a PROCfs file.
1742 *      (It is kept just for backward compatibility)
1743 */
1744static inline const char *ip_vs_fwd_name(unsigned flags)
1745{
1746        switch (flags & IP_VS_CONN_F_FWD_MASK) {
1747        case IP_VS_CONN_F_LOCALNODE:
1748                return "Local";
1749        case IP_VS_CONN_F_TUNNEL:
1750                return "Tunnel";
1751        case IP_VS_CONN_F_DROUTE:
1752                return "Route";
1753        default:
1754                return "Masq";
1755        }
1756}
1757
1758
1759/* Get the Nth entry in the two lists */
1760static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1761{
1762        struct ip_vs_iter *iter = seq->private;
1763        int idx;
1764        struct ip_vs_service *svc;
1765
1766        /* look in hash by protocol */
1767        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1768                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1769                        if (pos-- == 0){
1770                                iter->table = ip_vs_svc_table;
1771                                iter->bucket = idx;
1772                                return svc;
1773                        }
1774                }
1775        }
1776
1777        /* keep looking in fwmark */
1778        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1779                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1780                        if (pos-- == 0) {
1781                                iter->table = ip_vs_svc_fwm_table;
1782                                iter->bucket = idx;
1783                                return svc;
1784                        }
1785                }
1786        }
1787
1788        return NULL;
1789}
1790
1791static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1792__acquires(__ip_vs_svc_lock)
1793{
1794
1795        read_lock_bh(&__ip_vs_svc_lock);
1796        return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1797}
1798
1799
1800static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1801{
1802        struct list_head *e;
1803        struct ip_vs_iter *iter;
1804        struct ip_vs_service *svc;
1805
1806        ++*pos;
1807        if (v == SEQ_START_TOKEN)
1808                return ip_vs_info_array(seq,0);
1809
1810        svc = v;
1811        iter = seq->private;
1812
1813        if (iter->table == ip_vs_svc_table) {
1814                /* next service in table hashed by protocol */
1815                if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1816                        return list_entry(e, struct ip_vs_service, s_list);
1817
1818
1819                while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1820                        list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1821                                            s_list) {
1822                                return svc;
1823                        }
1824                }
1825
1826                iter->table = ip_vs_svc_fwm_table;
1827                iter->bucket = -1;
1828                goto scan_fwmark;
1829        }
1830
1831        /* next service in hashed by fwmark */
1832        if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1833                return list_entry(e, struct ip_vs_service, f_list);
1834
1835 scan_fwmark:
1836        while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1837                list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1838                                    f_list)
1839                        return svc;
1840        }
1841
1842        return NULL;
1843}
1844
/*
 * seq_file stop: release the read lock on __ip_vs_svc_lock that
 * ip_vs_info_seq_start() acquired.
 */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
        read_unlock_bh(&__ip_vs_svc_lock);
}
1850
1851
1852static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1853{
1854        if (v == SEQ_START_TOKEN) {
1855                seq_printf(seq,
1856                        "IP Virtual Server version %d.%d.%d (size=%d)\n",
1857                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1858                seq_puts(seq,
1859                         "Prot LocalAddress:Port Scheduler Flags\n");
1860                seq_puts(seq,
1861                         "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1862        } else {
1863                const struct ip_vs_service *svc = v;
1864                const struct ip_vs_iter *iter = seq->private;
1865                const struct ip_vs_dest *dest;
1866
1867                if (iter->table == ip_vs_svc_table) {
1868#ifdef CONFIG_IP_VS_IPV6
1869                        if (svc->af == AF_INET6)
1870                                seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
1871                                           ip_vs_proto_name(svc->protocol),
1872                                           NIP6(svc->addr.in6),
1873                                           ntohs(svc->port),
1874                                           svc->scheduler->name);
1875                        else
1876#endif
1877                                seq_printf(seq, "%s  %08X:%04X %s ",
1878                                           ip_vs_proto_name(svc->protocol),
1879                                           ntohl(svc->addr.ip),
1880                                           ntohs(svc->port),
1881                                           svc->scheduler->name);
1882                } else {
1883                        seq_printf(seq, "FWM  %08X %s ",
1884                                   svc->fwmark, svc->scheduler->name);
1885                }
1886
1887                if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1888                        seq_printf(seq, "persistent %d %08X\n",
1889                                svc->timeout,
1890                                ntohl(svc->netmask));
1891                else
1892                        seq_putc(seq, '\n');
1893
1894                list_for_each_entry(dest, &svc->destinations, n_list) {
1895#ifdef CONFIG_IP_VS_IPV6
1896                        if (dest->af == AF_INET6)
1897                                seq_printf(seq,
1898                                           "  -> [" NIP6_FMT "]:%04X"
1899                                           "      %-7s %-6d %-10d %-10d\n",
1900                                           NIP6(dest->addr.in6),
1901                                           ntohs(dest->port),
1902                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1903                                           atomic_read(&dest->weight),
1904                                           atomic_read(&dest->activeconns),
1905                                           atomic_read(&dest->inactconns));
1906                        else
1907#endif
1908                                seq_printf(seq,
1909                                           "  -> %08X:%04X      "
1910                                           "%-7s %-6d %-10d %-10d\n",
1911                                           ntohl(dest->addr.ip),
1912                                           ntohs(dest->port),
1913                                           ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1914                                           atomic_read(&dest->weight),
1915                                           atomic_read(&dest->activeconns),
1916                                           atomic_read(&dest->inactconns));
1917
1918                }
1919        }
1920        return 0;
1921}
1922
/*
 * Iterator callbacks for the service listing seq_file; handed to
 * seq_open_private() by ip_vs_info_open().
 */
1923static const struct seq_operations ip_vs_info_seq_ops = {
1924        .start = ip_vs_info_seq_start,
1925        .next  = ip_vs_info_seq_next,
1926        .stop  = ip_vs_info_seq_stop,
1927        .show  = ip_vs_info_seq_show,
1928};
1929
/*
 * ->open for the service listing file: sets up a seq_file driven by
 * ip_vs_info_seq_ops and asks seq_open_private() for a private area of
 * sizeof(struct ip_vs_iter), which the iterator callbacks read back through
 * seq->private.  Returns whatever seq_open_private() returns.
 */
1930static int ip_vs_info_open(struct inode *inode, struct file *file)
1931{
1932        return seq_open_private(file, &ip_vs_info_seq_ops,
1933                        sizeof(struct ip_vs_iter));
1934}
1935
/*
 * File operations for the service listing: opened via ip_vs_info_open(),
 * read/seek through the generic seq_file helpers, and released with
 * seq_release_private() to match the seq_open_private() in ->open.
 */
1936static const struct file_operations ip_vs_info_fops = {
1937        .owner   = THIS_MODULE,
1938        .open    = ip_vs_info_open,
1939        .read    = seq_read,
1940        .llseek  = seq_lseek,
1941        .release = seq_release_private,
1942};
1943
1944#endif
1945
/*
 * Global statistics object.  Only the spinlock is initialised explicitly;
 * all remaining members start out zeroed.  Readers in this file take .lock
 * with spin_lock_bh() around accesses to .ustats.
 */
1946struct ip_vs_stats ip_vs_stats = {
1947        .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1948};
1949
1950#ifdef CONFIG_PROC_FS
1951static int ip_vs_stats_show(struct seq_file *seq, void *v)
1952{
1953
1954/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1955        seq_puts(seq,
1956                 "   Total Incoming Outgoing         Incoming         Outgoing\n");
1957        seq_printf(seq,
1958                   "   Conns  Packets  Packets            Bytes            Bytes\n");
1959
1960        spin_lock_bh(&ip_vs_stats.lock);
1961        seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1962                   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1963                   (unsigned long long) ip_vs_stats.ustats.inbytes,
1964                   (unsigned long long) ip_vs_stats.ustats.outbytes);
1965
1966/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1967        seq_puts(seq,
1968                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1969        seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1970                        ip_vs_stats.ustats.cps,
1971                        ip_vs_stats.ustats.inpps,
1972                        ip_vs_stats.ustats.outpps,
1973                        ip_vs_stats.ustats.inbps,
1974                        ip_vs_stats.ustats.outbps);
1975        spin_unlock_bh(&ip_vs_stats.lock);
1976
1977        return 0;
1978}
1979
/*
 * ->open for the statistics file: a single-record seq_file whose content is
 * produced by ip_vs_stats_show(); no private data is passed (NULL).
 */
1980static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1981{
1982        return single_open(file, ip_vs_stats_show, NULL);
1983}
1984
/*
 * File operations for the statistics file; ->release is single_release()
 * to pair with the single_open() done in ip_vs_stats_seq_open().
 */
1985static const struct file_operations ip_vs_stats_fops = {
1986        .owner = THIS_MODULE,
1987        .open = ip_vs_stats_seq_open,
1988        .read = seq_read,
1989        .llseek = seq_lseek,
1990        .release = single_release,
1991};
1992
1993#endif
1994
1995/*
1996 *      Set timeout values for tcp tcpfin udp in the timeout_table.
1997 */
1998static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1999{
2000        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2001                  u->tcp_timeout,
2002                  u->tcp_fin_timeout,
2003                  u->udp_timeout);
2004
2005#ifdef CONFIG_IP_VS_PROTO_TCP
2006        if (u->tcp_timeout) {
2007                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2008                        = u->tcp_timeout * HZ;
2009        }
2010
2011        if (u->tcp_fin_timeout) {
2012                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2013                        = u->tcp_fin_timeout * HZ;
2014        }
2015#endif
2016
2017#ifdef CONFIG_IP_VS_PROTO_UDP
2018        if (u->udp_timeout) {
2019                ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2020                        = u->udp_timeout * HZ;
2021        }
2022#endif
2023        return 0;
2024}
2025
2026
/*
 * Argument sizes for the setsockopt() commands.
 *
 * SET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL (note: the macro argument is not parenthesised, so pass a
 * plain identifier or constant).  set_arglen[] gives, per command index, the
 * exact number of argument bytes do_ip_vs_set_ctl() accepts; indices without
 * an initialiser are 0.  MAX_ARG_LEN, the largest of these, sizes the
 * on-stack argument buffer in do_ip_vs_set_ctl().  The table elements are
 * unsigned char, so every length must stay below 256.
 */
2027#define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2028#define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2029#define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2030                                 sizeof(struct ip_vs_dest_user))
2031#define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2032#define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2033#define MAX_ARG_LEN             SVCDEST_ARG_LEN
2034
2035static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2036        [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2037        [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2038        [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2039        [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2040        [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2041        [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2042        [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2043        [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2044        [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2045        [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2046        [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2047};
2048
2049static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2050                                  struct ip_vs_service_user *usvc_compat)
2051{
2052        usvc->af                = AF_INET;
2053        usvc->protocol          = usvc_compat->protocol;
2054        usvc->addr.ip           = usvc_compat->addr;
2055        usvc->port              = usvc_compat->port;
2056        usvc->fwmark            = usvc_compat->fwmark;
2057
2058        /* Deep copy of sched_name is not needed here */
2059        usvc->sched_name        = usvc_compat->sched_name;
2060
2061        usvc->flags             = usvc_compat->flags;
2062        usvc->timeout           = usvc_compat->timeout;
2063        usvc->netmask           = usvc_compat->netmask;
2064}
2065
2066static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2067                                   struct ip_vs_dest_user *udest_compat)
2068{
2069        udest->addr.ip          = udest_compat->addr;
2070        udest->port             = udest_compat->port;
2071        udest->conn_flags       = udest_compat->conn_flags;
2072        udest->weight           = udest_compat->weight;
2073        udest->u_threshold      = udest_compat->u_threshold;
2074        udest->l_threshold      = udest_compat->l_threshold;
2075}
2076
2077static int
2078do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2079{
2080        int ret;
2081        unsigned char arg[MAX_ARG_LEN];
2082        struct ip_vs_service_user *usvc_compat;
2083        struct ip_vs_service_user_kern usvc;
2084        struct ip_vs_service *svc;
2085        struct ip_vs_dest_user *udest_compat;
2086        struct ip_vs_dest_user_kern udest;
2087
2088        if (!capable(CAP_NET_ADMIN))
2089                return -EPERM;
2090
2091        if (len != set_arglen[SET_CMDID(cmd)]) {
2092                IP_VS_ERR("set_ctl: len %u != %u\n",
2093                          len, set_arglen[SET_CMDID(cmd)]);
2094                return -EINVAL;
2095        }
2096
2097        if (copy_from_user(arg, user, len) != 0)
2098                return -EFAULT;
2099
2100        /* increase the module use count */
2101        ip_vs_use_count_inc();
2102
2103        if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2104                ret = -ERESTARTSYS;
2105                goto out_dec;
2106        }
2107
2108        if (cmd == IP_VS_SO_SET_FLUSH) {
2109                /* Flush the virtual service */
2110                ret = ip_vs_flush();
2111                goto out_unlock;
2112        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2113                /* Set timeout values for (tcp tcpfin udp) */
2114                ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2115                goto out_unlock;
2116        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2117                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2118                ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2119                goto out_unlock;
2120        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2121                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2122                ret = stop_sync_thread(dm->state);
2123                goto out_unlock;
2124        }
2125
2126        usvc_compat = (struct ip_vs_service_user *)arg;
2127        udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2128
2129        /* We only use the new structs internally, so copy userspace compat
2130         * structs to extended internal versions */
2131        ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2132        ip_vs_copy_udest_compat(&udest, udest_compat);
2133
2134        if (cmd == IP_VS_SO_SET_ZERO) {
2135                /* if no service address is set, zero counters in all */
2136                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2137                        ret = ip_vs_zero_all();
2138                        goto out_unlock;
2139                }
2140        }
2141
2142        /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2143        if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2144                IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2145                          usvc.protocol, NIPQUAD(usvc.addr.ip),
2146                          ntohs(usvc.port), usvc.sched_name);
2147                ret = -EFAULT;
2148                goto out_unlock;
2149        }
2150
2151        /* Lookup the exact service by <protocol, addr, port> or fwmark */
2152        if (usvc.fwmark == 0)
2153                svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2154                                          &usvc.addr, usvc.port);
2155        else
2156                svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2157
2158        if (cmd != IP_VS_SO_SET_ADD
2159            && (svc == NULL || svc->protocol != usvc.protocol)) {
2160                ret = -ESRCH;
2161                goto out_unlock;
2162        }
2163
2164        switch (cmd) {
2165        case IP_VS_SO_SET_ADD:
2166                if (svc != NULL)
2167                        ret = -EEXIST;
2168                else
2169                        ret = ip_vs_add_service(&usvc, &svc);
2170                break;
2171        case IP_VS_SO_SET_EDIT:
2172                ret = ip_vs_edit_service(svc, &usvc);
2173                break;
2174        case IP_VS_SO_SET_DEL:
2175                ret = ip_vs_del_service(svc);
2176                if (!ret)
2177                        goto out_unlock;
2178                break;
2179        case IP_VS_SO_SET_ZERO:
2180                ret = ip_vs_zero_service(svc);
2181                break;
2182        case IP_VS_SO_SET_ADDDEST:
2183                ret = ip_vs_add_dest(svc, &udest);
2184                break;
2185        case IP_VS_SO_SET_EDITDEST:
2186                ret = ip_vs_edit_dest(svc, &udest);
2187                break;
2188        case IP_VS_SO_SET_DELDEST:
2189                ret = ip_vs_del_dest(svc, &udest);
2190                break;
2191        default:
2192                ret = -EINVAL;
2193        }
2194
2195        if (svc)
2196                ip_vs_service_put(svc);
2197
2198  out_unlock:
2199        mutex_unlock(&__ip_vs_mutex);
2200  out_dec:
2201        /* decrease the module use count */
2202        ip_vs_use_count_dec();
2203
2204        return ret;
2205}
2206
2207
/*
 * Snapshot the user-visible counters of @src into @dst.
 * sizeof(*dst) bytes are copied from src->ustats while holding src->lock
 * with bottom halves disabled.
 */
2208static void
2209ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2210{
2211        spin_lock_bh(&src->lock);
2212        memcpy(dst, &src->ustats, sizeof(*dst));
2213        spin_unlock_bh(&src->lock);
2214}
2215
2216static void
2217ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2218{
2219        dst->protocol = src->protocol;
2220        dst->addr = src->addr.ip;
2221        dst->port = src->port;
2222        dst->fwmark = src->fwmark;
2223        strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2224        dst->flags = src->flags;
2225        dst->timeout = src->timeout / HZ;
2226        dst->netmask = src->netmask;
2227        dst->num_dests = src->num_dests;
2228        ip_vs_copy_stats(&dst->stats, &src->stats);
2229}
2230
2231static inline int
2232__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2233                            struct ip_vs_get_services __user *uptr)
2234{
2235        int idx, count=0;
2236        struct ip_vs_service *svc;
2237        struct ip_vs_service_entry entry;
2238        int ret = 0;
2239
2240        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2241                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2242                        /* Only expose IPv4 entries to old interface */
2243                        if (svc->af != AF_INET)
2244                                continue;
2245
2246                        if (count >= get->num_services)
2247                                goto out;
2248                        memset(&entry, 0, sizeof(entry));
2249                        ip_vs_copy_service(&entry, svc);
2250                        if (copy_to_user(&uptr->entrytable[count],
2251                                         &entry, sizeof(entry))) {
2252                                ret = -EFAULT;
2253                                goto out;
2254                        }
2255                        count++;
2256                }
2257        }
2258
2259        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2260                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2261                        /* Only expose IPv4 entries to old interface */
2262                        if (svc->af != AF_INET)
2263                                continue;
2264
2265                        if (count >= get->num_services)
2266                                goto out;
2267                        memset(&entry, 0, sizeof(entry));
2268                        ip_vs_copy_service(&entry, svc);
2269                        if (copy_to_user(&uptr->entrytable[count],
2270                                         &entry, sizeof(entry))) {
2271                                ret = -EFAULT;
2272                                goto out;
2273                        }
2274                        count++;
2275                }
2276        }
2277  out:
2278        return ret;
2279}
2280
2281static inline int
2282__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2283                         struct ip_vs_get_dests __user *uptr)
2284{
2285        struct ip_vs_service *svc;
2286        union nf_inet_addr addr = { .ip = get->addr };
2287        int ret = 0;
2288
2289        if (get->fwmark)
2290                svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2291        else
2292                svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2293                                          get->port);
2294
2295        if (svc) {
2296                int count = 0;
2297                struct ip_vs_dest *dest;
2298                struct ip_vs_dest_entry entry;
2299
2300                list_for_each_entry(dest, &svc->destinations, n_list) {
2301                        if (count >= get->num_dests)
2302                                break;
2303
2304                        entry.addr = dest->addr.ip;
2305                        entry.port = dest->port;
2306                        entry.conn_flags = atomic_read(&dest->conn_flags);
2307                        entry.weight = atomic_read(&dest->weight);
2308                        entry.u_threshold = dest->u_threshold;
2309                        entry.l_threshold = dest->l_threshold;
2310                        entry.activeconns = atomic_read(&dest->activeconns);
2311                        entry.inactconns = atomic_read(&dest->inactconns);
2312                        entry.persistconns = atomic_read(&dest->persistconns);
2313                        ip_vs_copy_stats(&entry.stats, &dest->stats);
2314                        if (copy_to_user(&uptr->entrytable[count],
2315                                         &entry, sizeof(entry))) {
2316                                ret = -EFAULT;
2317                                break;
2318                        }
2319                        count++;
2320                }
2321                ip_vs_service_put(svc);
2322        } else
2323                ret = -ESRCH;
2324        return ret;
2325}
2326
2327static inline void
2328__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2329{
2330#ifdef CONFIG_IP_VS_PROTO_TCP
2331        u->tcp_timeout =
2332                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2333        u->tcp_fin_timeout =
2334                ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2335#endif
2336#ifdef CONFIG_IP_VS_PROTO_UDP
2337        u->udp_timeout =
2338                ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2339#endif
2340}
2341
2342
/*
 * Argument sizes for the getsockopt() commands.
 *
 * GET_CMDID() turns a command number into an index relative to
 * IP_VS_BASE_CTL (the macro argument is not parenthesised).  get_arglen[]
 * gives, per command index, the minimum argument size and the number of
 * bytes do_ip_vs_get_ctl() copies in from userspace; indices without an
 * initialiser are 0.  The table elements are unsigned char and the copy
 * target in do_ip_vs_get_ctl() is a 128-byte buffer, so each length here
 * has to fit both.
 */
2343#define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2344#define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2345#define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2346#define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2347#define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2348#define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2349#define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2350
2351static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2352        [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2353        [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2354        [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2355        [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2356        [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2357        [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2358        [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2359};
2360
2361static int
2362do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2363{
2364        unsigned char arg[128];
2365        int ret = 0;
2366
2367        if (!capable(CAP_NET_ADMIN))
2368                return -EPERM;
2369
2370        if (*len < get_arglen[GET_CMDID(cmd)]) {
2371                IP_VS_ERR("get_ctl: len %u < %u\n",
2372                          *len, get_arglen[GET_CMDID(cmd)]);
2373                return -EINVAL;
2374        }
2375
2376        if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2377                return -EFAULT;
2378
2379        if (mutex_lock_interruptible(&__ip_vs_mutex))
2380                return -ERESTARTSYS;
2381
2382        switch (cmd) {
2383        case IP_VS_SO_GET_VERSION:
2384        {
2385                char buf[64];
2386
2387                sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2388                        NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2389                if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2390                        ret = -EFAULT;
2391                        goto out;
2392                }
2393                *len = strlen(buf)+1;
2394        }
2395        break;
2396
2397        case IP_VS_SO_GET_INFO:
2398        {
2399                struct ip_vs_getinfo info;
2400                info.version = IP_VS_VERSION_CODE;
2401                info.size = IP_VS_CONN_TAB_SIZE;
2402                info.num_services = ip_vs_num_services;
2403                if (copy_to_user(user, &info, sizeof(info)) != 0)
2404                        ret = -EFAULT;
2405        }
2406        break;
2407
2408        case IP_VS_SO_GET_SERVICES:
2409        {
2410                struct ip_vs_get_services *get;
2411                int size;
2412
2413                get = (struct ip_vs_get_services *)arg;
2414                size = sizeof(*get) +
2415                        sizeof(struct ip_vs_service_entry) * get->num_services;
2416                if (*len != size) {
2417                        IP_VS_ERR("length: %u != %u\n", *len, size);
2418                        ret = -EINVAL;
2419                        goto out;
2420                }
2421                ret = __ip_vs_get_service_entries(get, user);
2422        }
2423        break;
2424
2425        case IP_VS_SO_GET_SERVICE:
2426        {
2427                struct ip_vs_service_entry *entry;
2428                struct ip_vs_service *svc;
2429                union nf_inet_addr addr;
2430
2431                entry = (struct ip_vs_service_entry *)arg;
2432                addr.ip = entry->addr;
2433                if (entry->fwmark)
2434                        svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2435                else
2436                        svc = __ip_vs_service_get(AF_INET, entry->protocol,
2437                                                  &addr, entry->port);
2438                if (svc) {
2439                        ip_vs_copy_service(entry, svc);
2440                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2441                                ret = -EFAULT;
2442                        ip_vs_service_put(svc);
2443                } else
2444                        ret = -ESRCH;
2445        }
2446        break;
2447
2448        case IP_VS_SO_GET_DESTS:
2449        {
2450                struct ip_vs_get_dests *get;
2451                int size;
2452
2453                get = (struct ip_vs_get_dests *)arg;
2454                size = sizeof(*get) +
2455                        sizeof(struct ip_vs_dest_entry) * get->num_dests;
2456                if (*len != size) {
2457                        IP_VS_ERR("length: %u != %u\n", *len, size);
2458                        ret = -EINVAL;
2459                        goto out;
2460                }
2461                ret = __ip_vs_get_dest_entries(get, user);
2462        }
2463        break;
2464
2465        case IP_VS_SO_GET_TIMEOUT:
2466        {
2467                struct ip_vs_timeout_user t;
2468
2469                __ip_vs_get_timeouts(&t);
2470                if (copy_to_user(user, &t, sizeof(t)) != 0)
2471                        ret = -EFAULT;
2472        }
2473        break;
2474
2475        case IP_VS_SO_GET_DAEMON:
2476        {
2477                struct ip_vs_daemon_user d[2];
2478
2479                memset(&d, 0, sizeof(d));
2480                if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2481                        d[0].state = IP_VS_STATE_MASTER;
2482                        strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2483                        d[0].syncid = ip_vs_master_syncid;
2484                }
2485                if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2486                        d[1].state = IP_VS_STATE_BACKUP;
2487                        strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2488                        d[1].syncid = ip_vs_backup_syncid;
2489                }
2490                if (copy_to_user(user, &d, sizeof(d)) != 0)
2491                        ret = -EFAULT;
2492        }
2493        break;
2494
2495        default:
2496                ret = -EINVAL;
2497        }
2498
2499  out:
2500        mutex_unlock(&__ip_vs_mutex);
2501        return ret;
2502}
2503
2504
/*
 * Socket option registration for PF_INET sockets.  Both handlers are given
 * the option range starting at IP_VS_BASE_CTL; the *_optmax values are one
 * past the highest command (IP_VS_SO_SET_MAX / IP_VS_SO_GET_MAX), which
 * matches the sizes of set_arglen[] and get_arglen[].
 */
2505static struct nf_sockopt_ops ip_vs_sockopts = {
2506        .pf             = PF_INET,
2507        .set_optmin     = IP_VS_BASE_CTL,
2508        .set_optmax     = IP_VS_SO_SET_MAX+1,
2509        .set            = do_ip_vs_set_ctl,
2510        .get_optmin     = IP_VS_BASE_CTL,
2511        .get_optmax     = IP_VS_SO_GET_MAX+1,
2512        .get            = do_ip_vs_get_ctl,
2513        .owner          = THIS_MODULE,
2514};
2515
2516/*
2517 * Generic Netlink interface
2518 */
2519
2520/* IPVS genetlink family */
/*
 * Family descriptor: the id is allocated at registration time
 * (GENL_ID_GENERATE) and no family-specific header follows the genetlink
 * header (hdrsize 0).
 *
 * NOTE(review): .maxattr is set from the command enum (IPVS_CMD_MAX) while
 * the first-level policy table in this file is sized by IPVS_CMD_ATTR_MAX.
 * If the two differ, attribute types above IPVS_CMD_ATTR_MAX could be
 * validated against entries past the end of that table -- confirm against
 * the ops table and the genetlink core.
 */
2521static struct genl_family ip_vs_genl_family = {
2522        .id             = GENL_ID_GENERATE,
2523        .hdrsize        = 0,
2524        .name           = IPVS_GENL_NAME,
2525        .version        = IPVS_GENL_VERSION,
2526        .maxattr        = IPVS_CMD_MAX,
2527};
2528
2529/* Policy used for first-level command attributes */
/*
 * Indexed by IPVS_CMD_ATTR_*.  The three nested attributes carry the
 * service, destination and daemon descriptions; the timeouts are plain
 * 32-bit values.  Attribute types without an entry are zero-initialised.
 */
2530static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2531        [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2532        [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2533        [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2534        [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2535        [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2536        [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2537};
2538
2539/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
/*
 * Indexed by IPVS_DAEMON_ATTR_*.  The multicast interface name is a NUL
 * terminated string whose .len is capped at IP_VS_IFNAME_MAXLEN.
 */
2540static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2541        [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2542        [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2543                                            .len = IP_VS_IFNAME_MAXLEN },
2544        [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2545};
2546
2547/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
/*
 * Indexed by IPVS_SVC_ATTR_*.  The address and flags are binary blobs whose
 * .len is sizeof(union nf_inet_addr) and sizeof(struct ip_vs_flags)
 * respectively; the scheduler name is a NUL terminated string with .len
 * IP_VS_SCHEDNAME_MAXLEN.
 */
2548static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2549        [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2550        [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2551        [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2552                                            .len = sizeof(union nf_inet_addr) },
2553        [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2554        [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2555        [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2556                                            .len = IP_VS_SCHEDNAME_MAXLEN },
2557        [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2558                                            .len = sizeof(struct ip_vs_flags) },
2559        [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2560        [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2561        [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2562};
2563
2564/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
/*
 * Indexed by IPVS_DEST_ATTR_*.  The address is a binary blob whose .len is
 * sizeof(union nf_inet_addr); the connection counters and statistics are
 * listed here as well as the configurable attributes.
 */
2565static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2566        [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2567                                            .len = sizeof(union nf_inet_addr) },
2568        [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2569        [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2570        [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2571        [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2572        [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2573        [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2574        [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2575        [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2576        [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2577};
2578
/*
 * Append the counters of @stats to @skb as a nested attribute of type
 * @container_type.
 *
 * The counters are read and added while holding stats->lock.  The NLA_PUT_*
 * macros are expected to jump to nla_put_failure when an attribute cannot
 * be added (the label has no other user in this function); in that case the
 * lock is dropped and the partially built nest is cancelled.
 *
 * Returns 0 on success, -EMSGSIZE if the nest could not be started or
 * completed.
 */
2579static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2580                                 struct ip_vs_stats *stats)
2581{
2582        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2583        if (!nl_stats)
2584                return -EMSGSIZE;
2585
2586        spin_lock_bh(&stats->lock);
2587
2588        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2589        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2590        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2591        NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2592        NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2593        NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2594        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2595        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2596        NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2597        NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2598
2599        spin_unlock_bh(&stats->lock);
2600
2601        nla_nest_end(skb, nl_stats);
2602
2603        return 0;
2604
2605nla_put_failure:
2606        spin_unlock_bh(&stats->lock);
2607        nla_nest_cancel(skb, nl_stats);
2608        return -EMSGSIZE;
2609}
2610
2611static int ip_vs_genl_fill_service(struct sk_buff *skb,
2612                                   struct ip_vs_service *svc)
2613{
2614        struct nlattr *nl_service;
2615        struct ip_vs_flags flags = { .flags = svc->flags,
2616                                     .mask = ~0 };
2617
2618        nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2619        if (!nl_service)
2620                return -EMSGSIZE;
2621
2622        NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2623
2624        if (svc->fwmark) {
2625                NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2626        } else {
2627                NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2628                NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2629                NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2630        }
2631
2632        NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2633        NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2634        NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2635        NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2636
2637        if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2638                goto nla_put_failure;
2639
2640        nla_nest_end(skb, nl_service);
2641
2642        return 0;
2643
2644nla_put_failure:
2645        nla_nest_cancel(skb, nl_service);
2646        return -EMSGSIZE;
2647}
2648
2649static int ip_vs_genl_dump_service(struct sk_buff *skb,
2650                                   struct ip_vs_service *svc,
2651                                   struct netlink_callback *cb)
2652{
2653        void *hdr;
2654
2655        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2656                          &ip_vs_genl_family, NLM_F_MULTI,
2657                          IPVS_CMD_NEW_SERVICE);
2658        if (!hdr)
2659                return -EMSGSIZE;
2660
2661        if (ip_vs_genl_fill_service(skb, svc) < 0)
2662                goto nla_put_failure;
2663
2664        return genlmsg_end(skb, hdr);
2665
2666nla_put_failure:
2667        genlmsg_cancel(skb, hdr);
2668        return -EMSGSIZE;
2669}
2670
2671static int ip_vs_genl_dump_services(struct sk_buff *skb,
2672                                    struct netlink_callback *cb)
2673{
2674        int idx = 0, i;
2675        int start = cb->args[0];
2676        struct ip_vs_service *svc;
2677
2678        mutex_lock(&__ip_vs_mutex);
2679        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2680                list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2681                        if (++idx <= start)
2682                                continue;
2683                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2684                                idx--;
2685                                goto nla_put_failure;
2686                        }
2687                }
2688        }
2689
2690        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2691                list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2692                        if (++idx <= start)
2693                                continue;
2694                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2695                                idx--;
2696                                goto nla_put_failure;
2697                        }
2698                }
2699        }
2700
2701nla_put_failure:
2702        mutex_unlock(&__ip_vs_mutex);
2703        cb->args[0] = idx;
2704
2705        return skb->len;
2706}
2707
2708static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2709                                    struct nlattr *nla, int full_entry)
2710{
2711        struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2712        struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2713
2714        /* Parse mandatory identifying service fields first */
2715        if (nla == NULL ||
2716            nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2717                return -EINVAL;
2718
2719        nla_af          = attrs[IPVS_SVC_ATTR_AF];
2720        nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2721        nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2722        nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2723        nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2724
2725        if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2726                return -EINVAL;
2727
2728        usvc->af = nla_get_u16(nla_af);
2729#ifdef CONFIG_IP_VS_IPV6
2730        if (usvc->af != AF_INET && usvc->af != AF_INET6)
2731#else
2732        if (usvc->af != AF_INET)
2733#endif
2734                return -EAFNOSUPPORT;
2735
2736        if (nla_fwmark) {
2737                usvc->protocol = IPPROTO_TCP;
2738                usvc->fwmark = nla_get_u32(nla_fwmark);
2739        } else {
2740                usvc->protocol = nla_get_u16(nla_protocol);
2741                nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2742                usvc->port = nla_get_u16(nla_port);
2743                usvc->fwmark = 0;
2744        }
2745
2746        /* If a full entry was requested, check for the additional fields */
2747        if (full_entry) {
2748                struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2749                              *nla_netmask;
2750                struct ip_vs_flags flags;
2751                struct ip_vs_service *svc;
2752
2753                nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2754                nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2755                nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2756                nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2757
2758                if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2759                        return -EINVAL;
2760
2761                nla_memcpy(&flags, nla_flags, sizeof(flags));
2762
2763                /* prefill flags from service if it already exists */
2764                if (usvc->fwmark)
2765                        svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2766                else
2767                        svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2768                                                  &usvc->addr, usvc->port);
2769                if (svc) {
2770                        usvc->flags = svc->flags;
2771                        ip_vs_service_put(svc);
2772                } else
2773                        usvc->flags = 0;
2774
2775                /* set new flags from userland */
2776                usvc->flags = (usvc->flags & ~flags.mask) |
2777                              (flags.flags & flags.mask);
2778                usvc->sched_name = nla_data(nla_sched);
2779                usvc->timeout = nla_get_u32(nla_timeout);
2780                usvc->netmask = nla_get_u32(nla_netmask);
2781        }
2782
2783        return 0;
2784}
2785
2786static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2787{
2788        struct ip_vs_service_user_kern usvc;
2789        int ret;
2790
2791        ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2792        if (ret)
2793                return ERR_PTR(ret);
2794
2795        if (usvc.fwmark)
2796                return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2797        else
2798                return __ip_vs_service_get(usvc.af, usvc.protocol,
2799                                           &usvc.addr, usvc.port);
2800}
2801
2802static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2803{
2804        struct nlattr *nl_dest;
2805
2806        nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2807        if (!nl_dest)
2808                return -EMSGSIZE;
2809
2810        NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2811        NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2812
2813        NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2814                    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2815        NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2816        NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2817        NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2818        NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2819                    atomic_read(&dest->activeconns));
2820        NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2821                    atomic_read(&dest->inactconns));
2822        NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2823                    atomic_read(&dest->persistconns));
2824
2825        if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2826                goto nla_put_failure;
2827
2828        nla_nest_end(skb, nl_dest);
2829
2830        return 0;
2831
2832nla_put_failure:
2833        nla_nest_cancel(skb, nl_dest);
2834        return -EMSGSIZE;
2835}
2836
2837static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2838                                struct netlink_callback *cb)
2839{
2840        void *hdr;
2841
2842        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2843                          &ip_vs_genl_family, NLM_F_MULTI,
2844                          IPVS_CMD_NEW_DEST);
2845        if (!hdr)
2846                return -EMSGSIZE;
2847
2848        if (ip_vs_genl_fill_dest(skb, dest) < 0)
2849                goto nla_put_failure;
2850
2851        return genlmsg_end(skb, hdr);
2852
2853nla_put_failure:
2854        genlmsg_cancel(skb, hdr);
2855        return -EMSGSIZE;
2856}
2857
2858static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2859                                 struct netlink_callback *cb)
2860{
2861        int idx = 0;
2862        int start = cb->args[0];
2863        struct ip_vs_service *svc;
2864        struct ip_vs_dest *dest;
2865        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2866
2867        mutex_lock(&__ip_vs_mutex);
2868
2869        /* Try to find the service for which to dump destinations */
2870        if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2871                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2872                goto out_err;
2873
2874        svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2875        if (IS_ERR(svc) || svc == NULL)
2876                goto out_err;
2877
2878        /* Dump the destinations */
2879        list_for_each_entry(dest, &svc->destinations, n_list) {
2880                if (++idx <= start)
2881                        continue;
2882                if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2883                        idx--;
2884                        goto nla_put_failure;
2885                }
2886        }
2887
2888nla_put_failure:
2889        cb->args[0] = idx;
2890        ip_vs_service_put(svc);
2891
2892out_err:
2893        mutex_unlock(&__ip_vs_mutex);
2894
2895        return skb->len;
2896}
2897
2898static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2899                                 struct nlattr *nla, int full_entry)
2900{
2901        struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2902        struct nlattr *nla_addr, *nla_port;
2903
2904        /* Parse mandatory identifying destination fields first */
2905        if (nla == NULL ||
2906            nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2907                return -EINVAL;
2908
2909        nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2910        nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2911
2912        if (!(nla_addr && nla_port))
2913                return -EINVAL;
2914
2915        nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2916        udest->port = nla_get_u16(nla_port);
2917
2918        /* If a full entry was requested, check for the additional fields */
2919        if (full_entry) {
2920                struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2921                              *nla_l_thresh;
2922
2923                nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2924                nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2925                nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2926                nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2927
2928                if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2929                        return -EINVAL;
2930
2931                udest->conn_flags = nla_get_u32(nla_fwd)
2932                                    & IP_VS_CONN_F_FWD_MASK;
2933                udest->weight = nla_get_u32(nla_weight);
2934                udest->u_threshold = nla_get_u32(nla_u_thresh);
2935                udest->l_threshold = nla_get_u32(nla_l_thresh);
2936        }
2937
2938        return 0;
2939}
2940
2941static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2942                                  const char *mcast_ifn, __be32 syncid)
2943{
2944        struct nlattr *nl_daemon;
2945
2946        nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2947        if (!nl_daemon)
2948                return -EMSGSIZE;
2949
2950        NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2951        NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2952        NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2953
2954        nla_nest_end(skb, nl_daemon);
2955
2956        return 0;
2957
2958nla_put_failure:
2959        nla_nest_cancel(skb, nl_daemon);
2960        return -EMSGSIZE;
2961}
2962
2963static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2964                                  const char *mcast_ifn, __be32 syncid,
2965                                  struct netlink_callback *cb)
2966{
2967        void *hdr;
2968        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2969                          &ip_vs_genl_family, NLM_F_MULTI,
2970                          IPVS_CMD_NEW_DAEMON);
2971        if (!hdr)
2972                return -EMSGSIZE;
2973
2974        if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2975                goto nla_put_failure;
2976
2977        return genlmsg_end(skb, hdr);
2978
2979nla_put_failure:
2980        genlmsg_cancel(skb, hdr);
2981        return -EMSGSIZE;
2982}
2983
2984static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2985                                   struct netlink_callback *cb)
2986{
2987        mutex_lock(&__ip_vs_mutex);
2988        if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2989                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2990                                           ip_vs_master_mcast_ifn,
2991                                           ip_vs_master_syncid, cb) < 0)
2992                        goto nla_put_failure;
2993
2994                cb->args[0] = 1;
2995        }
2996
2997        if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2998                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2999                                           ip_vs_backup_mcast_ifn,
3000                                           ip_vs_backup_syncid, cb) < 0)
3001                        goto nla_put_failure;
3002
3003                cb->args[1] = 1;
3004        }
3005
3006nla_put_failure:
3007        mutex_unlock(&__ip_vs_mutex);
3008
3009        return skb->len;
3010}
3011
3012static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3013{
3014        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3015              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3016              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3017                return -EINVAL;
3018
3019        return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3020                                 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3021                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3022}
3023
3024static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3025{
3026        if (!attrs[IPVS_DAEMON_ATTR_STATE])
3027                return -EINVAL;
3028
3029        return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3030}
3031
3032static int ip_vs_genl_set_config(struct nlattr **attrs)
3033{
3034        struct ip_vs_timeout_user t;
3035
3036        __ip_vs_get_timeouts(&t);
3037
3038        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3039                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3040
3041        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3042                t.tcp_fin_timeout =
3043                        nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3044
3045        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3046                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3047
3048        return ip_vs_set_timeout(&t);
3049}
3050
3051static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3052{
3053        struct ip_vs_service *svc = NULL;
3054        struct ip_vs_service_user_kern usvc;
3055        struct ip_vs_dest_user_kern udest;
3056        int ret = 0, cmd;
3057        int need_full_svc = 0, need_full_dest = 0;
3058
3059        cmd = info->genlhdr->cmd;
3060
3061        mutex_lock(&__ip_vs_mutex);
3062
3063        if (cmd == IPVS_CMD_FLUSH) {
3064                ret = ip_vs_flush();
3065                goto out;
3066        } else if (cmd == IPVS_CMD_SET_CONFIG) {
3067                ret = ip_vs_genl_set_config(info->attrs);
3068                goto out;
3069        } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3070                   cmd == IPVS_CMD_DEL_DAEMON) {
3071
3072                struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3073
3074                if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3075                    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3076                                     info->attrs[IPVS_CMD_ATTR_DAEMON],
3077                                     ip_vs_daemon_policy)) {
3078                        ret = -EINVAL;
3079                        goto out;
3080                }
3081
3082                if (cmd == IPVS_CMD_NEW_DAEMON)
3083                        ret = ip_vs_genl_new_daemon(daemon_attrs);
3084                else
3085                        ret = ip_vs_genl_del_daemon(daemon_attrs);
3086                goto out;
3087        } else if (cmd == IPVS_CMD_ZERO &&
3088                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3089                ret = ip_vs_zero_all();
3090                goto out;
3091        }
3092
3093        /* All following commands require a service argument, so check if we
3094         * received a valid one. We need a full service specification when
3095         * adding / editing a service. Only identifying members otherwise. */
3096        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3097                need_full_svc = 1;
3098
3099        ret = ip_vs_genl_parse_service(&usvc,
3100                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
3101                                       need_full_svc);
3102        if (ret)
3103                goto out;
3104
3105        /* Lookup the exact service by <protocol, addr, port> or fwmark */
3106        if (usvc.fwmark == 0)
3107                svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3108                                          &usvc.addr, usvc.port);
3109        else
3110                svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3111
3112        /* Unless we're adding a new service, the service must already exist */
3113        if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3114                ret = -ESRCH;
3115                goto out;
3116        }
3117
3118        /* Destination commands require a valid destination argument. For
3119         * adding / editing a destination, we need a full destination
3120         * specification. */
3121        if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3122            cmd == IPVS_CMD_DEL_DEST) {
3123                if (cmd != IPVS_CMD_DEL_DEST)
3124                        need_full_dest = 1;
3125
3126                ret = ip_vs_genl_parse_dest(&udest,
3127                                            info->attrs[IPVS_CMD_ATTR_DEST],
3128                                            need_full_dest);
3129                if (ret)
3130                        goto out;
3131        }
3132
3133        switch (cmd) {
3134        case IPVS_CMD_NEW_SERVICE:
3135                if (svc == NULL)
3136                        ret = ip_vs_add_service(&usvc, &svc);
3137                else
3138                        ret = -EEXIST;
3139                break;
3140        case IPVS_CMD_SET_SERVICE:
3141                ret = ip_vs_edit_service(svc, &usvc);
3142                break;
3143        case IPVS_CMD_DEL_SERVICE:
3144                ret = ip_vs_del_service(svc);
3145                break;
3146        case IPVS_CMD_NEW_DEST:
3147                ret = ip_vs_add_dest(svc, &udest);
3148                break;
3149        case IPVS_CMD_SET_DEST:
3150                ret = ip_vs_edit_dest(svc, &udest);
3151                break;
3152        case IPVS_CMD_DEL_DEST:
3153                ret = ip_vs_del_dest(svc, &udest);
3154                break;
3155        case IPVS_CMD_ZERO:
3156                ret = ip_vs_zero_service(svc);
3157                break;
3158        default:
3159                ret = -EINVAL;
3160        }
3161
3162out:
3163        if (svc)
3164                ip_vs_service_put(svc);
3165        mutex_unlock(&__ip_vs_mutex);
3166
3167        return ret;
3168}
3169
3170static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3171{
3172        struct sk_buff *msg;
3173        void *reply;
3174        int ret, cmd, reply_cmd;
3175
3176        cmd = info->genlhdr->cmd;
3177
3178        if (cmd == IPVS_CMD_GET_SERVICE)
3179                reply_cmd = IPVS_CMD_NEW_SERVICE;
3180        else if (cmd == IPVS_CMD_GET_INFO)
3181                reply_cmd = IPVS_CMD_SET_INFO;
3182        else if (cmd == IPVS_CMD_GET_CONFIG)
3183                reply_cmd = IPVS_CMD_SET_CONFIG;
3184        else {
3185                IP_VS_ERR("unknown Generic Netlink command\n");
3186                return -EINVAL;
3187        }
3188
3189        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3190        if (!msg)
3191                return -ENOMEM;
3192
3193        mutex_lock(&__ip_vs_mutex);
3194
3195        reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3196        if (reply == NULL)
3197                goto nla_put_failure;
3198
3199        switch (cmd) {
3200        case IPVS_CMD_GET_SERVICE:
3201        {
3202                struct ip_vs_service *svc;
3203
3204                svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3205                if (IS_ERR(svc)) {
3206                        ret = PTR_ERR(svc);
3207                        goto out_err;
3208                } else if (svc) {
3209                        ret = ip_vs_genl_fill_service(msg, svc);
3210                        ip_vs_service_put(svc);
3211                        if (ret)
3212                                goto nla_put_failure;
3213                } else {
3214                        ret = -ESRCH;
3215                        goto out_err;
3216                }
3217
3218                break;
3219        }
3220
3221        case IPVS_CMD_GET_CONFIG:
3222        {
3223                struct ip_vs_timeout_user t;
3224
3225                __ip_vs_get_timeouts(&t);
3226#ifdef CONFIG_IP_VS_PROTO_TCP
3227                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3228                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3229                            t.tcp_fin_timeout);
3230#endif
3231#ifdef CONFIG_IP_VS_PROTO_UDP
3232                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3233#endif
3234
3235                break;
3236        }
3237
3238        case IPVS_CMD_GET_INFO:
3239                NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3240                NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3241                            IP_VS_CONN_TAB_SIZE);
3242                break;
3243        }
3244
3245        genlmsg_end(msg, reply);
3246        ret = genlmsg_unicast(msg, info->snd_pid);
3247        goto out;
3248
3249nla_put_failure:
3250        IP_VS_ERR("not enough space in Netlink message\n");
3251        ret = -EMSGSIZE;
3252
3253out_err:
3254        nlmsg_free(msg);
3255out:
3256        mutex_unlock(&__ip_vs_mutex);
3257
3258        return ret;
3259}
3260
3261
3262static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3263        {
3264                .cmd    = IPVS_CMD_NEW_SERVICE,
3265                .flags  = GENL_ADMIN_PERM,
3266                .policy = ip_vs_cmd_policy,
3267                .doit   = ip_vs_genl_set_cmd,
3268        },
3269        {
3270                .cmd    = IPVS_CMD_SET_SERVICE,
3271                .flags  = GENL_ADMIN_PERM,
3272                .policy = ip_vs_cmd_policy,
3273                .doit   = ip_vs_genl_set_cmd,
3274        },
3275        {
3276                .cmd    = IPVS_CMD_DEL_SERVICE,
3277                .flags  = GENL_ADMIN_PERM,
3278                .policy = ip_vs_cmd_policy,
3279                .doit   = ip_vs_genl_set_cmd,
3280        },
3281        {
3282                .cmd    = IPVS_CMD_GET_SERVICE,
3283                .flags  = GENL_ADMIN_PERM,
3284                .doit   = ip_vs_genl_get_cmd,
3285                .dumpit = ip_vs_genl_dump_services,
3286                .policy = ip_vs_cmd_policy,
3287        },
3288        {
3289                .cmd    = IPVS_CMD_NEW_DEST,
3290                .flags  = GENL_ADMIN_PERM,
3291                .policy = ip_vs_cmd_policy,
3292                .doit   = ip_vs_genl_set_cmd,
3293        },
3294        {
3295                .cmd    = IPVS_CMD_SET_DEST,
3296                .flags  = GENL_ADMIN_PERM,
3297                .policy = ip_vs_cmd_policy,
3298                .doit   = ip_vs_genl_set_cmd,
3299        },
3300        {
3301                .cmd    = IPVS_CMD_DEL_DEST,
3302                .flags  = GENL_ADMIN_PERM,
3303                .policy = ip_vs_cmd_policy,
3304                .doit   = ip_vs_genl_set_cmd,
3305        },
3306        {
3307                .cmd    = IPVS_CMD_GET_DEST,
3308                .flags  = GENL_ADMIN_PERM,
3309                .policy = ip_vs_cmd_policy,
3310                .dumpit = ip_vs_genl_dump_dests,
3311        },
3312        {
3313                .cmd    = IPVS_CMD_NEW_DAEMON,
3314                .flags  = GENL_ADMIN_PERM,
3315                .policy = ip_vs_cmd_policy,
3316                .doit   = ip_vs_genl_set_cmd,
3317        },
3318        {
3319                .cmd    = IPVS_CMD_DEL_DAEMON,
3320                .flags  = GENL_ADMIN_PERM,
3321                .policy = ip_vs_cmd_policy,
3322                .doit   = ip_vs_genl_set_cmd,
3323        },
3324        {
3325                .cmd    = IPVS_CMD_GET_DAEMON,
3326                .flags  = GENL_ADMIN_PERM,
3327                .dumpit = ip_vs_genl_dump_daemons,
3328        },
3329        {
3330                .cmd    = IPVS_CMD_SET_CONFIG,
3331                .flags  = GENL_ADMIN_PERM,
3332                .policy = ip_vs_cmd_policy,
3333                .doit   = ip_vs_genl_set_cmd,
3334        },
3335        {
3336                .cmd    = IPVS_CMD_GET_CONFIG,
3337                .flags  = GENL_ADMIN_PERM,
3338                .doit   = ip_vs_genl_get_cmd,
3339        },
3340        {
3341                .cmd    = IPVS_CMD_GET_INFO,
3342                .flags  = GENL_ADMIN_PERM,
3343                .doit   = ip_vs_genl_get_cmd,
3344        },
3345        {
3346                .cmd    = IPVS_CMD_ZERO,
3347                .flags  = GENL_ADMIN_PERM,
3348                .policy = ip_vs_cmd_policy,
3349                .doit   = ip_vs_genl_set_cmd,
3350        },
3351        {
3352                .cmd    = IPVS_CMD_FLUSH,
3353                .flags  = GENL_ADMIN_PERM,
3354                .doit   = ip_vs_genl_set_cmd,
3355        },
3356};
3357
3358static int __init ip_vs_genl_register(void)
3359{
3360        int ret, i;
3361
3362        ret = genl_register_family(&ip_vs_genl_family);
3363        if (ret)
3364                return ret;
3365
3366        for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3367                ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3368                if (ret)
3369                        goto err_out;
3370        }
3371        return 0;
3372
3373err_out:
3374        genl_unregister_family(&ip_vs_genl_family);
3375        return ret;
3376}
3377
3378static void ip_vs_genl_unregister(void)
3379{
3380        genl_unregister_family(&ip_vs_genl_family);
3381}
3382
3383/* End of Generic Netlink interface definitions */
3384
3385
3386int __init ip_vs_control_init(void)
3387{
3388        int ret;
3389        int idx;
3390
3391        EnterFunction(2);
3392
3393        ret = nf_register_sockopt(&ip_vs_sockopts);
3394        if (ret) {
3395                IP_VS_ERR("cannot register sockopt.\n");
3396                return ret;
3397        }
3398
3399        ret = ip_vs_genl_register();
3400        if (ret) {
3401                IP_VS_ERR("cannot register Generic Netlink interface.\n");
3402                nf_unregister_sockopt(&ip_vs_sockopts);
3403                return ret;
3404        }
3405
3406        proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3407        proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3408
3409        sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3410
3411        /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3412        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3413                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3414                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3415        }
3416        for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3417                INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3418        }
3419
3420        ip_vs_new_estimator(&ip_vs_stats);
3421
3422        /* Hook the defense timer */
3423        schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3424
3425        LeaveFunction(2);
3426        return 0;
3427}
3428
3429
3430void ip_vs_control_cleanup(void)
3431{
3432        EnterFunction(2);
3433        ip_vs_trash_cleanup();
3434        cancel_rearming_delayed_work(&defense_work);
3435        cancel_work_sync(&defense_work.work);
3436        ip_vs_kill_estimator(&ip_vs_stats);
3437        unregister_sysctl_table(sysctl_header);
3438        proc_net_remove(&init_net, "ip_vs_stats");
3439        proc_net_remove(&init_net, "ip_vs");
3440        ip_vs_genl_unregister();
3441        nf_unregister_sockopt(&ip_vs_sockopts);
3442        LeaveFunction(2);
3443}
3444