linux/net/sunrpc/svc.c
<<
>>
Prefs
   1/*
   2 * linux/net/sunrpc/svc.c
   3 *
   4 * High-level RPC service routines
   5 *
   6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
   7 *
   8 * Multiple threads pools and NUMAisation
   9 * Copyright (c) 2006 Silicon Graphics, Inc.
  10 * by Greg Banks <gnb@melbourne.sgi.com>
  11 */
  12
  13#include <linux/linkage.h>
  14#include <linux/sched.h>
  15#include <linux/errno.h>
  16#include <linux/net.h>
  17#include <linux/in.h>
  18#include <linux/mm.h>
  19#include <linux/interrupt.h>
  20#include <linux/module.h>
  21#include <linux/kthread.h>
  22#include <linux/slab.h>
  23
  24#include <linux/sunrpc/types.h>
  25#include <linux/sunrpc/xdr.h>
  26#include <linux/sunrpc/stats.h>
  27#include <linux/sunrpc/svcsock.h>
  28#include <linux/sunrpc/clnt.h>
  29#include <linux/sunrpc/bc_xprt.h>
  30
  31#define RPCDBG_FACILITY RPCDBG_SVCDSP
  32
/*
 * Forward declaration: svc_unregister() is defined further down in this
 * file but is needed by __svc_create() and svc_destroy().
 */
static void svc_unregister(const struct svc_serv *serv);

/*
 * A serv is treated as "pooled" when it has a thread function installed;
 * svc_create_pooled() is the place in this file that sets sv_function.
 */
#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
  36
/*
 * Mode for mapping cpus to pools.
 *
 * SVC_POOL_AUTO is a request rather than a real mapping:
 * svc_pool_map_get() replaces it with the result of
 * svc_pool_map_choose_mode() before the map is built.
 */
enum {
        SVC_POOL_AUTO = -1,     /* choose one of the others */
        SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
                                 * (legacy & UP mode) */
        SVC_POOL_PERCPU,        /* one pool per cpu */
        SVC_POOL_PERNODE        /* one pool per numa node */
};
/* Initial mode, and the mode restored when the last map user goes away. */
#define SVC_POOL_DEFAULT        SVC_POOL_GLOBAL
  48
/*
 * Structure for mapping cpus to pools and vice versa.
 * Built by svc_pool_map_get() when the first user takes a reference
 * and torn down by svc_pool_map_put() when the last reference is
 * dropped.
 */
static struct svc_pool_map {
        int count;                      /* How many svc_servs use us */
        int mode;                       /* Note: int not enum to avoid
                                         * warnings about "enumeration value
                                         * not handled in switch" */
        unsigned int npools;            /* number of pools in the current map */
        unsigned int *pool_to;          /* maps pool id to cpu or node */
        unsigned int *to_pool;          /* maps cpu or node to pool id */
} svc_pool_map = {
        .count = 0,
        .mode = SVC_POOL_DEFAULT
};
/*
 * Taken by the pool_mode setter and by svc_pool_map_get()/_put().
 * The readers svc_pool_for_cpu() and svc_pool_map_set_cpumask() do
 * not take it.
 */
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
  66
  67static int
  68param_set_pool_mode(const char *val, struct kernel_param *kp)
  69{
  70        int *ip = (int *)kp->arg;
  71        struct svc_pool_map *m = &svc_pool_map;
  72        int err;
  73
  74        mutex_lock(&svc_pool_map_mutex);
  75
  76        err = -EBUSY;
  77        if (m->count)
  78                goto out;
  79
  80        err = 0;
  81        if (!strncmp(val, "auto", 4))
  82                *ip = SVC_POOL_AUTO;
  83        else if (!strncmp(val, "global", 6))
  84                *ip = SVC_POOL_GLOBAL;
  85        else if (!strncmp(val, "percpu", 6))
  86                *ip = SVC_POOL_PERCPU;
  87        else if (!strncmp(val, "pernode", 7))
  88                *ip = SVC_POOL_PERNODE;
  89        else
  90                err = -EINVAL;
  91
  92out:
  93        mutex_unlock(&svc_pool_map_mutex);
  94        return err;
  95}
  96
  97static int
  98param_get_pool_mode(char *buf, struct kernel_param *kp)
  99{
 100        int *ip = (int *)kp->arg;
 101
 102        switch (*ip)
 103        {
 104        case SVC_POOL_AUTO:
 105                return strlcpy(buf, "auto", 20);
 106        case SVC_POOL_GLOBAL:
 107                return strlcpy(buf, "global", 20);
 108        case SVC_POOL_PERCPU:
 109                return strlcpy(buf, "percpu", 20);
 110        case SVC_POOL_PERNODE:
 111                return strlcpy(buf, "pernode", 20);
 112        default:
 113                return sprintf(buf, "%d", *ip);
 114        }
 115}
 116
/*
 * Expose svc_pool_map.mode as the "pool_mode" module parameter
 * (permissions 0644), parsed and formatted by
 * param_set_pool_mode() / param_get_pool_mode().
 */
module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
                 &svc_pool_map.mode, 0644);
 119
 120/*
 121 * Detect best pool mapping mode heuristically,
 122 * according to the machine's topology.
 123 */
 124static int
 125svc_pool_map_choose_mode(void)
 126{
 127        unsigned int node;
 128
 129        if (nr_online_nodes > 1) {
 130                /*
 131                 * Actually have multiple NUMA nodes,
 132                 * so split pools on NUMA node boundaries
 133                 */
 134                return SVC_POOL_PERNODE;
 135        }
 136
 137        node = first_online_node;
 138        if (nr_cpus_node(node) > 2) {
 139                /*
 140                 * Non-trivial SMP, or CONFIG_NUMA on
 141                 * non-NUMA hardware, e.g. with a generic
 142                 * x86_64 kernel on Xeons.  In this case we
 143                 * want to divide the pools on cpu boundaries.
 144                 */
 145                return SVC_POOL_PERCPU;
 146        }
 147
 148        /* default: one global pool */
 149        return SVC_POOL_GLOBAL;
 150}
 151
 152/*
 153 * Allocate the to_pool[] and pool_to[] arrays.
 154 * Returns 0 on success or an errno.
 155 */
 156static int
 157svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
 158{
 159        m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
 160        if (!m->to_pool)
 161                goto fail;
 162        m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
 163        if (!m->pool_to)
 164                goto fail_free;
 165
 166        return 0;
 167
 168fail_free:
 169        kfree(m->to_pool);
 170fail:
 171        return -ENOMEM;
 172}
 173
 174/*
 175 * Initialise the pool map for SVC_POOL_PERCPU mode.
 176 * Returns number of pools or <0 on error.
 177 */
 178static int
 179svc_pool_map_init_percpu(struct svc_pool_map *m)
 180{
 181        unsigned int maxpools = nr_cpu_ids;
 182        unsigned int pidx = 0;
 183        unsigned int cpu;
 184        int err;
 185
 186        err = svc_pool_map_alloc_arrays(m, maxpools);
 187        if (err)
 188                return err;
 189
 190        for_each_online_cpu(cpu) {
 191                BUG_ON(pidx > maxpools);
 192                m->to_pool[cpu] = pidx;
 193                m->pool_to[pidx] = cpu;
 194                pidx++;
 195        }
 196        /* cpus brought online later all get mapped to pool0, sorry */
 197
 198        return pidx;
 199};
 200
 201
 202/*
 203 * Initialise the pool map for SVC_POOL_PERNODE mode.
 204 * Returns number of pools or <0 on error.
 205 */
 206static int
 207svc_pool_map_init_pernode(struct svc_pool_map *m)
 208{
 209        unsigned int maxpools = nr_node_ids;
 210        unsigned int pidx = 0;
 211        unsigned int node;
 212        int err;
 213
 214        err = svc_pool_map_alloc_arrays(m, maxpools);
 215        if (err)
 216                return err;
 217
 218        for_each_node_with_cpus(node) {
 219                /* some architectures (e.g. SN2) have cpuless nodes */
 220                BUG_ON(pidx > maxpools);
 221                m->to_pool[node] = pidx;
 222                m->pool_to[pidx] = node;
 223                pidx++;
 224        }
 225        /* nodes brought online later all get mapped to pool0, sorry */
 226
 227        return pidx;
 228}
 229
 230
 231/*
 232 * Add a reference to the global map of cpus to pools (and
 233 * vice versa).  Initialise the map if we're the first user.
 234 * Returns the number of pools.
 235 */
 236static unsigned int
 237svc_pool_map_get(void)
 238{
 239        struct svc_pool_map *m = &svc_pool_map;
 240        int npools = -1;
 241
 242        mutex_lock(&svc_pool_map_mutex);
 243
 244        if (m->count++) {
 245                mutex_unlock(&svc_pool_map_mutex);
 246                return m->npools;
 247        }
 248
 249        if (m->mode == SVC_POOL_AUTO)
 250                m->mode = svc_pool_map_choose_mode();
 251
 252        switch (m->mode) {
 253        case SVC_POOL_PERCPU:
 254                npools = svc_pool_map_init_percpu(m);
 255                break;
 256        case SVC_POOL_PERNODE:
 257                npools = svc_pool_map_init_pernode(m);
 258                break;
 259        }
 260
 261        if (npools < 0) {
 262                /* default, or memory allocation failure */
 263                npools = 1;
 264                m->mode = SVC_POOL_GLOBAL;
 265        }
 266        m->npools = npools;
 267
 268        mutex_unlock(&svc_pool_map_mutex);
 269        return m->npools;
 270}
 271
 272
 273/*
 274 * Drop a reference to the global map of cpus to pools.
 275 * When the last reference is dropped, the map data is
 276 * freed; this allows the sysadmin to change the pool
 277 * mode using the pool_mode module option without
 278 * rebooting or re-loading sunrpc.ko.
 279 */
 280static void
 281svc_pool_map_put(void)
 282{
 283        struct svc_pool_map *m = &svc_pool_map;
 284
 285        mutex_lock(&svc_pool_map_mutex);
 286
 287        if (!--m->count) {
 288                m->mode = SVC_POOL_DEFAULT;
 289                kfree(m->to_pool);
 290                kfree(m->pool_to);
 291                m->npools = 0;
 292        }
 293
 294        mutex_unlock(&svc_pool_map_mutex);
 295}
 296
 297
 298/*
 299 * Set the given thread's cpus_allowed mask so that it
 300 * will only run on cpus in the given pool.
 301 */
 302static inline void
 303svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
 304{
 305        struct svc_pool_map *m = &svc_pool_map;
 306        unsigned int node = m->pool_to[pidx];
 307
 308        /*
 309         * The caller checks for sv_nrpools > 1, which
 310         * implies that we've been initialized.
 311         */
 312        BUG_ON(m->count == 0);
 313
 314        switch (m->mode) {
 315        case SVC_POOL_PERCPU:
 316        {
 317                set_cpus_allowed_ptr(task, cpumask_of(node));
 318                break;
 319        }
 320        case SVC_POOL_PERNODE:
 321        {
 322                set_cpus_allowed_ptr(task, cpumask_of_node(node));
 323                break;
 324        }
 325        }
 326}
 327
 328/*
 329 * Use the mapping mode to choose a pool for a given CPU.
 330 * Used when enqueueing an incoming RPC.  Always returns
 331 * a non-NULL pool pointer.
 332 */
 333struct svc_pool *
 334svc_pool_for_cpu(struct svc_serv *serv, int cpu)
 335{
 336        struct svc_pool_map *m = &svc_pool_map;
 337        unsigned int pidx = 0;
 338
 339        /*
 340         * An uninitialised map happens in a pure client when
 341         * lockd is brought up, so silently treat it the
 342         * same as SVC_POOL_GLOBAL.
 343         */
 344        if (svc_serv_is_pooled(serv)) {
 345                switch (m->mode) {
 346                case SVC_POOL_PERCPU:
 347                        pidx = m->to_pool[cpu];
 348                        break;
 349                case SVC_POOL_PERNODE:
 350                        pidx = m->to_pool[cpu_to_node(cpu)];
 351                        break;
 352                }
 353        }
 354        return &serv->sv_pools[pidx % serv->sv_nrpools];
 355}
 356
 357
 358/*
 359 * Create an RPC service
 360 */
 361static struct svc_serv *
 362__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 363             void (*shutdown)(struct svc_serv *serv))
 364{
 365        struct svc_serv *serv;
 366        unsigned int vers;
 367        unsigned int xdrsize;
 368        unsigned int i;
 369
 370        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 371                return NULL;
 372        serv->sv_name      = prog->pg_name;
 373        serv->sv_program   = prog;
 374        serv->sv_nrthreads = 1;
 375        serv->sv_stats     = prog->pg_stats;
 376        if (bufsize > RPCSVC_MAXPAYLOAD)
 377                bufsize = RPCSVC_MAXPAYLOAD;
 378        serv->sv_max_payload = bufsize? bufsize : 4096;
 379        serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
 380        serv->sv_shutdown  = shutdown;
 381        xdrsize = 0;
 382        while (prog) {
 383                prog->pg_lovers = prog->pg_nvers-1;
 384                for (vers=0; vers<prog->pg_nvers ; vers++)
 385                        if (prog->pg_vers[vers]) {
 386                                prog->pg_hivers = vers;
 387                                if (prog->pg_lovers > vers)
 388                                        prog->pg_lovers = vers;
 389                                if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
 390                                        xdrsize = prog->pg_vers[vers]->vs_xdrsize;
 391                        }
 392                prog = prog->pg_next;
 393        }
 394        serv->sv_xdrsize   = xdrsize;
 395        INIT_LIST_HEAD(&serv->sv_tempsocks);
 396        INIT_LIST_HEAD(&serv->sv_permsocks);
 397        init_timer(&serv->sv_temptimer);
 398        spin_lock_init(&serv->sv_lock);
 399
 400        serv->sv_nrpools = npools;
 401        serv->sv_pools =
 402                kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
 403                        GFP_KERNEL);
 404        if (!serv->sv_pools) {
 405                kfree(serv);
 406                return NULL;
 407        }
 408
 409        for (i = 0; i < serv->sv_nrpools; i++) {
 410                struct svc_pool *pool = &serv->sv_pools[i];
 411
 412                dprintk("svc: initialising pool %u for %s\n",
 413                                i, serv->sv_name);
 414
 415                pool->sp_id = i;
 416                INIT_LIST_HEAD(&pool->sp_threads);
 417                INIT_LIST_HEAD(&pool->sp_sockets);
 418                INIT_LIST_HEAD(&pool->sp_all_threads);
 419                spin_lock_init(&pool->sp_lock);
 420        }
 421
 422        /* Remove any stale portmap registrations */
 423        svc_unregister(serv);
 424
 425        return serv;
 426}
 427
 428struct svc_serv *
 429svc_create(struct svc_program *prog, unsigned int bufsize,
 430           void (*shutdown)(struct svc_serv *serv))
 431{
 432        return __svc_create(prog, bufsize, /*npools*/1, shutdown);
 433}
 434EXPORT_SYMBOL_GPL(svc_create);
 435
 436struct svc_serv *
 437svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 438                  void (*shutdown)(struct svc_serv *serv),
 439                  svc_thread_fn func, struct module *mod)
 440{
 441        struct svc_serv *serv;
 442        unsigned int npools = svc_pool_map_get();
 443
 444        serv = __svc_create(prog, bufsize, npools, shutdown);
 445
 446        if (serv != NULL) {
 447                serv->sv_function = func;
 448                serv->sv_module = mod;
 449        }
 450
 451        return serv;
 452}
 453EXPORT_SYMBOL_GPL(svc_create_pooled);
 454
/*
 * Destroy an RPC service. Should be called with appropriate locking to
 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
 *
 * Each call drops one count from sv_nrthreads.  The service is only
 * torn down and freed once that count reaches zero; until then the
 * call just refreshes the socket buffers and returns.
 */
void
svc_destroy(struct svc_serv *serv)
{
        dprintk("svc: svc_destroy(%s, %d)\n",
                                serv->sv_program->pg_name,
                                serv->sv_nrthreads);

        if (serv->sv_nrthreads) {
                if (--(serv->sv_nrthreads) != 0) {
                        /* still in use: update buffers for the new count and keep the serv */
                        svc_sock_update_bufs(serv);
                        return;
                }
        } else
                /* count was already zero: complain, then tear down anyway */
                printk("svc_destroy: no threads for serv=%p!\n", serv);

        del_timer_sync(&serv->sv_temptimer);

        svc_close_all(&serv->sv_tempsocks);

        /* service-specific hook, run before the permanent sockets are closed */
        if (serv->sv_shutdown)
                serv->sv_shutdown(serv);

        svc_close_all(&serv->sv_permsocks);

        /* both socket lists must be empty by now */
        BUG_ON(!list_empty(&serv->sv_permsocks));
        BUG_ON(!list_empty(&serv->sv_tempsocks));

        cache_clean_deferred(serv);

        /* pairs with the svc_pool_map_get() done in svc_create_pooled() */
        if (svc_serv_is_pooled(serv))
                svc_pool_map_put();

#if defined(CONFIG_NFS_V4_1)
        svc_sock_destroy(serv->bc_xprt);
#endif /* CONFIG_NFS_V4_1 */

        svc_unregister(serv);
        kfree(serv->sv_pools);
        kfree(serv);
}
EXPORT_SYMBOL_GPL(svc_destroy);
 500
 501/*
 502 * Allocate an RPC server's buffer space.
 503 * We allocate pages and place them in rq_argpages.
 504 */
 505static int
 506svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
 507{
 508        unsigned int pages, arghi;
 509
 510        /* bc_xprt uses fore channel allocated buffers */
 511        if (svc_is_backchannel(rqstp))
 512                return 1;
 513
 514        pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 515                                       * We assume one is at most one page
 516                                       */
 517        arghi = 0;
 518        BUG_ON(pages > RPCSVC_MAXPAGES);
 519        while (pages) {
 520                struct page *p = alloc_page(GFP_KERNEL);
 521                if (!p)
 522                        break;
 523                rqstp->rq_pages[arghi++] = p;
 524                pages--;
 525        }
 526        return pages == 0;
 527}
 528
 529/*
 530 * Release an RPC server buffer
 531 */
 532static void
 533svc_release_buffer(struct svc_rqst *rqstp)
 534{
 535        unsigned int i;
 536
 537        for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
 538                if (rqstp->rq_pages[i])
 539                        put_page(rqstp->rq_pages[i]);
 540}
 541
 542struct svc_rqst *
 543svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
 544{
 545        struct svc_rqst *rqstp;
 546
 547        rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
 548        if (!rqstp)
 549                goto out_enomem;
 550
 551        init_waitqueue_head(&rqstp->rq_wait);
 552
 553        serv->sv_nrthreads++;
 554        spin_lock_bh(&pool->sp_lock);
 555        pool->sp_nrthreads++;
 556        list_add(&rqstp->rq_all, &pool->sp_all_threads);
 557        spin_unlock_bh(&pool->sp_lock);
 558        rqstp->rq_server = serv;
 559        rqstp->rq_pool = pool;
 560
 561        rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
 562        if (!rqstp->rq_argp)
 563                goto out_thread;
 564
 565        rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
 566        if (!rqstp->rq_resp)
 567                goto out_thread;
 568
 569        if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
 570                goto out_thread;
 571
 572        return rqstp;
 573out_thread:
 574        svc_exit_thread(rqstp);
 575out_enomem:
 576        return ERR_PTR(-ENOMEM);
 577}
 578EXPORT_SYMBOL_GPL(svc_prepare_thread);
 579
 580/*
 581 * Choose a pool in which to create a new thread, for svc_set_num_threads
 582 */
 583static inline struct svc_pool *
 584choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 585{
 586        if (pool != NULL)
 587                return pool;
 588
 589        return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
 590}
 591
 592/*
 593 * Choose a thread to kill, for svc_set_num_threads
 594 */
 595static inline struct task_struct *
 596choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 597{
 598        unsigned int i;
 599        struct task_struct *task = NULL;
 600
 601        if (pool != NULL) {
 602                spin_lock_bh(&pool->sp_lock);
 603        } else {
 604                /* choose a pool in round-robin fashion */
 605                for (i = 0; i < serv->sv_nrpools; i++) {
 606                        pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
 607                        spin_lock_bh(&pool->sp_lock);
 608                        if (!list_empty(&pool->sp_all_threads))
 609                                goto found_pool;
 610                        spin_unlock_bh(&pool->sp_lock);
 611                }
 612                return NULL;
 613        }
 614
 615found_pool:
 616        if (!list_empty(&pool->sp_all_threads)) {
 617                struct svc_rqst *rqstp;
 618
 619                /*
 620                 * Remove from the pool->sp_all_threads list
 621                 * so we don't try to kill it again.
 622                 */
 623                rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
 624                list_del_init(&rqstp->rq_all);
 625                task = rqstp->rq_task;
 626        }
 627        spin_unlock_bh(&pool->sp_lock);
 628
 629        return task;
 630}
 631
 632/*
 633 * Create or destroy enough new threads to make the number
 634 * of threads the given number.  If `pool' is non-NULL, applies
 635 * only to threads in that pool, otherwise round-robins between
 636 * all pools.  Must be called with a svc_get() reference and
 637 * the BKL or another lock to protect access to svc_serv fields.
 638 *
 639 * Destroying threads relies on the service threads filling in
 640 * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
 641 * has been created using svc_create_pooled().
 642 *
 643 * Based on code that used to be in nfsd_svc() but tweaked
 644 * to be pool-aware.
 645 */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
        struct svc_rqst *rqstp;
        struct task_struct *task;
        struct svc_pool *chosen_pool;
        int error = 0;
        /* round-robin cursor shared by choose_pool() and choose_victim() */
        unsigned int state = serv->sv_nrthreads-1;

        /* turn the requested total into a delta: >0 create, <0 destroy */
        if (pool == NULL) {
                /* The -1 assumes caller has done a svc_get() */
                nrservs -= (serv->sv_nrthreads-1);
        } else {
                spin_lock_bh(&pool->sp_lock);
                nrservs -= pool->sp_nrthreads;
                spin_unlock_bh(&pool->sp_lock);
        }

        /* create new threads */
        while (nrservs > 0) {
                nrservs--;
                chosen_pool = choose_pool(serv, pool, &state);

                rqstp = svc_prepare_thread(serv, chosen_pool);
                if (IS_ERR(rqstp)) {
                        error = PTR_ERR(rqstp);
                        break;
                }

                /* module reference for the new thread; dropped again below on failure */
                __module_get(serv->sv_module);
                task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
                if (IS_ERR(task)) {
                        error = PTR_ERR(task);
                        module_put(serv->sv_module);
                        /* undo svc_prepare_thread() */
                        svc_exit_thread(rqstp);
                        break;
                }

                /* choose_victim() relies on rq_task being filled in */
                rqstp->rq_task = task;
                /* set the cpu mask before the thread is woken below */
                if (serv->sv_nrpools > 1)
                        svc_pool_map_set_cpumask(task, chosen_pool->sp_id);

                svc_sock_update_bufs(serv);
                wake_up_process(task);
        }
        /* destroy old threads */
        while (nrservs < 0 &&
               (task = choose_victim(serv, pool, &state)) != NULL) {
                /* the victim is signalled with SIGINT */
                send_sig(SIGINT, task, 1);
                nrservs++;
        }

        /* 0, or the errno of the first thread creation that failed */
        return error;
}
EXPORT_SYMBOL_GPL(svc_set_num_threads);
 701
 702/*
 703 * Called from a server thread as it's exiting. Caller must hold the BKL or
 704 * the "service mutex", whichever is appropriate for the service.
 705 */
 706void
 707svc_exit_thread(struct svc_rqst *rqstp)
 708{
 709        struct svc_serv *serv = rqstp->rq_server;
 710        struct svc_pool *pool = rqstp->rq_pool;
 711
 712        svc_release_buffer(rqstp);
 713        kfree(rqstp->rq_resp);
 714        kfree(rqstp->rq_argp);
 715        kfree(rqstp->rq_auth_data);
 716
 717        spin_lock_bh(&pool->sp_lock);
 718        pool->sp_nrthreads--;
 719        list_del(&rqstp->rq_all);
 720        spin_unlock_bh(&pool->sp_lock);
 721
 722        kfree(rqstp);
 723
 724        /* Release the server */
 725        if (serv)
 726                svc_destroy(serv);
 727}
 728EXPORT_SYMBOL_GPL(svc_exit_thread);
 729
 730/*
 731 * Register an "inet" protocol family netid with the local
 732 * rpcbind daemon via an rpcbind v4 SET request.
 733 *
 734 * No netconfig infrastructure is available in the kernel, so
 735 * we map IP_ protocol numbers to netids by hand.
 736 *
 737 * Returns zero on success; a negative errno value is returned
 738 * if any error occurs.
 739 */
 740static int __svc_rpcb_register4(const u32 program, const u32 version,
 741                                const unsigned short protocol,
 742                                const unsigned short port)
 743{
 744        const struct sockaddr_in sin = {
 745                .sin_family             = AF_INET,
 746                .sin_addr.s_addr        = htonl(INADDR_ANY),
 747                .sin_port               = htons(port),
 748        };
 749        const char *netid;
 750        int error;
 751
 752        switch (protocol) {
 753        case IPPROTO_UDP:
 754                netid = RPCBIND_NETID_UDP;
 755                break;
 756        case IPPROTO_TCP:
 757                netid = RPCBIND_NETID_TCP;
 758                break;
 759        default:
 760                return -ENOPROTOOPT;
 761        }
 762
 763        error = rpcb_v4_register(program, version,
 764                                        (const struct sockaddr *)&sin, netid);
 765
 766        /*
 767         * User space didn't support rpcbind v4, so retry this
 768         * registration request with the legacy rpcbind v2 protocol.
 769         */
 770        if (error == -EPROTONOSUPPORT)
 771                error = rpcb_register(program, version, protocol, port);
 772
 773        return error;
 774}
 775
 776#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 777/*
 778 * Register an "inet6" protocol family netid with the local
 779 * rpcbind daemon via an rpcbind v4 SET request.
 780 *
 781 * No netconfig infrastructure is available in the kernel, so
 782 * we map IP_ protocol numbers to netids by hand.
 783 *
 784 * Returns zero on success; a negative errno value is returned
 785 * if any error occurs.
 786 */
 787static int __svc_rpcb_register6(const u32 program, const u32 version,
 788                                const unsigned short protocol,
 789                                const unsigned short port)
 790{
 791        const struct sockaddr_in6 sin6 = {
 792                .sin6_family            = AF_INET6,
 793                .sin6_addr              = IN6ADDR_ANY_INIT,
 794                .sin6_port              = htons(port),
 795        };
 796        const char *netid;
 797        int error;
 798
 799        switch (protocol) {
 800        case IPPROTO_UDP:
 801                netid = RPCBIND_NETID_UDP6;
 802                break;
 803        case IPPROTO_TCP:
 804                netid = RPCBIND_NETID_TCP6;
 805                break;
 806        default:
 807                return -ENOPROTOOPT;
 808        }
 809
 810        error = rpcb_v4_register(program, version,
 811                                        (const struct sockaddr *)&sin6, netid);
 812
 813        /*
 814         * User space didn't support rpcbind version 4, so we won't
 815         * use a PF_INET6 listener.
 816         */
 817        if (error == -EPROTONOSUPPORT)
 818                error = -EAFNOSUPPORT;
 819
 820        return error;
 821}
 822#endif  /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 823
 824/*
 825 * Register a kernel RPC service via rpcbind version 4.
 826 *
 827 * Returns zero on success; a negative errno value is returned
 828 * if any error occurs.
 829 */
 830static int __svc_register(const char *progname,
 831                          const u32 program, const u32 version,
 832                          const int family,
 833                          const unsigned short protocol,
 834                          const unsigned short port)
 835{
 836        int error = -EAFNOSUPPORT;
 837
 838        switch (family) {
 839        case PF_INET:
 840                error = __svc_rpcb_register4(program, version,
 841                                                protocol, port);
 842                break;
 843#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 844        case PF_INET6:
 845                error = __svc_rpcb_register6(program, version,
 846                                                protocol, port);
 847#endif  /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 848        }
 849
 850        if (error < 0)
 851                printk(KERN_WARNING "svc: failed to register %sv%u RPC "
 852                        "service (errno %d).\n", progname, version, -error);
 853        return error;
 854}
 855
 856/**
 857 * svc_register - register an RPC service with the local portmapper
 858 * @serv: svc_serv struct for the service to register
 859 * @family: protocol family of service's listener socket
 860 * @proto: transport protocol number to advertise
 861 * @port: port to advertise
 862 *
 863 * Service is registered for any address in the passed-in protocol family
 864 */
 865int svc_register(const struct svc_serv *serv, const int family,
 866                 const unsigned short proto, const unsigned short port)
 867{
 868        struct svc_program      *progp;
 869        unsigned int            i;
 870        int                     error = 0;
 871
 872        BUG_ON(proto == 0 && port == 0);
 873
 874        for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 875                for (i = 0; i < progp->pg_nvers; i++) {
 876                        if (progp->pg_vers[i] == NULL)
 877                                continue;
 878
 879                        dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
 880                                        progp->pg_name,
 881                                        i,
 882                                        proto == IPPROTO_UDP?  "udp" : "tcp",
 883                                        port,
 884                                        family,
 885                                        progp->pg_vers[i]->vs_hidden?
 886                                                " (but not telling portmap)" : "");
 887
 888                        if (progp->pg_vers[i]->vs_hidden)
 889                                continue;
 890
 891                        error = __svc_register(progp->pg_name, progp->pg_prog,
 892                                                i, family, proto, port);
 893                        if (error < 0)
 894                                break;
 895                }
 896        }
 897
 898        return error;
 899}
 900
 901/*
 902 * If user space is running rpcbind, it should take the v4 UNSET
 903 * and clear everything for this [program, version].  If user space
 904 * is running portmap, it will reject the v4 UNSET, but won't have
 905 * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
 906 * in this case to clear all existing entries for [program, version].
 907 */
 908static void __svc_unregister(const u32 program, const u32 version,
 909                             const char *progname)
 910{
 911        int error;
 912
 913        error = rpcb_v4_register(program, version, NULL, "");
 914
 915        /*
 916         * User space didn't support rpcbind v4, so retry this
 917         * request with the legacy rpcbind v2 protocol.
 918         */
 919        if (error == -EPROTONOSUPPORT)
 920                error = rpcb_register(program, version, 0, 0);
 921
 922        dprintk("svc: %s(%sv%u), error %d\n",
 923                        __func__, progname, version, error);
 924}
 925
 926/*
 927 * All netids, bind addresses and ports registered for [program, version]
 928 * are removed from the local rpcbind database (if the service is not
 929 * hidden) to make way for a new instance of the service.
 930 *
 931 * The result of unregistration is reported via dprintk for those who want
 932 * verification of the result, but is otherwise not important.
 933 */
 934static void svc_unregister(const struct svc_serv *serv)
 935{
 936        struct svc_program *progp;
 937        unsigned long flags;
 938        unsigned int i;
 939
 940        clear_thread_flag(TIF_SIGPENDING);
 941
 942        for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 943                for (i = 0; i < progp->pg_nvers; i++) {
 944                        if (progp->pg_vers[i] == NULL)
 945                                continue;
 946                        if (progp->pg_vers[i]->vs_hidden)
 947                                continue;
 948
 949                        __svc_unregister(progp->pg_prog, i, progp->pg_name);
 950                }
 951        }
 952
 953        spin_lock_irqsave(&current->sighand->siglock, flags);
 954        recalc_sigpending();
 955        spin_unlock_irqrestore(&current->sighand->siglock, flags);
 956}
 957
 958/*
 959 * Printk the given error with the address of the client that caused it.
 960 */
 961static int
 962__attribute__ ((format (printf, 2, 3)))
 963svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 964{
 965        va_list args;
 966        int     r;
 967        char    buf[RPC_MAX_ADDRBUFLEN];
 968
 969        if (!net_ratelimit())
 970                return 0;
 971
 972        printk(KERN_WARNING "svc: %s: ",
 973                svc_print_addr(rqstp, buf, sizeof(buf)));
 974
 975        va_start(args, fmt);
 976        r = vprintk(fmt, args);
 977        va_end(args);
 978
 979        return r;
 980}
 981
 982/*
 983 * Common routine for processing the RPC request.
 984 */
 985static int
 986svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 987{
 988        struct svc_program      *progp;
 989        struct svc_version      *versp = NULL;  /* compiler food */
 990        struct svc_procedure    *procp = NULL;
 991        struct svc_serv         *serv = rqstp->rq_server;
 992        kxdrproc_t              xdr;
 993        __be32                  *statp;
 994        u32                     prog, vers, proc;
 995        __be32                  auth_stat, rpc_stat;
 996        int                     auth_res;
 997        __be32                  *reply_statp;
 998
 999        rpc_stat = rpc_success;
1000
1001        if (argv->iov_len < 6*4)
1002                goto err_short_len;
1003
1004        /* Will be turned off only in gss privacy case: */
1005        rqstp->rq_splice_ok = 1;
1006        /* Will be turned off only when NFSv4 Sessions are used */
1007        rqstp->rq_usedeferral = 1;
1008
1009        /* Setup reply header */
1010        rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
1011
1012        svc_putu32(resv, rqstp->rq_xid);
1013
1014        vers = svc_getnl(argv);
1015
1016        /* First words of reply: */
1017        svc_putnl(resv, 1);             /* REPLY */
1018
1019        if (vers != 2)          /* RPC version number */
1020                goto err_bad_rpc;
1021
1022        /* Save position in case we later decide to reject: */
1023        reply_statp = resv->iov_base + resv->iov_len;
1024
1025        svc_putnl(resv, 0);             /* ACCEPT */
1026
1027        rqstp->rq_prog = prog = svc_getnl(argv);        /* program number */
1028        rqstp->rq_vers = vers = svc_getnl(argv);        /* version number */
1029        rqstp->rq_proc = proc = svc_getnl(argv);        /* procedure number */
1030
1031        progp = serv->sv_program;
1032
1033        for (progp = serv->sv_program; progp; progp = progp->pg_next)
1034                if (prog == progp->pg_prog)
1035                        break;
1036
1037        /*
1038         * Decode auth data, and add verifier to reply buffer.
1039         * We do this before anything else in order to get a decent
1040         * auth verifier.
1041         */
1042        auth_res = svc_authenticate(rqstp, &auth_stat);
1043        /* Also give the program a chance to reject this call: */
1044        if (auth_res == SVC_OK && progp) {
1045                auth_stat = rpc_autherr_badcred;
1046                auth_res = progp->pg_authenticate(rqstp);
1047        }
1048        switch (auth_res) {
1049        case SVC_OK:
1050                break;
1051        case SVC_GARBAGE:
1052                goto err_garbage;
1053        case SVC_SYSERR:
1054                rpc_stat = rpc_system_err;
1055                goto err_bad;
1056        case SVC_DENIED:
1057                goto err_bad_auth;
1058        case SVC_DROP:
1059                goto dropit;
1060        case SVC_COMPLETE:
1061                goto sendit;
1062        }
1063
1064        if (progp == NULL)
1065                goto err_bad_prog;
1066
1067        if (vers >= progp->pg_nvers ||
1068          !(versp = progp->pg_vers[vers]))
1069                goto err_bad_vers;
1070
1071        procp = versp->vs_proc + proc;
1072        if (proc >= versp->vs_nproc || !procp->pc_func)
1073                goto err_bad_proc;
1074        rqstp->rq_procinfo = procp;
1075
1076        /* Syntactic check complete */
1077        serv->sv_stats->rpccnt++;
1078
1079        /* Build the reply header. */
1080        statp = resv->iov_base +resv->iov_len;
1081        svc_putnl(resv, RPC_SUCCESS);
1082
1083        /* Bump per-procedure stats counter */
1084        procp->pc_count++;
1085
1086        /* Initialize storage for argp and resp */
1087        memset(rqstp->rq_argp, 0, procp->pc_argsize);
1088        memset(rqstp->rq_resp, 0, procp->pc_ressize);
1089
1090        /* un-reserve some of the out-queue now that we have a
1091         * better idea of reply size
1092         */
1093        if (procp->pc_xdrressize)
1094                svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
1095
1096        /* Call the function that processes the request. */
1097        if (!versp->vs_dispatch) {
1098                /* Decode arguments */
1099                xdr = procp->pc_decode;
1100                if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
1101                        goto err_garbage;
1102
1103                *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
1104
1105                /* Encode reply */
1106                if (*statp == rpc_drop_reply) {
1107                        if (procp->pc_release)
1108                                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1109                        goto dropit;
1110                }
1111                if (*statp == rpc_success &&
1112                    (xdr = procp->pc_encode) &&
1113                    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
1114                        dprintk("svc: failed to encode reply\n");
1115                        /* serv->sv_stats->rpcsystemerr++; */
1116                        *statp = rpc_system_err;
1117                }
1118        } else {
1119                dprintk("svc: calling dispatcher\n");
1120                if (!versp->vs_dispatch(rqstp, statp)) {
1121                        /* Release reply info */
1122                        if (procp->pc_release)
1123                                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1124                        goto dropit;
1125                }
1126        }
1127
1128        /* Check RPC status result */
1129        if (*statp != rpc_success)
1130                resv->iov_len = ((void*)statp)  - resv->iov_base + 4;
1131
1132        /* Release reply info */
1133        if (procp->pc_release)
1134                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1135
1136        if (procp->pc_encode == NULL)
1137                goto dropit;
1138
1139 sendit:
1140        if (svc_authorise(rqstp))
1141                goto dropit;
1142        return 1;               /* Caller can now send it */
1143
1144 dropit:
1145        svc_authorise(rqstp);   /* doesn't hurt to call this twice */
1146        dprintk("svc: svc_process dropit\n");
1147        svc_drop(rqstp);
1148        return 0;
1149
1150err_short_len:
1151        svc_printk(rqstp, "short len %Zd, dropping request\n",
1152                        argv->iov_len);
1153
1154        goto dropit;                    /* drop request */
1155
1156err_bad_rpc:
1157        serv->sv_stats->rpcbadfmt++;
1158        svc_putnl(resv, 1);     /* REJECT */
1159        svc_putnl(resv, 0);     /* RPC_MISMATCH */
1160        svc_putnl(resv, 2);     /* Only RPCv2 supported */
1161        svc_putnl(resv, 2);
1162        goto sendit;
1163
1164err_bad_auth:
1165        dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
1166        serv->sv_stats->rpcbadauth++;
1167        /* Restore write pointer to location of accept status: */
1168        xdr_ressize_check(rqstp, reply_statp);
1169        svc_putnl(resv, 1);     /* REJECT */
1170        svc_putnl(resv, 1);     /* AUTH_ERROR */
1171        svc_putnl(resv, ntohl(auth_stat));      /* status */
1172        goto sendit;
1173
1174err_bad_prog:
1175        dprintk("svc: unknown program %d\n", prog);
1176        serv->sv_stats->rpcbadfmt++;
1177        svc_putnl(resv, RPC_PROG_UNAVAIL);
1178        goto sendit;
1179
1180err_bad_vers:
1181        svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
1182                       vers, prog, progp->pg_name);
1183
1184        serv->sv_stats->rpcbadfmt++;
1185        svc_putnl(resv, RPC_PROG_MISMATCH);
1186        svc_putnl(resv, progp->pg_lovers);
1187        svc_putnl(resv, progp->pg_hivers);
1188        goto sendit;
1189
1190err_bad_proc:
1191        svc_printk(rqstp, "unknown procedure (%d)\n", proc);
1192
1193        serv->sv_stats->rpcbadfmt++;
1194        svc_putnl(resv, RPC_PROC_UNAVAIL);
1195        goto sendit;
1196
1197err_garbage:
1198        svc_printk(rqstp, "failed to decode args\n");
1199
1200        rpc_stat = rpc_garbage_args;
1201err_bad:
1202        serv->sv_stats->rpcbadfmt++;
1203        svc_putnl(resv, ntohl(rpc_stat));
1204        goto sendit;
1205}
1206EXPORT_SYMBOL_GPL(svc_process);
1207
1208/*
1209 * Process the RPC request.
1210 */
1211int
1212svc_process(struct svc_rqst *rqstp)
1213{
1214        struct kvec             *argv = &rqstp->rq_arg.head[0];
1215        struct kvec             *resv = &rqstp->rq_res.head[0];
1216        struct svc_serv         *serv = rqstp->rq_server;
1217        u32                     dir;
1218        int                     error;
1219
1220        /*
1221         * Setup response xdr_buf.
1222         * Initially it has just one page
1223         */
1224        rqstp->rq_resused = 1;
1225        resv->iov_base = page_address(rqstp->rq_respages[0]);
1226        resv->iov_len = 0;
1227        rqstp->rq_res.pages = rqstp->rq_respages + 1;
1228        rqstp->rq_res.len = 0;
1229        rqstp->rq_res.page_base = 0;
1230        rqstp->rq_res.page_len = 0;
1231        rqstp->rq_res.buflen = PAGE_SIZE;
1232        rqstp->rq_res.tail[0].iov_base = NULL;
1233        rqstp->rq_res.tail[0].iov_len = 0;
1234
1235        rqstp->rq_xid = svc_getu32(argv);
1236
1237        dir  = svc_getnl(argv);
1238        if (dir != 0) {
1239                /* direction != CALL */
1240                svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
1241                serv->sv_stats->rpcbadfmt++;
1242                svc_drop(rqstp);
1243                return 0;
1244        }
1245
1246        error = svc_process_common(rqstp, argv, resv);
1247        if (error <= 0)
1248                return error;
1249
1250        return svc_send(rqstp);
1251}
1252
1253#if defined(CONFIG_NFS_V4_1)
1254/*
1255 * Process a backchannel RPC request that arrived over an existing
1256 * outbound connection
1257 */
1258int
1259bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1260               struct svc_rqst *rqstp)
1261{
1262        struct kvec     *argv = &rqstp->rq_arg.head[0];
1263        struct kvec     *resv = &rqstp->rq_res.head[0];
1264        int             error;
1265
1266        /* Build the svc_rqst used by the common processing routine */
1267        rqstp->rq_xprt = serv->bc_xprt;
1268        rqstp->rq_xid = req->rq_xid;
1269        rqstp->rq_prot = req->rq_xprt->prot;
1270        rqstp->rq_server = serv;
1271
1272        rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
1273        memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
1274        memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
1275        memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
1276
1277        /* reset result send buffer "put" position */
1278        resv->iov_len = 0;
1279
1280        if (rqstp->rq_prot != IPPROTO_TCP) {
1281                printk(KERN_ERR "No support for Non-TCP transports!\n");
1282                BUG();
1283        }
1284
1285        /*
1286         * Skip the next two words because they've already been
1287         * processed in the trasport
1288         */
1289        svc_getu32(argv);       /* XID */
1290        svc_getnl(argv);        /* CALLDIR */
1291
1292        error = svc_process_common(rqstp, argv, resv);
1293        if (error <= 0)
1294                return error;
1295
1296        memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
1297        return bc_send(req);
1298}
1299EXPORT_SYMBOL(bc_svc_process);
1300#endif /* CONFIG_NFS_V4_1 */
1301
1302/*
1303 * Return (transport-specific) limit on the rpc payload.
1304 */
1305u32 svc_max_payload(const struct svc_rqst *rqstp)
1306{
1307        u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
1308
1309        if (rqstp->rq_server->sv_max_payload < max)
1310                max = rqstp->rq_server->sv_max_payload;
1311        return max;
1312}
1313EXPORT_SYMBOL_GPL(svc_max_payload);
1314