linux/net/sunrpc/svc.c
<<
>>
Prefs
   1/*
   2 * linux/net/sunrpc/svc.c
   3 *
   4 * High-level RPC service routines
   5 *
   6 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
   7 *
   8 * Multiple threads pools and NUMAisation
   9 * Copyright (c) 2006 Silicon Graphics, Inc.
  10 * by Greg Banks <gnb@melbourne.sgi.com>
  11 */
  12
  13#include <linux/linkage.h>
  14#include <linux/sched.h>
  15#include <linux/errno.h>
  16#include <linux/net.h>
  17#include <linux/in.h>
  18#include <linux/mm.h>
  19#include <linux/interrupt.h>
  20#include <linux/module.h>
  21#include <linux/kthread.h>
  22#include <linux/slab.h>
  23
  24#include <linux/sunrpc/types.h>
  25#include <linux/sunrpc/xdr.h>
  26#include <linux/sunrpc/stats.h>
  27#include <linux/sunrpc/svcsock.h>
  28#include <linux/sunrpc/clnt.h>
  29#include <linux/sunrpc/bc_xprt.h>
  30
  31#define RPCDBG_FACILITY RPCDBG_SVCDSP
  32
/*
 * Forward declaration: svc_unregister() is defined later in this file
 * but is called earlier, from __svc_create() and svc_destroy().
 */
static void svc_unregister(const struct svc_serv *serv);

/*
 * A service is treated as "pooled" when its sv_function is set;
 * svc_create_pooled() is the routine in this file that sets it.
 */
#define svc_serv_is_pooled(serv)    ((serv)->sv_function)
  36
  37/*
  38 * Mode for mapping cpus to pools.
  39 */
  40enum {
  41        SVC_POOL_AUTO = -1,     /* choose one of the others */
  42        SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
  43                                 * (legacy & UP mode) */
  44        SVC_POOL_PERCPU,        /* one pool per cpu */
  45        SVC_POOL_PERNODE        /* one pool per numa node */
  46};
  47#define SVC_POOL_DEFAULT        SVC_POOL_GLOBAL
  48
  49/*
  50 * Structure for mapping cpus to pools and vice versa.
  51 * Setup once during sunrpc initialisation.
  52 */
  53static struct svc_pool_map {
  54        int count;                      /* How many svc_servs use us */
  55        int mode;                       /* Note: int not enum to avoid
  56                                         * warnings about "enumeration value
  57                                         * not handled in switch" */
  58        unsigned int npools;
  59        unsigned int *pool_to;          /* maps pool id to cpu or node */
  60        unsigned int *to_pool;          /* maps cpu or node to pool id */
  61} svc_pool_map = {
  62        .count = 0,
  63        .mode = SVC_POOL_DEFAULT
  64};
  65static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
  66
  67static int
  68param_set_pool_mode(const char *val, struct kernel_param *kp)
  69{
  70        int *ip = (int *)kp->arg;
  71        struct svc_pool_map *m = &svc_pool_map;
  72        int err;
  73
  74        mutex_lock(&svc_pool_map_mutex);
  75
  76        err = -EBUSY;
  77        if (m->count)
  78                goto out;
  79
  80        err = 0;
  81        if (!strncmp(val, "auto", 4))
  82                *ip = SVC_POOL_AUTO;
  83        else if (!strncmp(val, "global", 6))
  84                *ip = SVC_POOL_GLOBAL;
  85        else if (!strncmp(val, "percpu", 6))
  86                *ip = SVC_POOL_PERCPU;
  87        else if (!strncmp(val, "pernode", 7))
  88                *ip = SVC_POOL_PERNODE;
  89        else
  90                err = -EINVAL;
  91
  92out:
  93        mutex_unlock(&svc_pool_map_mutex);
  94        return err;
  95}
  96
  97static int
  98param_get_pool_mode(char *buf, struct kernel_param *kp)
  99{
 100        int *ip = (int *)kp->arg;
 101
 102        switch (*ip)
 103        {
 104        case SVC_POOL_AUTO:
 105                return strlcpy(buf, "auto", 20);
 106        case SVC_POOL_GLOBAL:
 107                return strlcpy(buf, "global", 20);
 108        case SVC_POOL_PERCPU:
 109                return strlcpy(buf, "percpu", 20);
 110        case SVC_POOL_PERNODE:
 111                return strlcpy(buf, "pernode", 20);
 112        default:
 113                return sprintf(buf, "%d", *ip);
 114        }
 115}
 116
 117module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
 118                 &svc_pool_map.mode, 0644);
 119
 120/*
 121 * Detect best pool mapping mode heuristically,
 122 * according to the machine's topology.
 123 */
 124static int
 125svc_pool_map_choose_mode(void)
 126{
 127        unsigned int node;
 128
 129        if (nr_online_nodes > 1) {
 130                /*
 131                 * Actually have multiple NUMA nodes,
 132                 * so split pools on NUMA node boundaries
 133                 */
 134                return SVC_POOL_PERNODE;
 135        }
 136
 137        node = first_online_node;
 138        if (nr_cpus_node(node) > 2) {
 139                /*
 140                 * Non-trivial SMP, or CONFIG_NUMA on
 141                 * non-NUMA hardware, e.g. with a generic
 142                 * x86_64 kernel on Xeons.  In this case we
 143                 * want to divide the pools on cpu boundaries.
 144                 */
 145                return SVC_POOL_PERCPU;
 146        }
 147
 148        /* default: one global pool */
 149        return SVC_POOL_GLOBAL;
 150}
 151
 152/*
 153 * Allocate the to_pool[] and pool_to[] arrays.
 154 * Returns 0 on success or an errno.
 155 */
 156static int
 157svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
 158{
 159        m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
 160        if (!m->to_pool)
 161                goto fail;
 162        m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
 163        if (!m->pool_to)
 164                goto fail_free;
 165
 166        return 0;
 167
 168fail_free:
 169        kfree(m->to_pool);
 170fail:
 171        return -ENOMEM;
 172}
 173
 174/*
 175 * Initialise the pool map for SVC_POOL_PERCPU mode.
 176 * Returns number of pools or <0 on error.
 177 */
 178static int
 179svc_pool_map_init_percpu(struct svc_pool_map *m)
 180{
 181        unsigned int maxpools = nr_cpu_ids;
 182        unsigned int pidx = 0;
 183        unsigned int cpu;
 184        int err;
 185
 186        err = svc_pool_map_alloc_arrays(m, maxpools);
 187        if (err)
 188                return err;
 189
 190        for_each_online_cpu(cpu) {
 191                BUG_ON(pidx > maxpools);
 192                m->to_pool[cpu] = pidx;
 193                m->pool_to[pidx] = cpu;
 194                pidx++;
 195        }
 196        /* cpus brought online later all get mapped to pool0, sorry */
 197
 198        return pidx;
 199};
 200
 201
 202/*
 203 * Initialise the pool map for SVC_POOL_PERNODE mode.
 204 * Returns number of pools or <0 on error.
 205 */
 206static int
 207svc_pool_map_init_pernode(struct svc_pool_map *m)
 208{
 209        unsigned int maxpools = nr_node_ids;
 210        unsigned int pidx = 0;
 211        unsigned int node;
 212        int err;
 213
 214        err = svc_pool_map_alloc_arrays(m, maxpools);
 215        if (err)
 216                return err;
 217
 218        for_each_node_with_cpus(node) {
 219                /* some architectures (e.g. SN2) have cpuless nodes */
 220                BUG_ON(pidx > maxpools);
 221                m->to_pool[node] = pidx;
 222                m->pool_to[pidx] = node;
 223                pidx++;
 224        }
 225        /* nodes brought online later all get mapped to pool0, sorry */
 226
 227        return pidx;
 228}
 229
 230
 231/*
 232 * Add a reference to the global map of cpus to pools (and
 233 * vice versa).  Initialise the map if we're the first user.
 234 * Returns the number of pools.
 235 */
 236static unsigned int
 237svc_pool_map_get(void)
 238{
 239        struct svc_pool_map *m = &svc_pool_map;
 240        int npools = -1;
 241
 242        mutex_lock(&svc_pool_map_mutex);
 243
 244        if (m->count++) {
 245                mutex_unlock(&svc_pool_map_mutex);
 246                return m->npools;
 247        }
 248
 249        if (m->mode == SVC_POOL_AUTO)
 250                m->mode = svc_pool_map_choose_mode();
 251
 252        switch (m->mode) {
 253        case SVC_POOL_PERCPU:
 254                npools = svc_pool_map_init_percpu(m);
 255                break;
 256        case SVC_POOL_PERNODE:
 257                npools = svc_pool_map_init_pernode(m);
 258                break;
 259        }
 260
 261        if (npools < 0) {
 262                /* default, or memory allocation failure */
 263                npools = 1;
 264                m->mode = SVC_POOL_GLOBAL;
 265        }
 266        m->npools = npools;
 267
 268        mutex_unlock(&svc_pool_map_mutex);
 269        return m->npools;
 270}
 271
 272
 273/*
 274 * Drop a reference to the global map of cpus to pools.
 275 * When the last reference is dropped, the map data is
 276 * freed; this allows the sysadmin to change the pool
 277 * mode using the pool_mode module option without
 278 * rebooting or re-loading sunrpc.ko.
 279 */
 280static void
 281svc_pool_map_put(void)
 282{
 283        struct svc_pool_map *m = &svc_pool_map;
 284
 285        mutex_lock(&svc_pool_map_mutex);
 286
 287        if (!--m->count) {
 288                m->mode = SVC_POOL_DEFAULT;
 289                kfree(m->to_pool);
 290                kfree(m->pool_to);
 291                m->npools = 0;
 292        }
 293
 294        mutex_unlock(&svc_pool_map_mutex);
 295}
 296
 297
 298/*
 299 * Set the given thread's cpus_allowed mask so that it
 300 * will only run on cpus in the given pool.
 301 */
 302static inline void
 303svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
 304{
 305        struct svc_pool_map *m = &svc_pool_map;
 306        unsigned int node = m->pool_to[pidx];
 307
 308        /*
 309         * The caller checks for sv_nrpools > 1, which
 310         * implies that we've been initialized.
 311         */
 312        BUG_ON(m->count == 0);
 313
 314        switch (m->mode) {
 315        case SVC_POOL_PERCPU:
 316        {
 317                set_cpus_allowed_ptr(task, cpumask_of(node));
 318                break;
 319        }
 320        case SVC_POOL_PERNODE:
 321        {
 322                set_cpus_allowed_ptr(task, cpumask_of_node(node));
 323                break;
 324        }
 325        }
 326}
 327
 328/*
 329 * Use the mapping mode to choose a pool for a given CPU.
 330 * Used when enqueueing an incoming RPC.  Always returns
 331 * a non-NULL pool pointer.
 332 */
 333struct svc_pool *
 334svc_pool_for_cpu(struct svc_serv *serv, int cpu)
 335{
 336        struct svc_pool_map *m = &svc_pool_map;
 337        unsigned int pidx = 0;
 338
 339        /*
 340         * An uninitialised map happens in a pure client when
 341         * lockd is brought up, so silently treat it the
 342         * same as SVC_POOL_GLOBAL.
 343         */
 344        if (svc_serv_is_pooled(serv)) {
 345                switch (m->mode) {
 346                case SVC_POOL_PERCPU:
 347                        pidx = m->to_pool[cpu];
 348                        break;
 349                case SVC_POOL_PERNODE:
 350                        pidx = m->to_pool[cpu_to_node(cpu)];
 351                        break;
 352                }
 353        }
 354        return &serv->sv_pools[pidx % serv->sv_nrpools];
 355}
 356
 357
 358/*
 359 * Create an RPC service
 360 */
 361static struct svc_serv *
 362__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 363             void (*shutdown)(struct svc_serv *serv))
 364{
 365        struct svc_serv *serv;
 366        unsigned int vers;
 367        unsigned int xdrsize;
 368        unsigned int i;
 369
 370        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
 371                return NULL;
 372        serv->sv_name      = prog->pg_name;
 373        serv->sv_program   = prog;
 374        serv->sv_nrthreads = 1;
 375        serv->sv_stats     = prog->pg_stats;
 376        if (bufsize > RPCSVC_MAXPAYLOAD)
 377                bufsize = RPCSVC_MAXPAYLOAD;
 378        serv->sv_max_payload = bufsize? bufsize : 4096;
 379        serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
 380        serv->sv_shutdown  = shutdown;
 381        xdrsize = 0;
 382        while (prog) {
 383                prog->pg_lovers = prog->pg_nvers-1;
 384                for (vers=0; vers<prog->pg_nvers ; vers++)
 385                        if (prog->pg_vers[vers]) {
 386                                prog->pg_hivers = vers;
 387                                if (prog->pg_lovers > vers)
 388                                        prog->pg_lovers = vers;
 389                                if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
 390                                        xdrsize = prog->pg_vers[vers]->vs_xdrsize;
 391                        }
 392                prog = prog->pg_next;
 393        }
 394        serv->sv_xdrsize   = xdrsize;
 395        INIT_LIST_HEAD(&serv->sv_tempsocks);
 396        INIT_LIST_HEAD(&serv->sv_permsocks);
 397        init_timer(&serv->sv_temptimer);
 398        spin_lock_init(&serv->sv_lock);
 399
 400        serv->sv_nrpools = npools;
 401        serv->sv_pools =
 402                kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
 403                        GFP_KERNEL);
 404        if (!serv->sv_pools) {
 405                kfree(serv);
 406                return NULL;
 407        }
 408
 409        for (i = 0; i < serv->sv_nrpools; i++) {
 410                struct svc_pool *pool = &serv->sv_pools[i];
 411
 412                dprintk("svc: initialising pool %u for %s\n",
 413                                i, serv->sv_name);
 414
 415                pool->sp_id = i;
 416                INIT_LIST_HEAD(&pool->sp_threads);
 417                INIT_LIST_HEAD(&pool->sp_sockets);
 418                INIT_LIST_HEAD(&pool->sp_all_threads);
 419                spin_lock_init(&pool->sp_lock);
 420        }
 421
 422        /* Remove any stale portmap registrations */
 423        svc_unregister(serv);
 424
 425        return serv;
 426}
 427
 428struct svc_serv *
 429svc_create(struct svc_program *prog, unsigned int bufsize,
 430           void (*shutdown)(struct svc_serv *serv))
 431{
 432        return __svc_create(prog, bufsize, /*npools*/1, shutdown);
 433}
 434EXPORT_SYMBOL_GPL(svc_create);
 435
 436struct svc_serv *
 437svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 438                  void (*shutdown)(struct svc_serv *serv),
 439                  svc_thread_fn func, struct module *mod)
 440{
 441        struct svc_serv *serv;
 442        unsigned int npools = svc_pool_map_get();
 443
 444        serv = __svc_create(prog, bufsize, npools, shutdown);
 445
 446        if (serv != NULL) {
 447                serv->sv_function = func;
 448                serv->sv_module = mod;
 449        }
 450
 451        return serv;
 452}
 453EXPORT_SYMBOL_GPL(svc_create_pooled);
 454
 455/*
 456 * Destroy an RPC service. Should be called with appropriate locking to
 457 * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
 458 */
 459void
 460svc_destroy(struct svc_serv *serv)
 461{
 462        dprintk("svc: svc_destroy(%s, %d)\n",
 463                                serv->sv_program->pg_name,
 464                                serv->sv_nrthreads);
 465
 466        if (serv->sv_nrthreads) {
 467                if (--(serv->sv_nrthreads) != 0) {
 468                        svc_sock_update_bufs(serv);
 469                        return;
 470                }
 471        } else
 472                printk("svc_destroy: no threads for serv=%p!\n", serv);
 473
 474        del_timer_sync(&serv->sv_temptimer);
 475
 476        svc_close_all(&serv->sv_tempsocks);
 477
 478        if (serv->sv_shutdown)
 479                serv->sv_shutdown(serv);
 480
 481        svc_close_all(&serv->sv_permsocks);
 482
 483        BUG_ON(!list_empty(&serv->sv_permsocks));
 484        BUG_ON(!list_empty(&serv->sv_tempsocks));
 485
 486        cache_clean_deferred(serv);
 487
 488        if (svc_serv_is_pooled(serv))
 489                svc_pool_map_put();
 490
 491        svc_unregister(serv);
 492        kfree(serv->sv_pools);
 493        kfree(serv);
 494}
 495EXPORT_SYMBOL_GPL(svc_destroy);
 496
 497/*
 498 * Allocate an RPC server's buffer space.
 499 * We allocate pages and place them in rq_argpages.
 500 */
 501static int
 502svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
 503{
 504        unsigned int pages, arghi;
 505
 506        /* bc_xprt uses fore channel allocated buffers */
 507        if (svc_is_backchannel(rqstp))
 508                return 1;
 509
 510        pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 511                                       * We assume one is at most one page
 512                                       */
 513        arghi = 0;
 514        BUG_ON(pages > RPCSVC_MAXPAGES);
 515        while (pages) {
 516                struct page *p = alloc_page(GFP_KERNEL);
 517                if (!p)
 518                        break;
 519                rqstp->rq_pages[arghi++] = p;
 520                pages--;
 521        }
 522        return pages == 0;
 523}
 524
 525/*
 526 * Release an RPC server buffer
 527 */
 528static void
 529svc_release_buffer(struct svc_rqst *rqstp)
 530{
 531        unsigned int i;
 532
 533        for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
 534                if (rqstp->rq_pages[i])
 535                        put_page(rqstp->rq_pages[i]);
 536}
 537
 538struct svc_rqst *
 539svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
 540{
 541        struct svc_rqst *rqstp;
 542
 543        rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
 544        if (!rqstp)
 545                goto out_enomem;
 546
 547        init_waitqueue_head(&rqstp->rq_wait);
 548
 549        serv->sv_nrthreads++;
 550        spin_lock_bh(&pool->sp_lock);
 551        pool->sp_nrthreads++;
 552        list_add(&rqstp->rq_all, &pool->sp_all_threads);
 553        spin_unlock_bh(&pool->sp_lock);
 554        rqstp->rq_server = serv;
 555        rqstp->rq_pool = pool;
 556
 557        rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
 558        if (!rqstp->rq_argp)
 559                goto out_thread;
 560
 561        rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
 562        if (!rqstp->rq_resp)
 563                goto out_thread;
 564
 565        if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
 566                goto out_thread;
 567
 568        return rqstp;
 569out_thread:
 570        svc_exit_thread(rqstp);
 571out_enomem:
 572        return ERR_PTR(-ENOMEM);
 573}
 574EXPORT_SYMBOL_GPL(svc_prepare_thread);
 575
 576/*
 577 * Choose a pool in which to create a new thread, for svc_set_num_threads
 578 */
 579static inline struct svc_pool *
 580choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 581{
 582        if (pool != NULL)
 583                return pool;
 584
 585        return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
 586}
 587
 588/*
 589 * Choose a thread to kill, for svc_set_num_threads
 590 */
 591static inline struct task_struct *
 592choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 593{
 594        unsigned int i;
 595        struct task_struct *task = NULL;
 596
 597        if (pool != NULL) {
 598                spin_lock_bh(&pool->sp_lock);
 599        } else {
 600                /* choose a pool in round-robin fashion */
 601                for (i = 0; i < serv->sv_nrpools; i++) {
 602                        pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
 603                        spin_lock_bh(&pool->sp_lock);
 604                        if (!list_empty(&pool->sp_all_threads))
 605                                goto found_pool;
 606                        spin_unlock_bh(&pool->sp_lock);
 607                }
 608                return NULL;
 609        }
 610
 611found_pool:
 612        if (!list_empty(&pool->sp_all_threads)) {
 613                struct svc_rqst *rqstp;
 614
 615                /*
 616                 * Remove from the pool->sp_all_threads list
 617                 * so we don't try to kill it again.
 618                 */
 619                rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
 620                list_del_init(&rqstp->rq_all);
 621                task = rqstp->rq_task;
 622        }
 623        spin_unlock_bh(&pool->sp_lock);
 624
 625        return task;
 626}
 627
 628/*
 629 * Create or destroy enough new threads to make the number
 630 * of threads the given number.  If `pool' is non-NULL, applies
 631 * only to threads in that pool, otherwise round-robins between
 632 * all pools.  Must be called with a svc_get() reference and
 633 * the BKL or another lock to protect access to svc_serv fields.
 634 *
 635 * Destroying threads relies on the service threads filling in
 636 * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
 637 * has been created using svc_create_pooled().
 638 *
 639 * Based on code that used to be in nfsd_svc() but tweaked
 640 * to be pool-aware.
 641 */
 642int
 643svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 644{
 645        struct svc_rqst *rqstp;
 646        struct task_struct *task;
 647        struct svc_pool *chosen_pool;
 648        int error = 0;
 649        unsigned int state = serv->sv_nrthreads-1;
 650
 651        if (pool == NULL) {
 652                /* The -1 assumes caller has done a svc_get() */
 653                nrservs -= (serv->sv_nrthreads-1);
 654        } else {
 655                spin_lock_bh(&pool->sp_lock);
 656                nrservs -= pool->sp_nrthreads;
 657                spin_unlock_bh(&pool->sp_lock);
 658        }
 659
 660        /* create new threads */
 661        while (nrservs > 0) {
 662                nrservs--;
 663                chosen_pool = choose_pool(serv, pool, &state);
 664
 665                rqstp = svc_prepare_thread(serv, chosen_pool);
 666                if (IS_ERR(rqstp)) {
 667                        error = PTR_ERR(rqstp);
 668                        break;
 669                }
 670
 671                __module_get(serv->sv_module);
 672                task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
 673                if (IS_ERR(task)) {
 674                        error = PTR_ERR(task);
 675                        module_put(serv->sv_module);
 676                        svc_exit_thread(rqstp);
 677                        break;
 678                }
 679
 680                rqstp->rq_task = task;
 681                if (serv->sv_nrpools > 1)
 682                        svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
 683
 684                svc_sock_update_bufs(serv);
 685                wake_up_process(task);
 686        }
 687        /* destroy old threads */
 688        while (nrservs < 0 &&
 689               (task = choose_victim(serv, pool, &state)) != NULL) {
 690                send_sig(SIGINT, task, 1);
 691                nrservs++;
 692        }
 693
 694        return error;
 695}
 696EXPORT_SYMBOL_GPL(svc_set_num_threads);
 697
 698/*
 699 * Called from a server thread as it's exiting. Caller must hold the BKL or
 700 * the "service mutex", whichever is appropriate for the service.
 701 */
 702void
 703svc_exit_thread(struct svc_rqst *rqstp)
 704{
 705        struct svc_serv *serv = rqstp->rq_server;
 706        struct svc_pool *pool = rqstp->rq_pool;
 707
 708        svc_release_buffer(rqstp);
 709        kfree(rqstp->rq_resp);
 710        kfree(rqstp->rq_argp);
 711        kfree(rqstp->rq_auth_data);
 712
 713        spin_lock_bh(&pool->sp_lock);
 714        pool->sp_nrthreads--;
 715        list_del(&rqstp->rq_all);
 716        spin_unlock_bh(&pool->sp_lock);
 717
 718        kfree(rqstp);
 719
 720        /* Release the server */
 721        if (serv)
 722                svc_destroy(serv);
 723}
 724EXPORT_SYMBOL_GPL(svc_exit_thread);
 725
 726/*
 727 * Register an "inet" protocol family netid with the local
 728 * rpcbind daemon via an rpcbind v4 SET request.
 729 *
 730 * No netconfig infrastructure is available in the kernel, so
 731 * we map IP_ protocol numbers to netids by hand.
 732 *
 733 * Returns zero on success; a negative errno value is returned
 734 * if any error occurs.
 735 */
 736static int __svc_rpcb_register4(const u32 program, const u32 version,
 737                                const unsigned short protocol,
 738                                const unsigned short port)
 739{
 740        const struct sockaddr_in sin = {
 741                .sin_family             = AF_INET,
 742                .sin_addr.s_addr        = htonl(INADDR_ANY),
 743                .sin_port               = htons(port),
 744        };
 745        const char *netid;
 746        int error;
 747
 748        switch (protocol) {
 749        case IPPROTO_UDP:
 750                netid = RPCBIND_NETID_UDP;
 751                break;
 752        case IPPROTO_TCP:
 753                netid = RPCBIND_NETID_TCP;
 754                break;
 755        default:
 756                return -ENOPROTOOPT;
 757        }
 758
 759        error = rpcb_v4_register(program, version,
 760                                        (const struct sockaddr *)&sin, netid);
 761
 762        /*
 763         * User space didn't support rpcbind v4, so retry this
 764         * registration request with the legacy rpcbind v2 protocol.
 765         */
 766        if (error == -EPROTONOSUPPORT)
 767                error = rpcb_register(program, version, protocol, port);
 768
 769        return error;
 770}
 771
 772#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 773/*
 774 * Register an "inet6" protocol family netid with the local
 775 * rpcbind daemon via an rpcbind v4 SET request.
 776 *
 777 * No netconfig infrastructure is available in the kernel, so
 778 * we map IP_ protocol numbers to netids by hand.
 779 *
 780 * Returns zero on success; a negative errno value is returned
 781 * if any error occurs.
 782 */
 783static int __svc_rpcb_register6(const u32 program, const u32 version,
 784                                const unsigned short protocol,
 785                                const unsigned short port)
 786{
 787        const struct sockaddr_in6 sin6 = {
 788                .sin6_family            = AF_INET6,
 789                .sin6_addr              = IN6ADDR_ANY_INIT,
 790                .sin6_port              = htons(port),
 791        };
 792        const char *netid;
 793        int error;
 794
 795        switch (protocol) {
 796        case IPPROTO_UDP:
 797                netid = RPCBIND_NETID_UDP6;
 798                break;
 799        case IPPROTO_TCP:
 800                netid = RPCBIND_NETID_TCP6;
 801                break;
 802        default:
 803                return -ENOPROTOOPT;
 804        }
 805
 806        error = rpcb_v4_register(program, version,
 807                                        (const struct sockaddr *)&sin6, netid);
 808
 809        /*
 810         * User space didn't support rpcbind version 4, so we won't
 811         * use a PF_INET6 listener.
 812         */
 813        if (error == -EPROTONOSUPPORT)
 814                error = -EAFNOSUPPORT;
 815
 816        return error;
 817}
 818#endif  /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 819
 820/*
 821 * Register a kernel RPC service via rpcbind version 4.
 822 *
 823 * Returns zero on success; a negative errno value is returned
 824 * if any error occurs.
 825 */
 826static int __svc_register(const char *progname,
 827                          const u32 program, const u32 version,
 828                          const int family,
 829                          const unsigned short protocol,
 830                          const unsigned short port)
 831{
 832        int error = -EAFNOSUPPORT;
 833
 834        switch (family) {
 835        case PF_INET:
 836                error = __svc_rpcb_register4(program, version,
 837                                                protocol, port);
 838                break;
 839#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 840        case PF_INET6:
 841                error = __svc_rpcb_register6(program, version,
 842                                                protocol, port);
 843#endif  /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 844        }
 845
 846        if (error < 0)
 847                printk(KERN_WARNING "svc: failed to register %sv%u RPC "
 848                        "service (errno %d).\n", progname, version, -error);
 849        return error;
 850}
 851
 852/**
 853 * svc_register - register an RPC service with the local portmapper
 854 * @serv: svc_serv struct for the service to register
 855 * @family: protocol family of service's listener socket
 856 * @proto: transport protocol number to advertise
 857 * @port: port to advertise
 858 *
 859 * Service is registered for any address in the passed-in protocol family
 860 */
 861int svc_register(const struct svc_serv *serv, const int family,
 862                 const unsigned short proto, const unsigned short port)
 863{
 864        struct svc_program      *progp;
 865        unsigned int            i;
 866        int                     error = 0;
 867
 868        BUG_ON(proto == 0 && port == 0);
 869
 870        for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 871                for (i = 0; i < progp->pg_nvers; i++) {
 872                        if (progp->pg_vers[i] == NULL)
 873                                continue;
 874
 875                        dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
 876                                        progp->pg_name,
 877                                        i,
 878                                        proto == IPPROTO_UDP?  "udp" : "tcp",
 879                                        port,
 880                                        family,
 881                                        progp->pg_vers[i]->vs_hidden?
 882                                                " (but not telling portmap)" : "");
 883
 884                        if (progp->pg_vers[i]->vs_hidden)
 885                                continue;
 886
 887                        error = __svc_register(progp->pg_name, progp->pg_prog,
 888                                                i, family, proto, port);
 889                        if (error < 0)
 890                                break;
 891                }
 892        }
 893
 894        return error;
 895}
 896
 897/*
 898 * If user space is running rpcbind, it should take the v4 UNSET
 899 * and clear everything for this [program, version].  If user space
 900 * is running portmap, it will reject the v4 UNSET, but won't have
 901 * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
 902 * in this case to clear all existing entries for [program, version].
 903 */
 904static void __svc_unregister(const u32 program, const u32 version,
 905                             const char *progname)
 906{
 907        int error;
 908
 909        error = rpcb_v4_register(program, version, NULL, "");
 910
 911        /*
 912         * User space didn't support rpcbind v4, so retry this
 913         * request with the legacy rpcbind v2 protocol.
 914         */
 915        if (error == -EPROTONOSUPPORT)
 916                error = rpcb_register(program, version, 0, 0);
 917
 918        dprintk("svc: %s(%sv%u), error %d\n",
 919                        __func__, progname, version, error);
 920}
 921
 922/*
 923 * All netids, bind addresses and ports registered for [program, version]
 924 * are removed from the local rpcbind database (if the service is not
 925 * hidden) to make way for a new instance of the service.
 926 *
 927 * The result of unregistration is reported via dprintk for those who want
 928 * verification of the result, but is otherwise not important.
 929 */
 930static void svc_unregister(const struct svc_serv *serv)
 931{
 932        struct svc_program *progp;
 933        unsigned long flags;
 934        unsigned int i;
 935
 936        clear_thread_flag(TIF_SIGPENDING);
 937
 938        for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 939                for (i = 0; i < progp->pg_nvers; i++) {
 940                        if (progp->pg_vers[i] == NULL)
 941                                continue;
 942                        if (progp->pg_vers[i]->vs_hidden)
 943                                continue;
 944
 945                        __svc_unregister(progp->pg_prog, i, progp->pg_name);
 946                }
 947        }
 948
 949        spin_lock_irqsave(&current->sighand->siglock, flags);
 950        recalc_sigpending();
 951        spin_unlock_irqrestore(&current->sighand->siglock, flags);
 952}
 953
 954/*
 955 * Printk the given error with the address of the client that caused it.
 956 */
 957static int
 958__attribute__ ((format (printf, 2, 3)))
 959svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 960{
 961        va_list args;
 962        int     r;
 963        char    buf[RPC_MAX_ADDRBUFLEN];
 964
 965        if (!net_ratelimit())
 966                return 0;
 967
 968        printk(KERN_WARNING "svc: %s: ",
 969                svc_print_addr(rqstp, buf, sizeof(buf)));
 970
 971        va_start(args, fmt);
 972        r = vprintk(fmt, args);
 973        va_end(args);
 974
 975        return r;
 976}
 977
 978/*
 979 * Common routine for processing the RPC request.
 980 */
 981static int
 982svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 983{
 984        struct svc_program      *progp;
 985        struct svc_version      *versp = NULL;  /* compiler food */
 986        struct svc_procedure    *procp = NULL;
 987        struct svc_serv         *serv = rqstp->rq_server;
 988        kxdrproc_t              xdr;
 989        __be32                  *statp;
 990        u32                     prog, vers, proc;
 991        __be32                  auth_stat, rpc_stat;
 992        int                     auth_res;
 993        __be32                  *reply_statp;
 994
 995        rpc_stat = rpc_success;
 996
 997        if (argv->iov_len < 6*4)
 998                goto err_short_len;
 999
1000        /* Will be turned off only in gss privacy case: */
1001        rqstp->rq_splice_ok = 1;
1002        /* Will be turned off only when NFSv4 Sessions are used */
1003        rqstp->rq_usedeferral = 1;
1004        rqstp->rq_dropme = false;
1005
1006        /* Setup reply header */
1007        rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
1008
1009        svc_putu32(resv, rqstp->rq_xid);
1010
1011        vers = svc_getnl(argv);
1012
1013        /* First words of reply: */
1014        svc_putnl(resv, 1);             /* REPLY */
1015
1016        if (vers != 2)          /* RPC version number */
1017                goto err_bad_rpc;
1018
1019        /* Save position in case we later decide to reject: */
1020        reply_statp = resv->iov_base + resv->iov_len;
1021
1022        svc_putnl(resv, 0);             /* ACCEPT */
1023
1024        rqstp->rq_prog = prog = svc_getnl(argv);        /* program number */
1025        rqstp->rq_vers = vers = svc_getnl(argv);        /* version number */
1026        rqstp->rq_proc = proc = svc_getnl(argv);        /* procedure number */
1027
1028        progp = serv->sv_program;
1029
1030        for (progp = serv->sv_program; progp; progp = progp->pg_next)
1031                if (prog == progp->pg_prog)
1032                        break;
1033
1034        /*
1035         * Decode auth data, and add verifier to reply buffer.
1036         * We do this before anything else in order to get a decent
1037         * auth verifier.
1038         */
1039        auth_res = svc_authenticate(rqstp, &auth_stat);
1040        /* Also give the program a chance to reject this call: */
1041        if (auth_res == SVC_OK && progp) {
1042                auth_stat = rpc_autherr_badcred;
1043                auth_res = progp->pg_authenticate(rqstp);
1044        }
1045        switch (auth_res) {
1046        case SVC_OK:
1047                break;
1048        case SVC_GARBAGE:
1049                goto err_garbage;
1050        case SVC_SYSERR:
1051                rpc_stat = rpc_system_err;
1052                goto err_bad;
1053        case SVC_DENIED:
1054                goto err_bad_auth;
1055        case SVC_CLOSE:
1056                if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
1057                        svc_close_xprt(rqstp->rq_xprt);
1058        case SVC_DROP:
1059                goto dropit;
1060        case SVC_COMPLETE:
1061                goto sendit;
1062        }
1063
1064        if (progp == NULL)
1065                goto err_bad_prog;
1066
1067        if (vers >= progp->pg_nvers ||
1068          !(versp = progp->pg_vers[vers]))
1069                goto err_bad_vers;
1070
1071        procp = versp->vs_proc + proc;
1072        if (proc >= versp->vs_nproc || !procp->pc_func)
1073                goto err_bad_proc;
1074        rqstp->rq_procinfo = procp;
1075
1076        /* Syntactic check complete */
1077        serv->sv_stats->rpccnt++;
1078
1079        /* Build the reply header. */
1080        statp = resv->iov_base +resv->iov_len;
1081        svc_putnl(resv, RPC_SUCCESS);
1082
1083        /* Bump per-procedure stats counter */
1084        procp->pc_count++;
1085
1086        /* Initialize storage for argp and resp */
1087        memset(rqstp->rq_argp, 0, procp->pc_argsize);
1088        memset(rqstp->rq_resp, 0, procp->pc_ressize);
1089
1090        /* un-reserve some of the out-queue now that we have a
1091         * better idea of reply size
1092         */
1093        if (procp->pc_xdrressize)
1094                svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
1095
1096        /* Call the function that processes the request. */
1097        if (!versp->vs_dispatch) {
1098                /* Decode arguments */
1099                xdr = procp->pc_decode;
1100                if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
1101                        goto err_garbage;
1102
1103                *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
1104
1105                /* Encode reply */
1106                if (rqstp->rq_dropme) {
1107                        if (procp->pc_release)
1108                                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1109                        goto dropit;
1110                }
1111                if (*statp == rpc_success &&
1112                    (xdr = procp->pc_encode) &&
1113                    !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
1114                        dprintk("svc: failed to encode reply\n");
1115                        /* serv->sv_stats->rpcsystemerr++; */
1116                        *statp = rpc_system_err;
1117                }
1118        } else {
1119                dprintk("svc: calling dispatcher\n");
1120                if (!versp->vs_dispatch(rqstp, statp)) {
1121                        /* Release reply info */
1122                        if (procp->pc_release)
1123                                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1124                        goto dropit;
1125                }
1126        }
1127
1128        /* Check RPC status result */
1129        if (*statp != rpc_success)
1130                resv->iov_len = ((void*)statp)  - resv->iov_base + 4;
1131
1132        /* Release reply info */
1133        if (procp->pc_release)
1134                procp->pc_release(rqstp, NULL, rqstp->rq_resp);
1135
1136        if (procp->pc_encode == NULL)
1137                goto dropit;
1138
1139 sendit:
1140        if (svc_authorise(rqstp))
1141                goto dropit;
1142        return 1;               /* Caller can now send it */
1143
1144 dropit:
1145        svc_authorise(rqstp);   /* doesn't hurt to call this twice */
1146        dprintk("svc: svc_process dropit\n");
1147        return 0;
1148
1149err_short_len:
1150        svc_printk(rqstp, "short len %Zd, dropping request\n",
1151                        argv->iov_len);
1152
1153        goto dropit;                    /* drop request */
1154
1155err_bad_rpc:
1156        serv->sv_stats->rpcbadfmt++;
1157        svc_putnl(resv, 1);     /* REJECT */
1158        svc_putnl(resv, 0);     /* RPC_MISMATCH */
1159        svc_putnl(resv, 2);     /* Only RPCv2 supported */
1160        svc_putnl(resv, 2);
1161        goto sendit;
1162
1163err_bad_auth:
1164        dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
1165        serv->sv_stats->rpcbadauth++;
1166        /* Restore write pointer to location of accept status: */
1167        xdr_ressize_check(rqstp, reply_statp);
1168        svc_putnl(resv, 1);     /* REJECT */
1169        svc_putnl(resv, 1);     /* AUTH_ERROR */
1170        svc_putnl(resv, ntohl(auth_stat));      /* status */
1171        goto sendit;
1172
1173err_bad_prog:
1174        dprintk("svc: unknown program %d\n", prog);
1175        serv->sv_stats->rpcbadfmt++;
1176        svc_putnl(resv, RPC_PROG_UNAVAIL);
1177        goto sendit;
1178
1179err_bad_vers:
1180        svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
1181                       vers, prog, progp->pg_name);
1182
1183        serv->sv_stats->rpcbadfmt++;
1184        svc_putnl(resv, RPC_PROG_MISMATCH);
1185        svc_putnl(resv, progp->pg_lovers);
1186        svc_putnl(resv, progp->pg_hivers);
1187        goto sendit;
1188
1189err_bad_proc:
1190        svc_printk(rqstp, "unknown procedure (%d)\n", proc);
1191
1192        serv->sv_stats->rpcbadfmt++;
1193        svc_putnl(resv, RPC_PROC_UNAVAIL);
1194        goto sendit;
1195
1196err_garbage:
1197        svc_printk(rqstp, "failed to decode args\n");
1198
1199        rpc_stat = rpc_garbage_args;
1200err_bad:
1201        serv->sv_stats->rpcbadfmt++;
1202        svc_putnl(resv, ntohl(rpc_stat));
1203        goto sendit;
1204}
1205EXPORT_SYMBOL_GPL(svc_process);
1206
1207/*
1208 * Process the RPC request.
1209 */
1210int
1211svc_process(struct svc_rqst *rqstp)
1212{
1213        struct kvec             *argv = &rqstp->rq_arg.head[0];
1214        struct kvec             *resv = &rqstp->rq_res.head[0];
1215        struct svc_serv         *serv = rqstp->rq_server;
1216        u32                     dir;
1217
1218        /*
1219         * Setup response xdr_buf.
1220         * Initially it has just one page
1221         */
1222        rqstp->rq_resused = 1;
1223        resv->iov_base = page_address(rqstp->rq_respages[0]);
1224        resv->iov_len = 0;
1225        rqstp->rq_res.pages = rqstp->rq_respages + 1;
1226        rqstp->rq_res.len = 0;
1227        rqstp->rq_res.page_base = 0;
1228        rqstp->rq_res.page_len = 0;
1229        rqstp->rq_res.buflen = PAGE_SIZE;
1230        rqstp->rq_res.tail[0].iov_base = NULL;
1231        rqstp->rq_res.tail[0].iov_len = 0;
1232
1233        rqstp->rq_xid = svc_getu32(argv);
1234
1235        dir  = svc_getnl(argv);
1236        if (dir != 0) {
1237                /* direction != CALL */
1238                svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
1239                serv->sv_stats->rpcbadfmt++;
1240                svc_drop(rqstp);
1241                return 0;
1242        }
1243
1244        /* Returns 1 for send, 0 for drop */
1245        if (svc_process_common(rqstp, argv, resv))
1246                return svc_send(rqstp);
1247        else {
1248                svc_drop(rqstp);
1249                return 0;
1250        }
1251}
1252
#if defined(CONFIG_NFS_V4_1)
/*
 * Process a backchannel RPC request that arrived over an existing
 * outbound connection.
 *
 * @serv:  service that handles the backchannel call
 * @req:   client-side request whose receive buffer holds the call and
 *         whose send buffer will carry the reply
 * @rqstp: server-side request structure, filled in here
 *
 * Returns the result of bc_send() when a reply was built, or 0 when
 * the request is dropped.
 */
int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
	       struct svc_rqst *rqstp)
{
	struct kvec	*call_head = &rqstp->rq_arg.head[0];
	struct kvec	*reply_head = &rqstp->rq_res.head[0];

	/* Populate the svc_rqst consumed by the common processing code */
	rqstp->rq_xprt = serv->sv_bc_xprt;
	rqstp->rq_xid = req->rq_xid;
	rqstp->rq_prot = req->rq_xprt->prot;
	rqstp->rq_server = serv;

	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));

	/* Start writing the reply at the beginning of the send buffer */
	reply_head->iov_len = 0;

	if (rqstp->rq_prot != IPPROTO_TCP) {
		printk(KERN_ERR "No support for Non-TCP transports!\n");
		BUG();
	}

	/*
	 * The transport has already processed the first two words,
	 * so step over them here.
	 */
	svc_getu32(call_head);	/* XID */
	svc_getnl(call_head);	/* CALLDIR */

	/* svc_process_common() returns 1 for send, 0 for drop */
	if (!svc_process_common(rqstp, call_head, reply_head)) {
		/* Dropping a request needs no further action */
		return 0;
	}

	/* Hand the finished reply back to the client-side request */
	memcpy(&req->rq_snd_buf, &rqstp->rq_res,
					sizeof(req->rq_snd_buf));
	return bc_send(req);
}
EXPORT_SYMBOL(bc_svc_process);
#endif /* CONFIG_NFS_V4_1 */
1303
1304/*
1305 * Return (transport-specific) limit on the rpc payload.
1306 */
1307u32 svc_max_payload(const struct svc_rqst *rqstp)
1308{
1309        u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;
1310
1311        if (rqstp->rq_server->sv_max_payload < max)
1312                max = rqstp->rq_server->sv_max_payload;
1313        return max;
1314}
1315EXPORT_SYMBOL_GPL(svc_max_payload);
1316
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.