linux/net/sunrpc/xprtsock.c
<<
>>
Prefs
   1/*
   2 * linux/net/sunrpc/xprtsock.c
   3 *
   4 * Client-side transport implementation for sockets.
   5 *
   6 * TCP callback races fixes (C) 1998 Red Hat
   7 * TCP send fixes (C) 1998 Red Hat
   8 * TCP NFS related read + write fixes
   9 *  (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
  10 *
  11 * Rewrite of large parts of the code in order to stabilize TCP stuff.
  12 * Fix behaviour when socket buffer is full.
  13 *  (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
  14 *
  15 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
  16 *
  17 * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
  18 *   <gilles.quillard@bull.net>
  19 */
  20
  21#include <linux/types.h>
  22#include <linux/slab.h>
  23#include <linux/module.h>
  24#include <linux/capability.h>
  25#include <linux/pagemap.h>
  26#include <linux/errno.h>
  27#include <linux/socket.h>
  28#include <linux/in.h>
  29#include <linux/net.h>
  30#include <linux/mm.h>
  31#include <linux/udp.h>
  32#include <linux/tcp.h>
  33#include <linux/sunrpc/clnt.h>
  34#include <linux/sunrpc/sched.h>
  35#include <linux/sunrpc/svcsock.h>
  36#include <linux/sunrpc/xprtsock.h>
  37#include <linux/file.h>
  38#ifdef CONFIG_NFS_V4_1
  39#include <linux/sunrpc/bc_xprt.h>
  40#endif
  41
  42#include <net/sock.h>
  43#include <net/checksum.h>
  44#include <net/udp.h>
  45#include <net/tcp.h>
  46
  47#include "sunrpc.h"
  48/*
  49 * xprtsock tunables
  50 */
  51unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
  52unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
  53
  54unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
  55unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
  56
  57#define XS_TCP_LINGER_TO        (15U * HZ)
  58static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
  59
  60/*
  61 * We can register our own files under /proc/sys/sunrpc by
  62 * calling register_sysctl_table() again.  The files in that
  63 * directory become the union of all files registered there.
  64 *
  65 * We simply need to make sure that we don't collide with
  66 * someone else's file names!
  67 */
  68
  69#ifdef RPC_DEBUG
  70
  71static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
  72static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
  73static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
  74static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
  75
  76static struct ctl_table_header *sunrpc_table_header;
  77
  78/*
  79 * FIXME: changing the UDP slot table size should also resize the UDP
  80 *        socket buffers for existing UDP transports
  81 */
  82static ctl_table xs_tunables_table[] = {
  83        {
  84                .procname       = "udp_slot_table_entries",
  85                .data           = &xprt_udp_slot_table_entries,
  86                .maxlen         = sizeof(unsigned int),
  87                .mode           = 0644,
  88                .proc_handler   = proc_dointvec_minmax,
  89                .extra1         = &min_slot_table_size,
  90                .extra2         = &max_slot_table_size
  91        },
  92        {
  93                .procname       = "tcp_slot_table_entries",
  94                .data           = &xprt_tcp_slot_table_entries,
  95                .maxlen         = sizeof(unsigned int),
  96                .mode           = 0644,
  97                .proc_handler   = proc_dointvec_minmax,
  98                .extra1         = &min_slot_table_size,
  99                .extra2         = &max_slot_table_size
 100        },
 101        {
 102                .procname       = "min_resvport",
 103                .data           = &xprt_min_resvport,
 104                .maxlen         = sizeof(unsigned int),
 105                .mode           = 0644,
 106                .proc_handler   = proc_dointvec_minmax,
 107                .extra1         = &xprt_min_resvport_limit,
 108                .extra2         = &xprt_max_resvport_limit
 109        },
 110        {
 111                .procname       = "max_resvport",
 112                .data           = &xprt_max_resvport,
 113                .maxlen         = sizeof(unsigned int),
 114                .mode           = 0644,
 115                .proc_handler   = proc_dointvec_minmax,
 116                .extra1         = &xprt_min_resvport_limit,
 117                .extra2         = &xprt_max_resvport_limit
 118        },
 119        {
 120                .procname       = "tcp_fin_timeout",
 121                .data           = &xs_tcp_fin_timeout,
 122                .maxlen         = sizeof(xs_tcp_fin_timeout),
 123                .mode           = 0644,
 124                .proc_handler   = proc_dointvec_jiffies,
 125        },
 126        { },
 127};
 128
 129static ctl_table sunrpc_table[] = {
 130        {
 131                .procname       = "sunrpc",
 132                .mode           = 0555,
 133                .child          = xs_tunables_table
 134        },
 135        { },
 136};
 137
 138#endif
 139
 140/*
 141 * Wait duration for a reply from the RPC portmapper.
 142 */
 143#define XS_BIND_TO              (60U * HZ)
 144
 145/*
 146 * Delay if a UDP socket connect error occurs.  This is most likely some
 147 * kind of resource problem on the local host.
 148 */
 149#define XS_UDP_REEST_TO         (2U * HZ)
 150
 151/*
 152 * The reestablish timeout allows clients to delay for a bit before attempting
 153 * to reconnect to a server that just dropped our connection.
 154 *
 155 * We implement an exponential backoff when trying to reestablish a TCP
 156 * transport connection with the server.  Some servers like to drop a TCP
 157 * connection when they are overworked, so we start with a short timeout and
 158 * increase over time if the server is down or not responding.
 159 */
 160#define XS_TCP_INIT_REEST_TO    (3U * HZ)
 161#define XS_TCP_MAX_REEST_TO     (5U * 60 * HZ)
 162
 163/*
 164 * TCP idle timeout; client drops the transport socket if it is idle
 165 * for this long.  Note that we also timeout UDP sockets to prevent
 166 * holding port numbers when there is no RPC traffic.
 167 */
 168#define XS_IDLE_DISC_TO         (5U * 60 * HZ)
 169
 170#ifdef RPC_DEBUG
 171# undef  RPC_DEBUG_DATA
 172# define RPCDBG_FACILITY        RPCDBG_TRANS
 173#endif
 174
 175#ifdef RPC_DEBUG_DATA
 176static void xs_pktdump(char *msg, u32 *packet, unsigned int count)
 177{
 178        u8 *buf = (u8 *) packet;
 179        int j;
 180
 181        dprintk("RPC:       %s\n", msg);
 182        for (j = 0; j < count && j < 128; j += 4) {
 183                if (!(j & 31)) {
 184                        if (j)
 185                                dprintk("\n");
 186                        dprintk("0x%04x ", j);
 187                }
 188                dprintk("%02x%02x%02x%02x ",
 189                        buf[j], buf[j+1], buf[j+2], buf[j+3]);
 190        }
 191        dprintk("\n");
 192}
 193#else
 194static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
 195{
 196        /* NOP */
 197}
 198#endif
 199
 200struct sock_xprt {
 201        struct rpc_xprt         xprt;
 202
 203        /*
 204         * Network layer
 205         */
 206        struct socket *         sock;
 207        struct sock *           inet;
 208
 209        /*
 210         * State of TCP reply receive
 211         */
 212        __be32                  tcp_fraghdr,
 213                                tcp_xid,
 214                                tcp_calldir;
 215
 216        u32                     tcp_offset,
 217                                tcp_reclen;
 218
 219        unsigned long           tcp_copied,
 220                                tcp_flags;
 221
 222        /*
 223         * Connection of transports
 224         */
 225        struct delayed_work     connect_worker;
 226        struct sockaddr_storage srcaddr;
 227        unsigned short          srcport;
 228
 229        /*
 230         * UDP socket buffer size parameters
 231         */
 232        size_t                  rcvsize,
 233                                sndsize;
 234
 235        /*
 236         * Saved socket callback addresses
 237         */
 238        void                    (*old_data_ready)(struct sock *, int);
 239        void                    (*old_state_change)(struct sock *);
 240        void                    (*old_write_space)(struct sock *);
 241        void                    (*old_error_report)(struct sock *);
 242};
 243
 244/*
 245 * TCP receive state flags
 246 */
 247#define TCP_RCV_LAST_FRAG       (1UL << 0)
 248#define TCP_RCV_COPY_FRAGHDR    (1UL << 1)
 249#define TCP_RCV_COPY_XID        (1UL << 2)
 250#define TCP_RCV_COPY_DATA       (1UL << 3)
 251#define TCP_RCV_READ_CALLDIR    (1UL << 4)
 252#define TCP_RCV_COPY_CALLDIR    (1UL << 5)
 253
 254/*
 255 * TCP RPC flags
 256 */
 257#define TCP_RPC_REPLY           (1UL << 6)
 258
 259static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
 260{
 261        return (struct sockaddr *) &xprt->addr;
 262}
 263
 264static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
 265{
 266        return (struct sockaddr_in *) &xprt->addr;
 267}
 268
 269static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
 270{
 271        return (struct sockaddr_in6 *) &xprt->addr;
 272}
 273
 274static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
 275{
 276        struct sockaddr *sap = xs_addr(xprt);
 277        struct sockaddr_in6 *sin6;
 278        struct sockaddr_in *sin;
 279        char buf[128];
 280
 281        (void)rpc_ntop(sap, buf, sizeof(buf));
 282        xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
 283
 284        switch (sap->sa_family) {
 285        case AF_INET:
 286                sin = xs_addr_in(xprt);
 287                snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
 288                break;
 289        case AF_INET6:
 290                sin6 = xs_addr_in6(xprt);
 291                snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
 292                break;
 293        default:
 294                BUG();
 295        }
 296        xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
 297}
 298
 299static void xs_format_common_peer_ports(struct rpc_xprt *xprt)
 300{
 301        struct sockaddr *sap = xs_addr(xprt);
 302        char buf[128];
 303
 304        snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
 305        xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 306
 307        snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
 308        xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 309}
 310
 311static void xs_format_peer_addresses(struct rpc_xprt *xprt,
 312                                     const char *protocol,
 313                                     const char *netid)
 314{
 315        xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
 316        xprt->address_strings[RPC_DISPLAY_NETID] = netid;
 317        xs_format_common_peer_addresses(xprt);
 318        xs_format_common_peer_ports(xprt);
 319}
 320
 321static void xs_update_peer_port(struct rpc_xprt *xprt)
 322{
 323        kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
 324        kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
 325
 326        xs_format_common_peer_ports(xprt);
 327}
 328
 329static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 330{
 331        unsigned int i;
 332
 333        for (i = 0; i < RPC_DISPLAY_MAX; i++)
 334                switch (i) {
 335                case RPC_DISPLAY_PROTO:
 336                case RPC_DISPLAY_NETID:
 337                        continue;
 338                default:
 339                        kfree(xprt->address_strings[i]);
 340                }
 341}
 342
 343#define XS_SENDMSG_FLAGS        (MSG_DONTWAIT | MSG_NOSIGNAL)
 344
 345static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
 346{
 347        struct msghdr msg = {
 348                .msg_name       = addr,
 349                .msg_namelen    = addrlen,
 350                .msg_flags      = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
 351        };
 352        struct kvec iov = {
 353                .iov_base       = vec->iov_base + base,
 354                .iov_len        = vec->iov_len - base,
 355        };
 356
 357        if (iov.iov_len != 0)
 358                return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
 359        return kernel_sendmsg(sock, &msg, NULL, 0, 0);
 360}
 361
 362static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
 363{
 364        struct page **ppage;
 365        unsigned int remainder;
 366        int err, sent = 0;
 367
 368        remainder = xdr->page_len - base;
 369        base += xdr->page_base;
 370        ppage = xdr->pages + (base >> PAGE_SHIFT);
 371        base &= ~PAGE_MASK;
 372        for(;;) {
 373                unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
 374                int flags = XS_SENDMSG_FLAGS;
 375
 376                remainder -= len;
 377                if (remainder != 0 || more)
 378                        flags |= MSG_MORE;
 379                err = sock->ops->sendpage(sock, *ppage, base, len, flags);
 380                if (remainder == 0 || err != len)
 381                        break;
 382                sent += err;
 383                ppage++;
 384                base = 0;
 385        }
 386        if (sent == 0)
 387                return err;
 388        if (err > 0)
 389                sent += err;
 390        return sent;
 391}
 392
 393/**
 394 * xs_sendpages - write pages directly to a socket
 395 * @sock: socket to send on
 396 * @addr: UDP only -- address of destination
 397 * @addrlen: UDP only -- length of destination address
 398 * @xdr: buffer containing this request
 399 * @base: starting position in the buffer
 400 *
 401 */
 402static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
 403{
 404        unsigned int remainder = xdr->len - base;
 405        int err, sent = 0;
 406
 407        if (unlikely(!sock))
 408                return -ENOTSOCK;
 409
 410        clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 411        if (base != 0) {
 412                addr = NULL;
 413                addrlen = 0;
 414        }
 415
 416        if (base < xdr->head[0].iov_len || addr != NULL) {
 417                unsigned int len = xdr->head[0].iov_len - base;
 418                remainder -= len;
 419                err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
 420                if (remainder == 0 || err != len)
 421                        goto out;
 422                sent += err;
 423                base = 0;
 424        } else
 425                base -= xdr->head[0].iov_len;
 426
 427        if (base < xdr->page_len) {
 428                unsigned int len = xdr->page_len - base;
 429                remainder -= len;
 430                err = xs_send_pagedata(sock, xdr, base, remainder != 0);
 431                if (remainder == 0 || err != len)
 432                        goto out;
 433                sent += err;
 434                base = 0;
 435        } else
 436                base -= xdr->page_len;
 437
 438        if (base >= xdr->tail[0].iov_len)
 439                return sent;
 440        err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
 441out:
 442        if (sent == 0)
 443                return err;
 444        if (err > 0)
 445                sent += err;
 446        return sent;
 447}
 448
 449static void xs_nospace_callback(struct rpc_task *task)
 450{
 451        struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
 452
 453        transport->inet->sk_write_pending--;
 454        clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 455}
 456
 457/**
 458 * xs_nospace - place task on wait queue if transmit was incomplete
 459 * @task: task to put to sleep
 460 *
 461 */
 462static int xs_nospace(struct rpc_task *task)
 463{
 464        struct rpc_rqst *req = task->tk_rqstp;
 465        struct rpc_xprt *xprt = req->rq_xprt;
 466        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 467        int ret = 0;
 468
 469        dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
 470                        task->tk_pid, req->rq_slen - req->rq_bytes_sent,
 471                        req->rq_slen);
 472
 473        /* Protect against races with write_space */
 474        spin_lock_bh(&xprt->transport_lock);
 475
 476        /* Don't race with disconnect */
 477        if (xprt_connected(xprt)) {
 478                if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
 479                        ret = -EAGAIN;
 480                        /*
 481                         * Notify TCP that we're limited by the application
 482                         * window size
 483                         */
 484                        set_bit(SOCK_NOSPACE, &transport->sock->flags);
 485                        transport->inet->sk_write_pending++;
 486                        /* ...and wait for more buffer space */
 487                        xprt_wait_for_buffer_space(task, xs_nospace_callback);
 488                }
 489        } else {
 490                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 491                ret = -ENOTCONN;
 492        }
 493
 494        spin_unlock_bh(&xprt->transport_lock);
 495        return ret;
 496}
 497
 498/**
 499 * xs_udp_send_request - write an RPC request to a UDP socket
 500 * @task: address of RPC task that manages the state of an RPC request
 501 *
 502 * Return values:
 503 *        0:    The request has been sent
 504 *   EAGAIN:    The socket was blocked, please call again later to
 505 *              complete the request
 506 * ENOTCONN:    Caller needs to invoke connect logic then call again
 507 *    other:    Some other error occured, the request was not sent
 508 */
 509static int xs_udp_send_request(struct rpc_task *task)
 510{
 511        struct rpc_rqst *req = task->tk_rqstp;
 512        struct rpc_xprt *xprt = req->rq_xprt;
 513        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 514        struct xdr_buf *xdr = &req->rq_snd_buf;
 515        int status;
 516
 517        xs_pktdump("packet data:",
 518                                req->rq_svec->iov_base,
 519                                req->rq_svec->iov_len);
 520
 521        if (!xprt_bound(xprt))
 522                return -ENOTCONN;
 523        status = xs_sendpages(transport->sock,
 524                              xs_addr(xprt),
 525                              xprt->addrlen, xdr,
 526                              req->rq_bytes_sent);
 527
 528        dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 529                        xdr->len - req->rq_bytes_sent, status);
 530
 531        if (status >= 0) {
 532                req->rq_xmit_bytes_sent += status;
 533                if (status >= req->rq_slen)
 534                        return 0;
 535                /* Still some bytes left; set up for a retry later. */
 536                status = -EAGAIN;
 537        }
 538
 539        switch (status) {
 540        case -ENOTSOCK:
 541                status = -ENOTCONN;
 542                /* Should we call xs_close() here? */
 543                break;
 544        case -EAGAIN:
 545                status = xs_nospace(task);
 546                break;
 547        default:
 548                dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 549                        -status);
 550        case -ENETUNREACH:
 551        case -EPIPE:
 552        case -ECONNREFUSED:
 553                /* When the server has died, an ICMP port unreachable message
 554                 * prompts ECONNREFUSED. */
 555                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 556        }
 557
 558        return status;
 559}
 560
 561/**
 562 * xs_tcp_shutdown - gracefully shut down a TCP socket
 563 * @xprt: transport
 564 *
 565 * Initiates a graceful shutdown of the TCP socket by calling the
 566 * equivalent of shutdown(SHUT_WR);
 567 */
 568static void xs_tcp_shutdown(struct rpc_xprt *xprt)
 569{
 570        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 571        struct socket *sock = transport->sock;
 572
 573        if (sock != NULL)
 574                kernel_sock_shutdown(sock, SHUT_WR);
 575}
 576
 577static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
 578{
 579        u32 reclen = buf->len - sizeof(rpc_fraghdr);
 580        rpc_fraghdr *base = buf->head[0].iov_base;
 581        *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
 582}
 583
 584/**
 585 * xs_tcp_send_request - write an RPC request to a TCP socket
 586 * @task: address of RPC task that manages the state of an RPC request
 587 *
 588 * Return values:
 589 *        0:    The request has been sent
 590 *   EAGAIN:    The socket was blocked, please call again later to
 591 *              complete the request
 592 * ENOTCONN:    Caller needs to invoke connect logic then call again
 593 *    other:    Some other error occured, the request was not sent
 594 *
 595 * XXX: In the case of soft timeouts, should we eventually give up
 596 *      if sendmsg is not able to make progress?
 597 */
 598static int xs_tcp_send_request(struct rpc_task *task)
 599{
 600        struct rpc_rqst *req = task->tk_rqstp;
 601        struct rpc_xprt *xprt = req->rq_xprt;
 602        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 603        struct xdr_buf *xdr = &req->rq_snd_buf;
 604        int status;
 605
 606        xs_encode_tcp_record_marker(&req->rq_snd_buf);
 607
 608        xs_pktdump("packet data:",
 609                                req->rq_svec->iov_base,
 610                                req->rq_svec->iov_len);
 611
 612        /* Continue transmitting the packet/record. We must be careful
 613         * to cope with writespace callbacks arriving _after_ we have
 614         * called sendmsg(). */
 615        while (1) {
 616                status = xs_sendpages(transport->sock,
 617                                        NULL, 0, xdr, req->rq_bytes_sent);
 618
 619                dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
 620                                xdr->len - req->rq_bytes_sent, status);
 621
 622                if (unlikely(status < 0))
 623                        break;
 624
 625                /* If we've sent the entire packet, immediately
 626                 * reset the count of bytes sent. */
 627                req->rq_bytes_sent += status;
 628                req->rq_xmit_bytes_sent += status;
 629                if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 630                        req->rq_bytes_sent = 0;
 631                        return 0;
 632                }
 633
 634                if (status != 0)
 635                        continue;
 636                status = -EAGAIN;
 637                break;
 638        }
 639
 640        switch (status) {
 641        case -ENOTSOCK:
 642                status = -ENOTCONN;
 643                /* Should we call xs_close() here? */
 644                break;
 645        case -EAGAIN:
 646                status = xs_nospace(task);
 647                break;
 648        default:
 649                dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 650                        -status);
 651        case -ECONNRESET:
 652        case -EPIPE:
 653                xs_tcp_shutdown(xprt);
 654        case -ECONNREFUSED:
 655        case -ENOTCONN:
 656                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 657        }
 658
 659        return status;
 660}
 661
 662/**
 663 * xs_tcp_release_xprt - clean up after a tcp transmission
 664 * @xprt: transport
 665 * @task: rpc task
 666 *
 667 * This cleans up if an error causes us to abort the transmission of a request.
 668 * In this case, the socket may need to be reset in order to avoid confusing
 669 * the server.
 670 */
 671static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 672{
 673        struct rpc_rqst *req;
 674
 675        if (task != xprt->snd_task)
 676                return;
 677        if (task == NULL)
 678                goto out_release;
 679        req = task->tk_rqstp;
 680        if (req->rq_bytes_sent == 0)
 681                goto out_release;
 682        if (req->rq_bytes_sent == req->rq_snd_buf.len)
 683                goto out_release;
 684        set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state);
 685out_release:
 686        xprt_release_xprt(xprt, task);
 687}
 688
 689static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
 690{
 691        transport->old_data_ready = sk->sk_data_ready;
 692        transport->old_state_change = sk->sk_state_change;
 693        transport->old_write_space = sk->sk_write_space;
 694        transport->old_error_report = sk->sk_error_report;
 695}
 696
 697static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
 698{
 699        sk->sk_data_ready = transport->old_data_ready;
 700        sk->sk_state_change = transport->old_state_change;
 701        sk->sk_write_space = transport->old_write_space;
 702        sk->sk_error_report = transport->old_error_report;
 703}
 704
 705static void xs_reset_transport(struct sock_xprt *transport)
 706{
 707        struct socket *sock = transport->sock;
 708        struct sock *sk = transport->inet;
 709
 710        if (sk == NULL)
 711                return;
 712
 713        write_lock_bh(&sk->sk_callback_lock);
 714        transport->inet = NULL;
 715        transport->sock = NULL;
 716
 717        sk->sk_user_data = NULL;
 718
 719        xs_restore_old_callbacks(transport, sk);
 720        write_unlock_bh(&sk->sk_callback_lock);
 721
 722        sk->sk_no_check = 0;
 723
 724        sock_release(sock);
 725}
 726
 727/**
 728 * xs_close - close a socket
 729 * @xprt: transport
 730 *
 731 * This is used when all requests are complete; ie, no DRC state remains
 732 * on the server we want to save.
 733 *
 734 * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
 735 * xs_reset_transport() zeroing the socket from underneath a writer.
 736 */
 737static void xs_close(struct rpc_xprt *xprt)
 738{
 739        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 740
 741        dprintk("RPC:       xs_close xprt %p\n", xprt);
 742
 743        xs_reset_transport(transport);
 744        xprt->reestablish_timeout = 0;
 745
 746        smp_mb__before_clear_bit();
 747        clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
 748        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 749        clear_bit(XPRT_CLOSING, &xprt->state);
 750        smp_mb__after_clear_bit();
 751        xprt_disconnect_done(xprt);
 752}
 753
 754static void xs_tcp_close(struct rpc_xprt *xprt)
 755{
 756        if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
 757                xs_close(xprt);
 758        else
 759                xs_tcp_shutdown(xprt);
 760}
 761
 762/**
 763 * xs_destroy - prepare to shutdown a transport
 764 * @xprt: doomed transport
 765 *
 766 */
 767static void xs_destroy(struct rpc_xprt *xprt)
 768{
 769        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 770
 771        dprintk("RPC:       xs_destroy xprt %p\n", xprt);
 772
 773        cancel_rearming_delayed_work(&transport->connect_worker);
 774
 775        xs_close(xprt);
 776        xs_free_peer_addresses(xprt);
 777        kfree(xprt->slot);
 778        kfree(xprt);
 779        module_put(THIS_MODULE);
 780}
 781
 782static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
 783{
 784        return (struct rpc_xprt *) sk->sk_user_data;
 785}
 786
 787/**
 788 * xs_udp_data_ready - "data ready" callback for UDP sockets
 789 * @sk: socket with data to read
 790 * @len: how much data to read
 791 *
 792 */
 793static void xs_udp_data_ready(struct sock *sk, int len)
 794{
 795        struct rpc_task *task;
 796        struct rpc_xprt *xprt;
 797        struct rpc_rqst *rovr;
 798        struct sk_buff *skb;
 799        int err, repsize, copied;
 800        u32 _xid;
 801        __be32 *xp;
 802
 803        read_lock(&sk->sk_callback_lock);
 804        dprintk("RPC:       xs_udp_data_ready...\n");
 805        if (!(xprt = xprt_from_sock(sk)))
 806                goto out;
 807
 808        if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
 809                goto out;
 810
 811        if (xprt->shutdown)
 812                goto dropit;
 813
 814        repsize = skb->len - sizeof(struct udphdr);
 815        if (repsize < 4) {
 816                dprintk("RPC:       impossible RPC reply size %d!\n", repsize);
 817                goto dropit;
 818        }
 819
 820        /* Copy the XID from the skb... */
 821        xp = skb_header_pointer(skb, sizeof(struct udphdr),
 822                                sizeof(_xid), &_xid);
 823        if (xp == NULL)
 824                goto dropit;
 825
 826        /* Look up and lock the request corresponding to the given XID */
 827        spin_lock(&xprt->transport_lock);
 828        rovr = xprt_lookup_rqst(xprt, *xp);
 829        if (!rovr)
 830                goto out_unlock;
 831        task = rovr->rq_task;
 832
 833        if ((copied = rovr->rq_private_buf.buflen) > repsize)
 834                copied = repsize;
 835
 836        /* Suck it into the iovec, verify checksum if not done by hw. */
 837        if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
 838                UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS);
 839                goto out_unlock;
 840        }
 841
 842        UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 843
 844        /* Something worked... */
 845        dst_confirm(skb_dst(skb));
 846
 847        xprt_adjust_cwnd(task, copied);
 848        xprt_complete_rqst(task, copied);
 849
 850 out_unlock:
 851        spin_unlock(&xprt->transport_lock);
 852 dropit:
 853        skb_free_datagram(sk, skb);
 854 out:
 855        read_unlock(&sk->sk_callback_lock);
 856}
 857
 858static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
 859{
 860        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 861        size_t len, used;
 862        char *p;
 863
 864        p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
 865        len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
 866        used = xdr_skb_read_bits(desc, p, len);
 867        transport->tcp_offset += used;
 868        if (used != len)
 869                return;
 870
 871        transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
 872        if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
 873                transport->tcp_flags |= TCP_RCV_LAST_FRAG;
 874        else
 875                transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
 876        transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
 877
 878        transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
 879        transport->tcp_offset = 0;
 880
 881        /* Sanity check of the record length */
 882        if (unlikely(transport->tcp_reclen < 8)) {
 883                dprintk("RPC:       invalid TCP record fragment length\n");
 884                xprt_force_disconnect(xprt);
 885                return;
 886        }
 887        dprintk("RPC:       reading TCP record fragment of length %d\n",
 888                        transport->tcp_reclen);
 889}
 890
 891static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
 892{
 893        if (transport->tcp_offset == transport->tcp_reclen) {
 894                transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
 895                transport->tcp_offset = 0;
 896                if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
 897                        transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
 898                        transport->tcp_flags |= TCP_RCV_COPY_XID;
 899                        transport->tcp_copied = 0;
 900                }
 901        }
 902}
 903
 904static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
 905{
 906        size_t len, used;
 907        char *p;
 908
 909        len = sizeof(transport->tcp_xid) - transport->tcp_offset;
 910        dprintk("RPC:       reading XID (%Zu bytes)\n", len);
 911        p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
 912        used = xdr_skb_read_bits(desc, p, len);
 913        transport->tcp_offset += used;
 914        if (used != len)
 915                return;
 916        transport->tcp_flags &= ~TCP_RCV_COPY_XID;
 917        transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
 918        transport->tcp_copied = 4;
 919        dprintk("RPC:       reading %s XID %08x\n",
 920                        (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
 921                                                              : "request with",
 922                        ntohl(transport->tcp_xid));
 923        xs_tcp_check_fraghdr(transport);
 924}
 925
 926static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
 927                                       struct xdr_skb_reader *desc)
 928{
 929        size_t len, used;
 930        u32 offset;
 931        char *p;
 932
 933        /*
 934         * We want transport->tcp_offset to be 8 at the end of this routine
 935         * (4 bytes for the xid and 4 bytes for the call/reply flag).
 936         * When this function is called for the first time,
 937         * transport->tcp_offset is 4 (after having already read the xid).
 938         */
 939        offset = transport->tcp_offset - sizeof(transport->tcp_xid);
 940        len = sizeof(transport->tcp_calldir) - offset;
 941        dprintk("RPC:       reading CALL/REPLY flag (%Zu bytes)\n", len);
 942        p = ((char *) &transport->tcp_calldir) + offset;
 943        used = xdr_skb_read_bits(desc, p, len);
 944        transport->tcp_offset += used;
 945        if (used != len)
 946                return;
 947        transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
 948        /*
 949         * We don't yet have the XDR buffer, so we will write the calldir
 950         * out after we get the buffer from the 'struct rpc_rqst'
 951         */
 952        switch (ntohl(transport->tcp_calldir)) {
 953        case RPC_REPLY:
 954                transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
 955                transport->tcp_flags |= TCP_RCV_COPY_DATA;
 956                transport->tcp_flags |= TCP_RPC_REPLY;
 957                break;
 958        case RPC_CALL:
 959                transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
 960                transport->tcp_flags |= TCP_RCV_COPY_DATA;
 961                transport->tcp_flags &= ~TCP_RPC_REPLY;
 962                break;
 963        default:
 964                dprintk("RPC:       invalid request message type\n");
 965                xprt_force_disconnect(&transport->xprt);
 966        }
 967        xs_tcp_check_fraghdr(transport);
 968}
 969
 970static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 971                                     struct xdr_skb_reader *desc,
 972                                     struct rpc_rqst *req)
 973{
 974        struct sock_xprt *transport =
 975                                container_of(xprt, struct sock_xprt, xprt);
 976        struct xdr_buf *rcvbuf;
 977        size_t len;
 978        ssize_t r;
 979
 980        rcvbuf = &req->rq_private_buf;
 981
 982        if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
 983                /*
 984                 * Save the RPC direction in the XDR buffer
 985                 */
 986                memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
 987                        &transport->tcp_calldir,
 988                        sizeof(transport->tcp_calldir));
 989                transport->tcp_copied += sizeof(transport->tcp_calldir);
 990                transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
 991        }
 992
 993        len = desc->count;
 994        if (len > transport->tcp_reclen - transport->tcp_offset) {
 995                struct xdr_skb_reader my_desc;
 996
 997                len = transport->tcp_reclen - transport->tcp_offset;
 998                memcpy(&my_desc, desc, sizeof(my_desc));
 999                my_desc.count = len;
1000                r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
1001                                          &my_desc, xdr_skb_read_bits);
1002                desc->count -= r;
1003                desc->offset += r;
1004        } else
1005                r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
1006                                          desc, xdr_skb_read_bits);
1007
1008        if (r > 0) {
1009                transport->tcp_copied += r;
1010                transport->tcp_offset += r;
1011        }
1012        if (r != len) {
1013                /* Error when copying to the receive buffer,
1014                 * usually because we weren't able to allocate
1015                 * additional buffer pages. All we can do now
1016                 * is turn off TCP_RCV_COPY_DATA, so the request
1017                 * will not receive any additional updates,
1018                 * and time out.
1019                 * Any remaining data from this record will
1020                 * be discarded.
1021                 */
1022                transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
1023                dprintk("RPC:       XID %08x truncated request\n",
1024                                ntohl(transport->tcp_xid));
1025                dprintk("RPC:       xprt = %p, tcp_copied = %lu, "
1026                                "tcp_offset = %u, tcp_reclen = %u\n",
1027                                xprt, transport->tcp_copied,
1028                                transport->tcp_offset, transport->tcp_reclen);
1029                return;
1030        }
1031
1032        dprintk("RPC:       XID %08x read %Zd bytes\n",
1033                        ntohl(transport->tcp_xid), r);
1034        dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
1035                        "tcp_reclen = %u\n", xprt, transport->tcp_copied,
1036                        transport->tcp_offset, transport->tcp_reclen);
1037
1038        if (transport->tcp_copied == req->rq_private_buf.buflen)
1039                transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
1040        else if (transport->tcp_offset == transport->tcp_reclen) {
1041                if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
1042                        transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
1043        }
1044}
1045
1046/*
1047 * Finds the request corresponding to the RPC xid and invokes the common
1048 * tcp read code to read the data.
1049 */
1050static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1051                                    struct xdr_skb_reader *desc)
1052{
1053        struct sock_xprt *transport =
1054                                container_of(xprt, struct sock_xprt, xprt);
1055        struct rpc_rqst *req;
1056
1057        dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
1058
1059        /* Find and lock the request corresponding to this xid */
1060        spin_lock(&xprt->transport_lock);
1061        req = xprt_lookup_rqst(xprt, transport->tcp_xid);
1062        if (!req) {
1063                dprintk("RPC:       XID %08x request not found!\n",
1064                                ntohl(transport->tcp_xid));
1065                spin_unlock(&xprt->transport_lock);
1066                return -1;
1067        }
1068
1069        xs_tcp_read_common(xprt, desc, req);
1070
1071        if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
1072                xprt_complete_rqst(req->rq_task, transport->tcp_copied);
1073
1074        spin_unlock(&xprt->transport_lock);
1075        return 0;
1076}
1077
1078#if defined(CONFIG_NFS_V4_1)
1079/*
1080 * Obtains an rpc_rqst previously allocated and invokes the common
1081 * tcp read code to read the data.  The result is placed in the callback
1082 * queue.
1083 * If we're unable to obtain the rpc_rqst we schedule the closing of the
1084 * connection and return -1.
1085 */
1086static inline int xs_tcp_read_callback(struct rpc_xprt *xprt,
1087                                       struct xdr_skb_reader *desc)
1088{
1089        struct sock_xprt *transport =
1090                                container_of(xprt, struct sock_xprt, xprt);
1091        struct rpc_rqst *req;
1092
1093        req = xprt_alloc_bc_request(xprt);
1094        if (req == NULL) {
1095                printk(KERN_WARNING "Callback slot table overflowed\n");
1096                xprt_force_disconnect(xprt);
1097                return -1;
1098        }
1099
1100        req->rq_xid = transport->tcp_xid;
1101        dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
1102        xs_tcp_read_common(xprt, desc, req);
1103
1104        if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) {
1105                struct svc_serv *bc_serv = xprt->bc_serv;
1106
1107                /*
1108                 * Add callback request to callback list.  The callback
1109                 * service sleeps on the sv_cb_waitq waiting for new
1110                 * requests.  Wake it up after adding enqueing the
1111                 * request.
1112                 */
1113                dprintk("RPC:       add callback request to list\n");
1114                spin_lock(&bc_serv->sv_cb_lock);
1115                list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
1116                spin_unlock(&bc_serv->sv_cb_lock);
1117                wake_up(&bc_serv->sv_cb_waitq);
1118        }
1119
1120        req->rq_private_buf.len = transport->tcp_copied;
1121
1122        return 0;
1123}
1124
1125static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
1126                                        struct xdr_skb_reader *desc)
1127{
1128        struct sock_xprt *transport =
1129                                container_of(xprt, struct sock_xprt, xprt);
1130
1131        return (transport->tcp_flags & TCP_RPC_REPLY) ?
1132                xs_tcp_read_reply(xprt, desc) :
1133                xs_tcp_read_callback(xprt, desc);
1134}
1135#else
/*
 * Variant built when CONFIG_NFS_V4_1 is not defined: all incoming data
 * is handed to the reply reader.
 */
static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
					struct xdr_skb_reader *desc)
{
	int status = xs_tcp_read_reply(xprt, desc);

	return status;
}
1141#endif /* CONFIG_NFS_V4_1 */
1142
1143/*
1144 * Read data off the transport.  This can be either an RPC_CALL or an
1145 * RPC_REPLY.  Relay the processing to helper functions.
1146 */
1147static void xs_tcp_read_data(struct rpc_xprt *xprt,
1148                                    struct xdr_skb_reader *desc)
1149{
1150        struct sock_xprt *transport =
1151                                container_of(xprt, struct sock_xprt, xprt);
1152
1153        if (_xs_tcp_read_data(xprt, desc) == 0)
1154                xs_tcp_check_fraghdr(transport);
1155        else {
1156                /*
1157                 * The transport_lock protects the request handling.
1158                 * There's no need to hold it to update the tcp_flags.
1159                 */
1160                transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
1161        }
1162}
1163
1164static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
1165{
1166        size_t len;
1167
1168        len = transport->tcp_reclen - transport->tcp_offset;
1169        if (len > desc->count)
1170                len = desc->count;
1171        desc->count -= len;
1172        desc->offset += len;
1173        transport->tcp_offset += len;
1174        dprintk("RPC:       discarded %Zu bytes\n", len);
1175        xs_tcp_check_fraghdr(transport);
1176}
1177
1178static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
1179{
1180        struct rpc_xprt *xprt = rd_desc->arg.data;
1181        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1182        struct xdr_skb_reader desc = {
1183                .skb    = skb,
1184                .offset = offset,
1185                .count  = len,
1186        };
1187
1188        dprintk("RPC:       xs_tcp_data_recv started\n");
1189        do {
1190                /* Read in a new fragment marker if necessary */
1191                /* Can we ever really expect to get completely empty fragments? */
1192                if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
1193                        xs_tcp_read_fraghdr(xprt, &desc);
1194                        continue;
1195                }
1196                /* Read in the xid if necessary */
1197                if (transport->tcp_flags & TCP_RCV_COPY_XID) {
1198                        xs_tcp_read_xid(transport, &desc);
1199                        continue;
1200                }
1201                /* Read in the call/reply flag */
1202                if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
1203                        xs_tcp_read_calldir(transport, &desc);
1204                        continue;
1205                }
1206                /* Read in the request data */
1207                if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
1208                        xs_tcp_read_data(xprt, &desc);
1209                        continue;
1210                }
1211                /* Skip over any trailing bytes on short reads */
1212                xs_tcp_read_discard(transport, &desc);
1213        } while (desc.count);
1214        dprintk("RPC:       xs_tcp_data_recv done\n");
1215        return len - desc.count;
1216}
1217
1218/**
1219 * xs_tcp_data_ready - "data ready" callback for TCP sockets
1220 * @sk: socket with data to read
1221 * @bytes: how much data to read
1222 *
1223 */
1224static void xs_tcp_data_ready(struct sock *sk, int bytes)
1225{
1226        struct rpc_xprt *xprt;
1227        read_descriptor_t rd_desc;
1228        int read;
1229
1230        dprintk("RPC:       xs_tcp_data_ready...\n");
1231
1232        read_lock(&sk->sk_callback_lock);
1233        if (!(xprt = xprt_from_sock(sk)))
1234                goto out;
1235        if (xprt->shutdown)
1236                goto out;
1237
1238        /* Any data means we had a useful conversation, so
1239         * the we don't need to delay the next reconnect
1240         */
1241        if (xprt->reestablish_timeout)
1242                xprt->reestablish_timeout = 0;
1243
1244        /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1245        rd_desc.arg.data = xprt;
1246        do {
1247                rd_desc.count = 65536;
1248                read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
1249        } while (read > 0);
1250out:
1251        read_unlock(&sk->sk_callback_lock);
1252}
1253
1254/*
1255 * Do the equivalent of linger/linger2 handling for dealing with
1256 * broken servers that don't close the socket in a timely
1257 * fashion
1258 */
1259static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
1260                unsigned long timeout)
1261{
1262        struct sock_xprt *transport;
1263
1264        if (xprt_test_and_set_connecting(xprt))
1265                return;
1266        set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1267        transport = container_of(xprt, struct sock_xprt, xprt);
1268        queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
1269                           timeout);
1270}
1271
1272static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1273{
1274        struct sock_xprt *transport;
1275
1276        transport = container_of(xprt, struct sock_xprt, xprt);
1277
1278        if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
1279            !cancel_delayed_work(&transport->connect_worker))
1280                return;
1281        clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1282        xprt_clear_connecting(xprt);
1283}
1284
1285static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1286{
1287        smp_mb__before_clear_bit();
1288        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1289        clear_bit(XPRT_CLOSING, &xprt->state);
1290        smp_mb__after_clear_bit();
1291        /* Mark transport as closed and wake up all pending tasks */
1292        xprt_disconnect_done(xprt);
1293}
1294
1295/**
1296 * xs_tcp_state_change - callback to handle TCP socket state changes
1297 * @sk: socket whose state has changed
1298 *
1299 */
1300static void xs_tcp_state_change(struct sock *sk)
1301{
1302        struct rpc_xprt *xprt;
1303
1304        read_lock(&sk->sk_callback_lock);
1305        if (!(xprt = xprt_from_sock(sk)))
1306                goto out;
1307        dprintk("RPC:       xs_tcp_state_change client %p...\n", xprt);
1308        dprintk("RPC:       state %x conn %d dead %d zapped %d\n",
1309                        sk->sk_state, xprt_connected(xprt),
1310                        sock_flag(sk, SOCK_DEAD),
1311                        sock_flag(sk, SOCK_ZAPPED));
1312
1313        switch (sk->sk_state) {
1314        case TCP_ESTABLISHED:
1315                spin_lock_bh(&xprt->transport_lock);
1316                if (!xprt_test_and_set_connected(xprt)) {
1317                        struct sock_xprt *transport = container_of(xprt,
1318                                        struct sock_xprt, xprt);
1319
1320                        /* Reset TCP record info */
1321                        transport->tcp_offset = 0;
1322                        transport->tcp_reclen = 0;
1323                        transport->tcp_copied = 0;
1324                        transport->tcp_flags =
1325                                TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1326
1327                        xprt_wake_pending_tasks(xprt, -EAGAIN);
1328                }
1329                spin_unlock_bh(&xprt->transport_lock);
1330                break;
1331        case TCP_FIN_WAIT1:
1332                /* The client initiated a shutdown of the socket */
1333                xprt->connect_cookie++;
1334                xprt->reestablish_timeout = 0;
1335                set_bit(XPRT_CLOSING, &xprt->state);
1336                smp_mb__before_clear_bit();
1337                clear_bit(XPRT_CONNECTED, &xprt->state);
1338                clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1339                smp_mb__after_clear_bit();
1340                xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1341                break;
1342        case TCP_CLOSE_WAIT:
1343                /* The server initiated a shutdown of the socket */
1344                xprt_force_disconnect(xprt);
1345        case TCP_SYN_SENT:
1346                xprt->connect_cookie++;
1347        case TCP_CLOSING:
1348                /*
1349                 * If the server closed down the connection, make sure that
1350                 * we back off before reconnecting
1351                 */
1352                if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
1353                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1354                break;
1355        case TCP_LAST_ACK:
1356                set_bit(XPRT_CLOSING, &xprt->state);
1357                xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1358                smp_mb__before_clear_bit();
1359                clear_bit(XPRT_CONNECTED, &xprt->state);
1360                smp_mb__after_clear_bit();
1361                break;
1362        case TCP_CLOSE:
1363                xs_tcp_cancel_linger_timeout(xprt);
1364                xs_sock_mark_closed(xprt);
1365        }
1366 out:
1367        read_unlock(&sk->sk_callback_lock);
1368}
1369
1370/**
1371 * xs_error_report - callback mainly for catching socket errors
1372 * @sk: socket
1373 */
1374static void xs_error_report(struct sock *sk)
1375{
1376        struct rpc_xprt *xprt;
1377
1378        read_lock(&sk->sk_callback_lock);
1379        if (!(xprt = xprt_from_sock(sk)))
1380                goto out;
1381        dprintk("RPC:       %s client %p...\n"
1382                        "RPC:       error %d\n",
1383                        __func__, xprt, sk->sk_err);
1384        xprt_wake_pending_tasks(xprt, -EAGAIN);
1385out:
1386        read_unlock(&sk->sk_callback_lock);
1387}
1388
1389static void xs_write_space(struct sock *sk)
1390{
1391        struct socket *sock;
1392        struct rpc_xprt *xprt;
1393
1394        if (unlikely(!(sock = sk->sk_socket)))
1395                return;
1396        clear_bit(SOCK_NOSPACE, &sock->flags);
1397
1398        if (unlikely(!(xprt = xprt_from_sock(sk))))
1399                return;
1400        if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1401                return;
1402
1403        xprt_write_space(xprt);
1404}
1405
1406/**
1407 * xs_udp_write_space - callback invoked when socket buffer space
1408 *                             becomes available
1409 * @sk: socket whose state has changed
1410 *
1411 * Called when more output buffer space is available for this socket.
1412 * We try not to wake our writers until they can make "significant"
1413 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
1414 * with a bunch of small requests.
1415 */
1416static void xs_udp_write_space(struct sock *sk)
1417{
1418        read_lock(&sk->sk_callback_lock);
1419
1420        /* from net/core/sock.c:sock_def_write_space */
1421        if (sock_writeable(sk))
1422                xs_write_space(sk);
1423
1424        read_unlock(&sk->sk_callback_lock);
1425}
1426
1427/**
1428 * xs_tcp_write_space - callback invoked when socket buffer space
1429 *                             becomes available
1430 * @sk: socket whose state has changed
1431 *
1432 * Called when more output buffer space is available for this socket.
1433 * We try not to wake our writers until they can make "significant"
1434 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
1435 * with a bunch of small requests.
1436 */
1437static void xs_tcp_write_space(struct sock *sk)
1438{
1439        read_lock(&sk->sk_callback_lock);
1440
1441        /* from net/core/stream.c:sk_stream_write_space */
1442        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
1443                xs_write_space(sk);
1444
1445        read_unlock(&sk->sk_callback_lock);
1446}
1447
1448static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
1449{
1450        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1451        struct sock *sk = transport->inet;
1452
1453        if (transport->rcvsize) {
1454                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
1455                sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2;
1456        }
1457        if (transport->sndsize) {
1458                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1459                sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2;
1460                sk->sk_write_space(sk);
1461        }
1462}
1463
1464/**
1465 * xs_udp_set_buffer_size - set send and receive limits
1466 * @xprt: generic transport
1467 * @sndsize: requested size of send buffer, in bytes
1468 * @rcvsize: requested size of receive buffer, in bytes
1469 *
1470 * Set socket send and receive buffer size limits.
1471 */
1472static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
1473{
1474        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1475
1476        transport->sndsize = 0;
1477        if (sndsize)
1478                transport->sndsize = sndsize + 1024;
1479        transport->rcvsize = 0;
1480        if (rcvsize)
1481                transport->rcvsize = rcvsize + 1024;
1482
1483        xs_udp_do_set_buffer_size(xprt);
1484}
1485
1486/**
1487 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport
1488 * @task: task that timed out
1489 *
1490 * Adjust the congestion window after a retransmit timeout has occurred.
1491 */
1492static void xs_udp_timer(struct rpc_task *task)
1493{
1494        xprt_adjust_cwnd(task, -ETIMEDOUT);
1495}
1496
1497static unsigned short xs_get_random_port(void)
1498{
1499        unsigned short range = xprt_max_resvport - xprt_min_resvport;
1500        unsigned short rand = (unsigned short) net_random() % range;
1501        return rand + xprt_min_resvport;
1502}
1503
/**
 * xs_set_port - reset the port number in the remote endpoint address
 * @xprt: generic transport
 * @port: new port number
 *
 * Stores @port in the transport's address, then calls
 * xs_update_peer_port() for the transport.
 */
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
	struct sockaddr *peer;

	dprintk("RPC:       setting port for xprt %p to %u\n", xprt, port);

	peer = xs_addr(xprt);
	rpc_set_port(peer, port);
	xs_update_peer_port(xprt);
}
1517
1518static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
1519{
1520        unsigned short port = transport->srcport;
1521
1522        if (port == 0 && transport->xprt.resvport)
1523                port = xs_get_random_port();
1524        return port;
1525}
1526
1527static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
1528{
1529        if (transport->srcport != 0)
1530                transport->srcport = 0;
1531        if (!transport->xprt.resvport)
1532                return 0;
1533        if (port <= xprt_min_resvport || port > xprt_max_resvport)
1534                return xprt_max_resvport;
1535        return --port;
1536}
1537
1538static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
1539{
1540        struct sockaddr_in myaddr = {
1541                .sin_family = AF_INET,
1542        };
1543        struct sockaddr_in *sa;
1544        int err, nloop = 0;
1545        unsigned short port = xs_get_srcport(transport, sock);
1546        unsigned short last;
1547
1548        sa = (struct sockaddr_in *)&transport->srcaddr;
1549        myaddr.sin_addr = sa->sin_addr;
1550        do {
1551                myaddr.sin_port = htons(port);
1552                err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1553                                                sizeof(myaddr));
1554                if (port == 0)
1555                        break;
1556                if (err == 0) {
1557                        transport->srcport = port;
1558                        break;
1559                }
1560                last = port;
1561                port = xs_next_srcport(transport, sock, port);
1562                if (port > last)
1563                        nloop++;
1564        } while (err == -EADDRINUSE && nloop != 2);
1565        dprintk("RPC:       %s %pI4:%u: %s (%d)\n",
1566                        __func__, &myaddr.sin_addr,
1567                        port, err ? "failed" : "ok", err);
1568        return err;
1569}
1570
1571static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
1572{
1573        struct sockaddr_in6 myaddr = {
1574                .sin6_family = AF_INET6,
1575        };
1576        struct sockaddr_in6 *sa;
1577        int err, nloop = 0;
1578        unsigned short port = xs_get_srcport(transport, sock);
1579        unsigned short last;
1580
1581        sa = (struct sockaddr_in6 *)&transport->srcaddr;
1582        myaddr.sin6_addr = sa->sin6_addr;
1583        do {
1584                myaddr.sin6_port = htons(port);
1585                err = kernel_bind(sock, (struct sockaddr *) &myaddr,
1586                                                sizeof(myaddr));
1587                if (port == 0)
1588                        break;
1589                if (err == 0) {
1590                        transport->srcport = port;
1591                        break;
1592                }
1593                last = port;
1594                port = xs_next_srcport(transport, sock, port);
1595                if (port > last)
1596                        nloop++;
1597        } while (err == -EADDRINUSE && nloop != 2);
1598        dprintk("RPC:       xs_bind6 %pI6:%u: %s (%d)\n",
1599                &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
1600        return err;
1601}
1602
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Lock class keys for RPC sockets: index 0 is AF_INET, index 1 is AF_INET6 */
static struct lock_class_key xs_key[2];
static struct lock_class_key xs_slock_key[2];

/* Re-initialise the lock classes of an IPv4 RPC socket */
static inline void xs_reclassify_socket4(struct socket *sock)
{
	struct sock *inet = sock->sk;

	BUG_ON(sock_owned_by_user(inet));
	sock_lock_init_class_and_name(inet,
			"slock-AF_INET-RPC", &xs_slock_key[0],
			"sk_lock-AF_INET-RPC", &xs_key[0]);
}

/* Re-initialise the lock classes of an IPv6 RPC socket */
static inline void xs_reclassify_socket6(struct socket *sock)
{
	struct sock *inet = sock->sk;

	BUG_ON(sock_owned_by_user(inet));
	sock_lock_init_class_and_name(inet,
			"slock-AF_INET6-RPC", &xs_slock_key[1],
			"sk_lock-AF_INET6-RPC", &xs_key[1]);
}
#else
/* Without CONFIG_DEBUG_LOCK_ALLOC the reclassify helpers do nothing */
static inline void xs_reclassify_socket4(struct socket *sock)
{
}

static inline void xs_reclassify_socket6(struct socket *sock)
{
}
#endif
1633
1634static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1635{
1636        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1637
1638        if (!transport->inet) {
1639                struct sock *sk = sock->sk;
1640
1641                write_lock_bh(&sk->sk_callback_lock);
1642
1643                xs_save_old_callbacks(transport, sk);
1644
1645                sk->sk_user_data = xprt;
1646                sk->sk_data_ready = xs_udp_data_ready;
1647                sk->sk_write_space = xs_udp_write_space;
1648                sk->sk_error_report = xs_error_report;
1649                sk->sk_no_check = UDP_CSUM_NORCV;
1650                sk->sk_allocation = GFP_ATOMIC;
1651
1652                xprt_set_connected(xprt);
1653
1654                /* Reset to new socket */
1655                transport->sock = sock;
1656                transport->inet = sk;
1657
1658                write_unlock_bh(&sk->sk_callback_lock);
1659        }
1660        xs_udp_do_set_buffer_size(xprt);
1661}
1662
1663/**
1664 * xs_udp_connect_worker4 - set up a UDP socket
1665 * @work: RPC transport to connect
1666 *
1667 * Invoked by a work queue tasklet.
1668 */
1669static void xs_udp_connect_worker4(struct work_struct *work)
1670{
1671        struct sock_xprt *transport =
1672                container_of(work, struct sock_xprt, connect_worker.work);
1673        struct rpc_xprt *xprt = &transport->xprt;
1674        struct socket *sock = transport->sock;
1675        int err, status = -EIO;
1676
1677        if (xprt->shutdown)
1678                goto out;
1679
1680        /* Start by resetting any existing state */
1681        xs_reset_transport(transport);
1682
1683        err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1684        if (err < 0) {
1685                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
1686                goto out;
1687        }
1688        xs_reclassify_socket4(sock);
1689
1690        if (xs_bind4(transport, sock)) {
1691                sock_release(sock);
1692                goto out;
1693        }
1694
1695        dprintk("RPC:       worker connecting xprt %p via %s to "
1696                                "%s (port %s)\n", xprt,
1697                        xprt->address_strings[RPC_DISPLAY_PROTO],
1698                        xprt->address_strings[RPC_DISPLAY_ADDR],
1699                        xprt->address_strings[RPC_DISPLAY_PORT]);
1700
1701        xs_udp_finish_connecting(xprt, sock);
1702        status = 0;
1703out:
1704        xprt_clear_connecting(xprt);
1705        xprt_wake_pending_tasks(xprt, status);
1706}
1707
1708/**
1709 * xs_udp_connect_worker6 - set up a UDP socket
1710 * @work: RPC transport to connect
1711 *
1712 * Invoked by a work queue tasklet.
1713 */
1714static void xs_udp_connect_worker6(struct work_struct *work)
1715{
1716        struct sock_xprt *transport =
1717                container_of(work, struct sock_xprt, connect_worker.work);
1718        struct rpc_xprt *xprt = &transport->xprt;
1719        struct socket *sock = transport->sock;
1720        int err, status = -EIO;
1721
1722        if (xprt->shutdown)
1723                goto out;
1724
1725        /* Start by resetting any existing state */
1726        xs_reset_transport(transport);
1727
1728        err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1729        if (err < 0) {
1730                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
1731                goto out;
1732        }
1733        xs_reclassify_socket6(sock);
1734
1735        if (xs_bind6(transport, sock) < 0) {
1736                sock_release(sock);
1737                goto out;
1738        }
1739
1740        dprintk("RPC:       worker connecting xprt %p via %s to "
1741                                "%s (port %s)\n", xprt,
1742                        xprt->address_strings[RPC_DISPLAY_PROTO],
1743                        xprt->address_strings[RPC_DISPLAY_ADDR],
1744                        xprt->address_strings[RPC_DISPLAY_PORT]);
1745
1746        xs_udp_finish_connecting(xprt, sock);
1747        status = 0;
1748out:
1749        xprt_clear_connecting(xprt);
1750        xprt_wake_pending_tasks(xprt, status);
1751}
1752
1753/*
1754 * We need to preserve the port number so the reply cache on the server can
1755 * find our cached RPC replies when we get around to reconnecting.
1756 */
1757static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1758{
1759        int result;
1760        struct sockaddr any;
1761
1762        dprintk("RPC:       disconnecting xprt %p to reuse port\n", xprt);
1763
1764        /*
1765         * Disconnect the transport socket by doing a connect operation
1766         * with AF_UNSPEC.  This should return immediately...
1767         */
1768        memset(&any, 0, sizeof(any));
1769        any.sa_family = AF_UNSPEC;
1770        result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1771        if (!result)
1772                xs_sock_mark_closed(xprt);
1773        else
1774                dprintk("RPC:       AF_UNSPEC connect return code %d\n",
1775                                result);
1776}
1777
1778static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1779{
1780        unsigned int state = transport->inet->sk_state;
1781
1782        if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
1783                return;
1784        if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
1785                return;
1786        xs_abort_connection(xprt, transport);
1787}
1788
1789static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1790{
1791        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1792
1793        if (!transport->inet) {
1794                struct sock *sk = sock->sk;
1795
1796                write_lock_bh(&sk->sk_callback_lock);
1797
1798                xs_save_old_callbacks(transport, sk);
1799
1800                sk->sk_user_data = xprt;
1801                sk->sk_data_ready = xs_tcp_data_ready;
1802                sk->sk_state_change = xs_tcp_state_change;
1803                sk->sk_write_space = xs_tcp_write_space;
1804                sk->sk_error_report = xs_error_report;
1805                sk->sk_allocation = GFP_ATOMIC;
1806
1807                /* socket options */
1808                sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
1809                sock_reset_flag(sk, SOCK_LINGER);
1810                tcp_sk(sk)->linger2 = 0;
1811                tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1812
1813                xprt_clear_connected(xprt);
1814
1815                /* Reset to new socket */
1816                transport->sock = sock;
1817                transport->inet = sk;
1818
1819                write_unlock_bh(&sk->sk_callback_lock);
1820        }
1821
1822        if (!xprt_bound(xprt))
1823                return -ENOTCONN;
1824
1825        /* Tell the socket layer to start connecting... */
1826        xprt->stat.connect_count++;
1827        xprt->stat.connect_start = jiffies;
1828        return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
1829}
1830
/**
 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
 * @xprt: RPC transport to connect
 * @transport: socket transport to connect
 * @create_sock: function to create a socket of the correct type
 *
 * Called from the address-family specific connect workers.  When the
 * connect attempt was started (or will be retried on the same socket) the
 * function returns after clearing the "connecting" state without waking
 * pending tasks; on the other paths pending tasks are woken with an error.
 */
static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
                struct sock_xprt *transport,
                struct socket *(*create_sock)(struct rpc_xprt *,
                        struct sock_xprt *))
{
        struct socket *sock = transport->sock;
        int status = -EIO;

        if (xprt->shutdown)
                goto out;

        if (sock != NULL) {
                int aborted;

                aborted = test_and_clear_bit(XPRT_CONNECTION_ABORT,
                                &xprt->state);
                /* "close" the socket, preserving the local port */
                xs_tcp_reuse_connection(xprt, transport);

                if (aborted)
                        goto out_eagain;
        } else {
                clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
                sock = create_sock(xprt, transport);
                if (IS_ERR(sock)) {
                        status = PTR_ERR(sock);
                        goto out;
                }
        }

        dprintk("RPC:       worker connecting xprt %p via %s to "
                                "%s (port %s)\n", xprt,
                        xprt->address_strings[RPC_DISPLAY_PROTO],
                        xprt->address_strings[RPC_DISPLAY_ADDR],
                        xprt->address_strings[RPC_DISPLAY_PORT]);

        status = xs_tcp_finish_connecting(xprt, sock);
        dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
                        xprt, -status, xprt_connected(xprt),
                        sock->sk->sk_state);

        switch (status) {
        case 0:
        case -EINPROGRESS:
        case -EALREADY:
        case -ECONNREFUSED:
        case -ECONNRESET:
        case -ENETUNREACH:
                /*
                 * Connect is under way, or will be retried with the
                 * existing socket after a delay.
                 */
                xprt_clear_connecting(xprt);
                return;
        case -EINVAL:
                /* Happens, for instance, if the user specified a link
                 * local IPv6 address without a scope-id.
                 */
                goto out;
        case -EADDRNOTAVAIL:
                break;
        default:
                printk("%s: connect returned unhandled error %d\n",
                        __func__, status);
                break;
        }

        /* We're probably in TIME_WAIT. Get rid of existing socket,
         * and retry
         */
        set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
        xprt_force_disconnect(xprt);
out_eagain:
        status = -EAGAIN;
out:
        xprt_clear_connecting(xprt);
        xprt_wake_pending_tasks(xprt, status);
}
1911
1912static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1913                struct sock_xprt *transport)
1914{
1915        struct socket *sock;
1916        int err;
1917
1918        /* start from scratch */
1919        err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1920        if (err < 0) {
1921                dprintk("RPC:       can't create TCP transport socket (%d).\n",
1922                                -err);
1923                goto out_err;
1924        }
1925        xs_reclassify_socket4(sock);
1926
1927        if (xs_bind4(transport, sock) < 0) {
1928                sock_release(sock);
1929                goto out_err;
1930        }
1931        return sock;
1932out_err:
1933        return ERR_PTR(-EIO);
1934}
1935
1936/**
1937 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1938 * @work: RPC transport to connect
1939 *
1940 * Invoked by a work queue tasklet.
1941 */
1942static void xs_tcp_connect_worker4(struct work_struct *work)
1943{
1944        struct sock_xprt *transport =
1945                container_of(work, struct sock_xprt, connect_worker.work);
1946        struct rpc_xprt *xprt = &transport->xprt;
1947
1948        xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1949}
1950
1951static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1952                struct sock_xprt *transport)
1953{
1954        struct socket *sock;
1955        int err;
1956
1957        /* start from scratch */
1958        err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1959        if (err < 0) {
1960                dprintk("RPC:       can't create TCP transport socket (%d).\n",
1961                                -err);
1962                goto out_err;
1963        }
1964        xs_reclassify_socket6(sock);
1965
1966        if (xs_bind6(transport, sock) < 0) {
1967                sock_release(sock);
1968                goto out_err;
1969        }
1970        return sock;
1971out_err:
1972        return ERR_PTR(-EIO);
1973}
1974
1975/**
1976 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1977 * @work: RPC transport to connect
1978 *
1979 * Invoked by a work queue tasklet.
1980 */
1981static void xs_tcp_connect_worker6(struct work_struct *work)
1982{
1983        struct sock_xprt *transport =
1984                container_of(work, struct sock_xprt, connect_worker.work);
1985        struct rpc_xprt *xprt = &transport->xprt;
1986
1987        xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
1988}
1989
1990/**
1991 * xs_connect - connect a socket to a remote endpoint
1992 * @task: address of RPC task that manages state of connect request
1993 *
1994 * TCP: If the remote end dropped the connection, delay reconnecting.
1995 *
1996 * UDP socket connects are synchronous, but we use a work queue anyway
1997 * to guarantee that even unprivileged user processes can set up a
1998 * socket on a privileged port.
1999 *
2000 * If a UDP socket connect fails, the delay behavior here prevents
2001 * retry floods (hard mounts).
2002 */
2003static void xs_connect(struct rpc_task *task)
2004{
2005        struct rpc_xprt *xprt = task->tk_xprt;
2006        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2007
2008        if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
2009                dprintk("RPC:       xs_connect delayed xprt %p for %lu "
2010                                "seconds\n",
2011                                xprt, xprt->reestablish_timeout / HZ);
2012                queue_delayed_work(rpciod_workqueue,
2013                                   &transport->connect_worker,
2014                                   xprt->reestablish_timeout);
2015                xprt->reestablish_timeout <<= 1;
2016                if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
2017                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2018                if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
2019                        xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
2020        } else {
2021                dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
2022                queue_delayed_work(rpciod_workqueue,
2023                                   &transport->connect_worker, 0);
2024        }
2025}
2026
2027/**
2028 * xs_udp_print_stats - display UDP socket-specifc stats
2029 * @xprt: rpc_xprt struct containing statistics
2030 * @seq: output file
2031 *
2032 */
2033static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2034{
2035        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2036
2037        seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
2038                        transport->srcport,
2039                        xprt->stat.bind_count,
2040                        xprt->stat.sends,
2041                        xprt->stat.recvs,
2042                        xprt->stat.bad_xids,
2043                        xprt->stat.req_u,
2044                        xprt->stat.bklog_u);
2045}
2046
2047/**
2048 * xs_tcp_print_stats - display TCP socket-specifc stats
2049 * @xprt: rpc_xprt struct containing statistics
2050 * @seq: output file
2051 *
2052 */
2053static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
2054{
2055        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2056        long idle_time = 0;
2057
2058        if (xprt_connected(xprt))
2059                idle_time = (long)(jiffies - xprt->last_used) / HZ;
2060
2061        seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
2062                        transport->srcport,
2063                        xprt->stat.bind_count,
2064                        xprt->stat.connect_count,
2065                        xprt->stat.connect_time,
2066                        idle_time,
2067                        xprt->stat.sends,
2068                        xprt->stat.recvs,
2069                        xprt->stat.bad_xids,
2070                        xprt->stat.req_u,
2071                        xprt->stat.bklog_u);
2072}
2073
2074/*
2075 * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason
2076 * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
2077 * to use the server side send routines.
2078 */
2079static void *bc_malloc(struct rpc_task *task, size_t size)
2080{
2081        struct page *page;
2082        struct rpc_buffer *buf;
2083
2084        BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer));
2085        page = alloc_page(GFP_KERNEL);
2086
2087        if (!page)
2088                return NULL;
2089
2090        buf = page_address(page);
2091        buf->len = PAGE_SIZE;
2092
2093        return buf->data;
2094}
2095
2096/*
2097 * Free the space allocated in the bc_alloc routine
2098 */
2099static void bc_free(void *buffer)
2100{
2101        struct rpc_buffer *buf;
2102
2103        if (!buffer)
2104                return;
2105
2106        buf = container_of(buffer, struct rpc_buffer, data);
2107        free_page((unsigned long)buf);
2108}
2109
2110/*
2111 * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
2112 * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
2113 */
2114static int bc_sendto(struct rpc_rqst *req)
2115{
2116        int len;
2117        struct xdr_buf *xbufp = &req->rq_snd_buf;
2118        struct rpc_xprt *xprt = req->rq_xprt;
2119        struct sock_xprt *transport =
2120                                container_of(xprt, struct sock_xprt, xprt);
2121        struct socket *sock = transport->sock;
2122        unsigned long headoff;
2123        unsigned long tailoff;
2124
2125        /*
2126         * Set up the rpc header and record marker stuff
2127         */
2128        xs_encode_tcp_record_marker(xbufp);
2129
2130        tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
2131        headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
2132        len = svc_send_common(sock, xbufp,
2133                              virt_to_page(xbufp->head[0].iov_base), headoff,
2134                              xbufp->tail[0].iov_base, tailoff);
2135
2136        if (len != xbufp->len) {
2137                printk(KERN_NOTICE "Error sending entire callback!\n");
2138                len = -EAGAIN;
2139        }
2140
2141        return len;
2142}
2143
2144/*
2145 * The send routine. Borrows from svc_send
2146 */
2147static int bc_send_request(struct rpc_task *task)
2148{
2149        struct rpc_rqst *req = task->tk_rqstp;
2150        struct svc_xprt *xprt;
2151        struct svc_sock         *svsk;
2152        u32                     len;
2153
2154        dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid));
2155        /*
2156         * Get the server socket associated with this callback xprt
2157         */
2158        xprt = req->rq_xprt->bc_xprt;
2159        svsk = container_of(xprt, struct svc_sock, sk_xprt);
2160
2161        /*
2162         * Grab the mutex to serialize data as the connection is shared
2163         * with the fore channel
2164         */
2165        if (!mutex_trylock(&xprt->xpt_mutex)) {
2166                rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
2167                if (!mutex_trylock(&xprt->xpt_mutex))
2168                        return -EAGAIN;
2169                rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
2170        }
2171        if (test_bit(XPT_DEAD, &xprt->xpt_flags))
2172                len = -ENOTCONN;
2173        else
2174                len = bc_sendto(req);
2175        mutex_unlock(&xprt->xpt_mutex);
2176
2177        if (len > 0)
2178                len = 0;
2179
2180        return len;
2181}
2182
/*
 * Backchannel ->close method.  Intentionally a no-op: the connection is
 * client initiated, so there is nothing to do here.
 */
static void bc_close(struct rpc_xprt *xprt)
{
}
2190
/*
 * Backchannel ->destroy method.  Intentionally a no-op, again because the
 * connection is client initiated.
 */
static void bc_destroy(struct rpc_xprt *xprt)
{
}
2199
2200static struct rpc_xprt_ops xs_udp_ops = {
2201        .set_buffer_size        = xs_udp_set_buffer_size,
2202        .reserve_xprt           = xprt_reserve_xprt_cong,
2203        .release_xprt           = xprt_release_xprt_cong,
2204        .rpcbind                = rpcb_getport_async,
2205        .set_port               = xs_set_port,
2206        .connect                = xs_connect,
2207        .buf_alloc              = rpc_malloc,
2208        .buf_free               = rpc_free,
2209        .send_request           = xs_udp_send_request,
2210        .set_retrans_timeout    = xprt_set_retrans_timeout_rtt,
2211        .timer                  = xs_udp_timer,
2212        .release_request        = xprt_release_rqst_cong,
2213        .close                  = xs_close,
2214        .destroy                = xs_destroy,
2215        .print_stats            = xs_udp_print_stats,
2216};
2217
2218static struct rpc_xprt_ops xs_tcp_ops = {
2219        .reserve_xprt           = xprt_reserve_xprt,
2220        .release_xprt           = xs_tcp_release_xprt,
2221        .rpcbind                = rpcb_getport_async,
2222        .set_port               = xs_set_port,
2223        .connect                = xs_connect,
2224        .buf_alloc              = rpc_malloc,
2225        .buf_free               = rpc_free,
2226        .send_request           = xs_tcp_send_request,
2227        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
2228        .close                  = xs_tcp_close,
2229        .destroy                = xs_destroy,
2230        .print_stats            = xs_tcp_print_stats,
2231};
2232
2233/*
2234 * The rpc_xprt_ops for the server backchannel
2235 */
2236
2237static struct rpc_xprt_ops bc_tcp_ops = {
2238        .reserve_xprt           = xprt_reserve_xprt,
2239        .release_xprt           = xprt_release_xprt,
2240        .buf_alloc              = bc_malloc,
2241        .buf_free               = bc_free,
2242        .send_request           = bc_send_request,
2243        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
2244        .close                  = bc_close,
2245        .destroy                = bc_destroy,
2246        .print_stats            = xs_tcp_print_stats,
2247};
2248
2249static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2250                                      unsigned int slot_table_size)
2251{
2252        struct rpc_xprt *xprt;
2253        struct sock_xprt *new;
2254
2255        if (args->addrlen > sizeof(xprt->addr)) {
2256                dprintk("RPC:       xs_setup_xprt: address too large\n");
2257                return ERR_PTR(-EBADF);
2258        }
2259
2260        new = kzalloc(sizeof(*new), GFP_KERNEL);
2261        if (new == NULL) {
2262                dprintk("RPC:       xs_setup_xprt: couldn't allocate "
2263                                "rpc_xprt\n");
2264                return ERR_PTR(-ENOMEM);
2265        }
2266        xprt = &new->xprt;
2267
2268        xprt->max_reqs = slot_table_size;
2269        xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
2270        if (xprt->slot == NULL) {
2271                kfree(xprt);
2272                dprintk("RPC:       xs_setup_xprt: couldn't allocate slot "
2273                                "table\n");
2274                return ERR_PTR(-ENOMEM);
2275        }
2276
2277        memcpy(&xprt->addr, args->dstaddr, args->addrlen);
2278        xprt->addrlen = args->addrlen;
2279        if (args->srcaddr)
2280                memcpy(&new->srcaddr, args->srcaddr, args->addrlen);
2281
2282        return xprt;
2283}
2284
2285static const struct rpc_timeout xs_udp_default_timeout = {
2286        .to_initval = 5 * HZ,
2287        .to_maxval = 30 * HZ,
2288        .to_increment = 5 * HZ,
2289        .to_retries = 5,
2290};
2291
2292/**
2293 * xs_setup_udp - Set up transport to use a UDP socket
2294 * @args: rpc transport creation arguments
2295 *
2296 */
2297static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2298{
2299        struct sockaddr *addr = args->dstaddr;
2300        struct rpc_xprt *xprt;
2301        struct sock_xprt *transport;
2302        struct rpc_xprt *ret;
2303
2304        xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
2305        if (IS_ERR(xprt))
2306                return xprt;
2307        transport = container_of(xprt, struct sock_xprt, xprt);
2308
2309        xprt->prot = IPPROTO_UDP;
2310        xprt->tsh_size = 0;
2311        /* XXX: header size can vary due to auth type, IPv6, etc. */
2312        xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
2313
2314        xprt->bind_timeout = XS_BIND_TO;
2315        xprt->reestablish_timeout = XS_UDP_REEST_TO;
2316        xprt->idle_timeout = XS_IDLE_DISC_TO;
2317
2318        xprt->ops = &xs_udp_ops;
2319
2320        xprt->timeout = &xs_udp_default_timeout;
2321
2322        switch (addr->sa_family) {
2323        case AF_INET:
2324                if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2325                        xprt_set_bound(xprt);
2326
2327                INIT_DELAYED_WORK(&transport->connect_worker,
2328                                        xs_udp_connect_worker4);
2329                xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
2330                break;
2331        case AF_INET6:
2332                if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2333                        xprt_set_bound(xprt);
2334
2335                INIT_DELAYED_WORK(&transport->connect_worker,
2336                                        xs_udp_connect_worker6);
2337                xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2338                break;
2339        default:
2340                ret = ERR_PTR(-EAFNOSUPPORT);
2341                goto out_err;
2342        }
2343
2344        if (xprt_bound(xprt))
2345                dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
2346                                xprt->address_strings[RPC_DISPLAY_ADDR],
2347                                xprt->address_strings[RPC_DISPLAY_PORT],
2348                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2349        else
2350                dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
2351                                xprt->address_strings[RPC_DISPLAY_ADDR],
2352                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2353
2354        if (try_module_get(THIS_MODULE))
2355                return xprt;
2356        ret = ERR_PTR(-EINVAL);
2357out_err:
2358        kfree(xprt->slot);
2359        kfree(xprt);
2360        return ret;
2361}
2362
2363static const struct rpc_timeout xs_tcp_default_timeout = {
2364        .to_initval = 60 * HZ,
2365        .to_maxval = 60 * HZ,
2366        .to_retries = 2,
2367};
2368
2369/**
2370 * xs_setup_tcp - Set up transport to use a TCP socket
2371 * @args: rpc transport creation arguments
2372 *
2373 */
2374static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2375{
2376        struct sockaddr *addr = args->dstaddr;
2377        struct rpc_xprt *xprt;
2378        struct sock_xprt *transport;
2379        struct rpc_xprt *ret;
2380
2381        xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2382        if (IS_ERR(xprt))
2383                return xprt;
2384        transport = container_of(xprt, struct sock_xprt, xprt);
2385
2386        xprt->prot = IPPROTO_TCP;
2387        xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2388        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2389
2390        xprt->bind_timeout = XS_BIND_TO;
2391        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2392        xprt->idle_timeout = XS_IDLE_DISC_TO;
2393
2394        xprt->ops = &xs_tcp_ops;
2395        xprt->timeout = &xs_tcp_default_timeout;
2396
2397        switch (addr->sa_family) {
2398        case AF_INET:
2399                if (((struct sockaddr_in *)addr)->sin_port != htons(0))
2400                        xprt_set_bound(xprt);
2401
2402                INIT_DELAYED_WORK(&transport->connect_worker,
2403                                        xs_tcp_connect_worker4);
2404                xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
2405                break;
2406        case AF_INET6:
2407                if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
2408                        xprt_set_bound(xprt);
2409
2410                INIT_DELAYED_WORK(&transport->connect_worker,
2411                                        xs_tcp_connect_worker6);
2412                xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2413                break;
2414        default:
2415                ret = ERR_PTR(-EAFNOSUPPORT);
2416                goto out_err;
2417        }
2418
2419        if (xprt_bound(xprt))
2420                dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
2421                                xprt->address_strings[RPC_DISPLAY_ADDR],
2422                                xprt->address_strings[RPC_DISPLAY_PORT],
2423                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2424        else
2425                dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
2426                                xprt->address_strings[RPC_DISPLAY_ADDR],
2427                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2428
2429
2430        if (try_module_get(THIS_MODULE))
2431                return xprt;
2432        ret = ERR_PTR(-EINVAL);
2433out_err:
2434        kfree(xprt->slot);
2435        kfree(xprt);
2436        return ret;
2437}
2438
2439/**
2440 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
2441 * @args: rpc transport creation arguments
2442 *
2443 */
2444static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2445{
2446        struct sockaddr *addr = args->dstaddr;
2447        struct rpc_xprt *xprt;
2448        struct sock_xprt *transport;
2449        struct svc_sock *bc_sock;
2450        struct rpc_xprt *ret;
2451
2452        xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2453        if (IS_ERR(xprt))
2454                return xprt;
2455        transport = container_of(xprt, struct sock_xprt, xprt);
2456
2457        xprt->prot = IPPROTO_TCP;
2458        xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
2459        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2460        xprt->timeout = &xs_tcp_default_timeout;
2461
2462        /* backchannel */
2463        xprt_set_bound(xprt);
2464        xprt->bind_timeout = 0;
2465        xprt->reestablish_timeout = 0;
2466        xprt->idle_timeout = 0;
2467
2468        /*
2469         * The backchannel uses the same socket connection as the
2470         * forechannel
2471         */
2472        xprt->bc_xprt = args->bc_xprt;
2473        bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt);
2474        bc_sock->sk_bc_xprt = xprt;
2475        transport->sock = bc_sock->sk_sock;
2476        transport->inet = bc_sock->sk_sk;
2477
2478        xprt->ops = &bc_tcp_ops;
2479
2480        switch (addr->sa_family) {
2481        case AF_INET:
2482                xs_format_peer_addresses(xprt, "tcp",
2483                                         RPCBIND_NETID_TCP);
2484                break;
2485        case AF_INET6:
2486                xs_format_peer_addresses(xprt, "tcp",
2487                                   RPCBIND_NETID_TCP6);
2488                break;
2489        default:
2490                ret = ERR_PTR(-EAFNOSUPPORT);
2491                goto out_err;
2492        }
2493
2494        if (xprt_bound(xprt))
2495                dprintk("RPC:       set up xprt to %s (port %s) via %s\n",
2496                                xprt->address_strings[RPC_DISPLAY_ADDR],
2497                                xprt->address_strings[RPC_DISPLAY_PORT],
2498                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2499        else
2500                dprintk("RPC:       set up xprt to %s (autobind) via %s\n",
2501                                xprt->address_strings[RPC_DISPLAY_ADDR],
2502                                xprt->address_strings[RPC_DISPLAY_PROTO]);
2503
2504        /*
2505         * Since we don't want connections for the backchannel, we set
2506         * the xprt status to connected
2507         */
2508        xprt_set_connected(xprt);
2509
2510
2511        if (try_module_get(THIS_MODULE))
2512                return xprt;
2513        ret = ERR_PTR(-EINVAL);
2514out_err:
2515        kfree(xprt->slot);
2516        kfree(xprt);
2517        return ret;
2518}
2519
2520static struct xprt_class        xs_udp_transport = {
2521        .list           = LIST_HEAD_INIT(xs_udp_transport.list),
2522        .name           = "udp",
2523        .owner          = THIS_MODULE,
2524        .ident          = XPRT_TRANSPORT_UDP,
2525        .setup          = xs_setup_udp,
2526};
2527
2528static struct xprt_class        xs_tcp_transport = {
2529        .list           = LIST_HEAD_INIT(xs_tcp_transport.list),
2530        .name           = "tcp",
2531        .owner          = THIS_MODULE,
2532        .ident          = XPRT_TRANSPORT_TCP,
2533        .setup          = xs_setup_tcp,
2534};
2535
2536static struct xprt_class        xs_bc_tcp_transport = {
2537        .list           = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
2538        .name           = "tcp NFSv4.1 backchannel",
2539        .owner          = THIS_MODULE,
2540        .ident          = XPRT_TRANSPORT_BC_TCP,
2541        .setup          = xs_setup_bc_tcp,
2542};
2543
2544/**
2545 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
2546 *
2547 */
2548int init_socket_xprt(void)
2549{
2550#ifdef RPC_DEBUG
2551        if (!sunrpc_table_header)
2552                sunrpc_table_header = register_sysctl_table(sunrpc_table);
2553#endif
2554
2555        xprt_register_transport(&xs_udp_transport);
2556        xprt_register_transport(&xs_tcp_transport);
2557        xprt_register_transport(&xs_bc_tcp_transport);
2558
2559        return 0;
2560}
2561
2562/**
2563 * cleanup_socket_xprt - remove xprtsock's sysctls, unregister
2564 *
2565 */
2566void cleanup_socket_xprt(void)
2567{
2568#ifdef RPC_DEBUG
2569        if (sunrpc_table_header) {
2570                unregister_sysctl_table(sunrpc_table_header);
2571                sunrpc_table_header = NULL;
2572        }
2573#endif
2574
2575        xprt_unregister_transport(&xs_udp_transport);
2576        xprt_unregister_transport(&xs_tcp_transport);
2577        xprt_unregister_transport(&xs_bc_tcp_transport);
2578}
2579
2580static int param_set_uint_minmax(const char *val, struct kernel_param *kp,
2581                unsigned int min, unsigned int max)
2582{
2583        unsigned long num;
2584        int ret;
2585
2586        if (!val)
2587                return -EINVAL;
2588        ret = strict_strtoul(val, 0, &num);
2589        if (ret == -EINVAL || num < min || num > max)
2590                return -EINVAL;
2591        *((unsigned int *)kp->arg) = num;
2592        return 0;
2593}
2594
2595static int param_set_portnr(const char *val, struct kernel_param *kp)
2596{
2597        return param_set_uint_minmax(val, kp,
2598                        RPC_MIN_RESVPORT,
2599                        RPC_MAX_RESVPORT);
2600}
2601
2602static int param_get_portnr(char *buffer, struct kernel_param *kp)
2603{
2604        return param_get_uint(buffer, kp);
2605}
2606#define param_check_portnr(name, p) \
2607        __param_check(name, p, unsigned int);
2608
2609module_param_named(min_resvport, xprt_min_resvport, portnr, 0644);
2610module_param_named(max_resvport, xprt_max_resvport, portnr, 0644);
2611
2612static int param_set_slot_table_size(const char *val, struct kernel_param *kp)
2613{
2614        return param_set_uint_minmax(val, kp,
2615                        RPC_MIN_SLOT_TABLE,
2616                        RPC_MAX_SLOT_TABLE);
2617}
2618
2619static int param_get_slot_table_size(char *buffer, struct kernel_param *kp)
2620{
2621        return param_get_uint(buffer, kp);
2622}
2623#define param_check_slot_table_size(name, p) \
2624        __param_check(name, p, unsigned int);
2625
2626module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
2627                   slot_table_size, 0644);
2628module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
2629                   slot_table_size, 0644);
2630
2631