linux/net/tipc/socket.c
<<
>>
Prefs
   1/*
   2 * net/tipc/socket.c: TIPC socket API
   3 *
   4 * Copyright (c) 2001-2007, Ericsson AB
   5 * Copyright (c) 2004-2008, 2010-2011, Wind River Systems
   6 * All rights reserved.
   7 *
   8 * Redistribution and use in source and binary forms, with or without
   9 * modification, are permitted provided that the following conditions are met:
  10 *
  11 * 1. Redistributions of source code must retain the above copyright
  12 *    notice, this list of conditions and the following disclaimer.
  13 * 2. Redistributions in binary form must reproduce the above copyright
  14 *    notice, this list of conditions and the following disclaimer in the
  15 *    documentation and/or other materials provided with the distribution.
  16 * 3. Neither the names of the copyright holders nor the names of its
  17 *    contributors may be used to endorse or promote products derived from
  18 *    this software without specific prior written permission.
  19 *
  20 * Alternatively, this software may be distributed under the terms of the
  21 * GNU General Public License ("GPL") version 2 as published by the Free
  22 * Software Foundation.
  23 *
  24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34 * POSSIBILITY OF SUCH DAMAGE.
  35 */
  36
  37#include "core.h"
  38#include "port.h"
  39
  40#include <linux/export.h>
  41#include <net/sock.h>
  42
  43#define SS_LISTENING    -1      /* socket is listening */
  44#define SS_READY        -2      /* socket is connectionless */
  45
  46#define OVERLOAD_LIMIT_BASE     5000
  47#define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
  48
  49struct tipc_sock {
  50        struct sock sk;
  51        struct tipc_port *p;
  52        struct tipc_portid peer_name;
  53        unsigned int conn_timeout;
  54};
  55
  56#define tipc_sk(sk) ((struct tipc_sock *)(sk))
  57#define tipc_sk_port(sk) (tipc_sk(sk)->p)
  58
  59#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \
  60                        (sock->state == SS_DISCONNECTING))
  61
  62static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
  63static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
  64static void wakeupdispatch(struct tipc_port *tport);
  65
  66static const struct proto_ops packet_ops;
  67static const struct proto_ops stream_ops;
  68static const struct proto_ops msg_ops;
  69
  70static struct proto tipc_proto;
  71
  72static int sockets_enabled;
  73
  74static atomic_t tipc_queue_size = ATOMIC_INIT(0);
  75
  76/*
  77 * Revised TIPC socket locking policy:
  78 *
  79 * Most socket operations take the standard socket lock when they start
  80 * and hold it until they finish (or until they need to sleep).  Acquiring
  81 * this lock grants the owner exclusive access to the fields of the socket
  82 * data structures, with the exception of the backlog queue.  A few socket
  83 * operations can be done without taking the socket lock because they only
  84 * read socket information that never changes during the life of the socket.
  85 *
  86 * Socket operations may acquire the lock for the associated TIPC port if they
  87 * need to perform an operation on the port.  If any routine needs to acquire
  88 * both the socket lock and the port lock it must take the socket lock first
  89 * to avoid the risk of deadlock.
  90 *
  91 * The dispatcher handling incoming messages cannot grab the socket lock in
  92 * the standard fashion, since it is invoked at the BH level and cannot block.
  93 * Instead, it checks to see if the socket lock is currently owned by someone,
  94 * and either handles the message itself or adds it to the socket's backlog
  95 * queue; in the latter case the queued message is processed once the process
  96 * owning the socket lock releases it.
  97 *
  98 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
  99 * the problem of a blocked socket operation preventing any other operations
 100 * from occurring.  However, applications must be careful if they have
 101 * multiple threads trying to send (or receive) on the same socket, as these
 102 * operations might interfere with each other.  For example, doing a connect
 103 * and a receive at the same time might allow the receive to consume the
 104 * ACK message meant for the connect.  While additional work could be done
 105 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 106 *
 107 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 108 * that another operation that must be performed in a non-blocking manner is
 109 * not delayed for very long because the lock has already been taken.
 110 *
 111 * NOTE: This code assumes that certain fields of a port/socket pair are
 112 * constant over its lifetime; such fields can be examined without taking
 113 * the socket lock and/or port lock, and do not need to be re-read even
 114 * after resuming processing after waiting.  These fields include:
 115 *   - socket type
 116 *   - pointer to socket sk structure (aka tipc_sock structure)
 117 *   - pointer to port structure
 118 *   - port reference
 119 */
 120
 121/**
 122 * advance_rx_queue - discard first buffer in socket receive queue
 123 *
 124 * Caller must hold socket lock
 125 */
 126static void advance_rx_queue(struct sock *sk)
 127{
 128        kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
 129        atomic_dec(&tipc_queue_size);
 130}
 131
 132/**
 133 * discard_rx_queue - discard all buffers in socket receive queue
 134 *
 135 * Caller must hold socket lock
 136 */
 137static void discard_rx_queue(struct sock *sk)
 138{
 139        struct sk_buff *buf;
 140
 141        while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
 142                atomic_dec(&tipc_queue_size);
 143                kfree_skb(buf);
 144        }
 145}
 146
 147/**
 148 * reject_rx_queue - reject all buffers in socket receive queue
 149 *
 150 * Caller must hold socket lock
 151 */
 152static void reject_rx_queue(struct sock *sk)
 153{
 154        struct sk_buff *buf;
 155
 156        while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
 157                tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 158                atomic_dec(&tipc_queue_size);
 159        }
 160}
 161
 162/**
 163 * tipc_create - create a TIPC socket
 164 * @net: network namespace (must be default network)
 165 * @sock: pre-allocated socket structure
 166 * @protocol: protocol indicator (must be 0)
 167 * @kern: caused by kernel or by userspace?
 168 *
 169 * This routine creates additional data structures used by the TIPC socket,
 170 * initializes them, and links them together.
 171 *
 172 * Returns 0 on success, errno otherwise
 173 */
 174static int tipc_create(struct net *net, struct socket *sock, int protocol,
 175                       int kern)
 176{
 177        const struct proto_ops *ops;
 178        socket_state state;
 179        struct sock *sk;
 180        struct tipc_port *tp_ptr;
 181
 182        /* Validate arguments */
 183        if (unlikely(protocol != 0))
 184                return -EPROTONOSUPPORT;
 185
 186        switch (sock->type) {
 187        case SOCK_STREAM:
 188                ops = &stream_ops;
 189                state = SS_UNCONNECTED;
 190                break;
 191        case SOCK_SEQPACKET:
 192                ops = &packet_ops;
 193                state = SS_UNCONNECTED;
 194                break;
 195        case SOCK_DGRAM:
 196        case SOCK_RDM:
 197                ops = &msg_ops;
 198                state = SS_READY;
 199                break;
 200        default:
 201                return -EPROTOTYPE;
 202        }
 203
 204        /* Allocate socket's protocol area */
 205        sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
 206        if (sk == NULL)
 207                return -ENOMEM;
 208
 209        /* Allocate TIPC port for socket to use */
 210        tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
 211                                     TIPC_LOW_IMPORTANCE);
 212        if (unlikely(!tp_ptr)) {
 213                sk_free(sk);
 214                return -ENOMEM;
 215        }
 216
 217        /* Finish initializing socket data structures */
 218        sock->ops = ops;
 219        sock->state = state;
 220
 221        sock_init_data(sock, sk);
 222        sk->sk_backlog_rcv = backlog_rcv;
 223        sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
 224        tipc_sk(sk)->p = tp_ptr;
 225        tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
 226
 227        spin_unlock_bh(tp_ptr->lock);
 228
 229        if (sock->state == SS_READY) {
 230                tipc_set_portunreturnable(tp_ptr->ref, 1);
 231                if (sock->type == SOCK_DGRAM)
 232                        tipc_set_portunreliable(tp_ptr->ref, 1);
 233        }
 234
 235        return 0;
 236}
 237
 238/**
 239 * release - destroy a TIPC socket
 240 * @sock: socket to destroy
 241 *
 242 * This routine cleans up any messages that are still queued on the socket.
 243 * For DGRAM and RDM socket types, all queued messages are rejected.
 244 * For SEQPACKET and STREAM socket types, the first message is rejected
 245 * and any others are discarded.  (If the first message on a STREAM socket
 246 * is partially-read, it is discarded and the next one is rejected instead.)
 247 *
 248 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 249 * are returned or discarded according to the "destination droppable" setting
 250 * specified for the message by the sender.
 251 *
 252 * Returns 0 on success, errno otherwise
 253 */
 254static int release(struct socket *sock)
 255{
 256        struct sock *sk = sock->sk;
 257        struct tipc_port *tport;
 258        struct sk_buff *buf;
 259        int res;
 260
 261        /*
 262         * Exit if socket isn't fully initialized (occurs when a failed accept()
 263         * releases a pre-allocated child socket that was never used)
 264         */
 265        if (sk == NULL)
 266                return 0;
 267
 268        tport = tipc_sk_port(sk);
 269        lock_sock(sk);
 270
 271        /*
 272         * Reject all unreceived messages, except on an active connection
 273         * (which disconnects locally & sends a 'FIN+' to peer)
 274         */
 275        while (sock->state != SS_DISCONNECTING) {
 276                buf = __skb_dequeue(&sk->sk_receive_queue);
 277                if (buf == NULL)
 278                        break;
 279                atomic_dec(&tipc_queue_size);
 280                if (TIPC_SKB_CB(buf)->handle != 0)
 281                        kfree_skb(buf);
 282                else {
 283                        if ((sock->state == SS_CONNECTING) ||
 284                            (sock->state == SS_CONNECTED)) {
 285                                sock->state = SS_DISCONNECTING;
 286                                tipc_disconnect(tport->ref);
 287                        }
 288                        tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 289                }
 290        }
 291
 292        /*
 293         * Delete TIPC port; this ensures no more messages are queued
 294         * (also disconnects an active connection & sends a 'FIN-' to peer)
 295         */
 296        res = tipc_deleteport(tport->ref);
 297
 298        /* Discard any remaining (connection-based) messages in receive queue */
 299        discard_rx_queue(sk);
 300
 301        /* Reject any messages that accumulated in backlog queue */
 302        sock->state = SS_DISCONNECTING;
 303        release_sock(sk);
 304
 305        sock_put(sk);
 306        sock->sk = NULL;
 307
 308        return res;
 309}
 310
 311/**
 312 * bind - associate or disassocate TIPC name(s) with a socket
 313 * @sock: socket structure
 314 * @uaddr: socket address describing name(s) and desired operation
 315 * @uaddr_len: size of socket address data structure
 316 *
 317 * Name and name sequence binding is indicated using a positive scope value;
 318 * a negative scope value unbinds the specified name.  Specifying no name
 319 * (i.e. a socket address length of 0) unbinds all names from the socket.
 320 *
 321 * Returns 0 on success, errno otherwise
 322 *
 323 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 324 *       access any non-constant socket information.
 325 */
 326static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
 327{
 328        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 329        u32 portref = tipc_sk_port(sock->sk)->ref;
 330
 331        if (unlikely(!uaddr_len))
 332                return tipc_withdraw(portref, 0, NULL);
 333
 334        if (uaddr_len < sizeof(struct sockaddr_tipc))
 335                return -EINVAL;
 336        if (addr->family != AF_TIPC)
 337                return -EAFNOSUPPORT;
 338
 339        if (addr->addrtype == TIPC_ADDR_NAME)
 340                addr->addr.nameseq.upper = addr->addr.nameseq.lower;
 341        else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
 342                return -EAFNOSUPPORT;
 343
 344        if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES)
 345                return -EACCES;
 346
 347        return (addr->scope > 0) ?
 348                tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
 349                tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
 350}
 351
 352/**
 353 * get_name - get port ID of socket or peer socket
 354 * @sock: socket structure
 355 * @uaddr: area for returned socket address
 356 * @uaddr_len: area for returned length of socket address
 357 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 358 *
 359 * Returns 0 on success, errno otherwise
 360 *
 361 * NOTE: This routine doesn't need to take the socket lock since it only
 362 *       accesses socket information that is unchanging (or which changes in
 363 *       a completely predictable manner).
 364 */
 365static int get_name(struct socket *sock, struct sockaddr *uaddr,
 366                    int *uaddr_len, int peer)
 367{
 368        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 369        struct tipc_sock *tsock = tipc_sk(sock->sk);
 370
 371        memset(addr, 0, sizeof(*addr));
 372        if (peer) {
 373                if ((sock->state != SS_CONNECTED) &&
 374                        ((peer != 2) || (sock->state != SS_DISCONNECTING)))
 375                        return -ENOTCONN;
 376                addr->addr.id.ref = tsock->peer_name.ref;
 377                addr->addr.id.node = tsock->peer_name.node;
 378        } else {
 379                addr->addr.id.ref = tsock->p->ref;
 380                addr->addr.id.node = tipc_own_addr;
 381        }
 382
 383        *uaddr_len = sizeof(*addr);
 384        addr->addrtype = TIPC_ADDR_ID;
 385        addr->family = AF_TIPC;
 386        addr->scope = 0;
 387        addr->addr.name.domain = 0;
 388
 389        return 0;
 390}
 391
 392/**
 393 * poll - read and possibly block on pollmask
 394 * @file: file structure associated with the socket
 395 * @sock: socket for which to calculate the poll bits
 396 * @wait: ???
 397 *
 398 * Returns pollmask value
 399 *
 400 * COMMENTARY:
 401 * It appears that the usual socket locking mechanisms are not useful here
 402 * since the pollmask info is potentially out-of-date the moment this routine
 403 * exits.  TCP and other protocols seem to rely on higher level poll routines
 404 * to handle any preventable race conditions, so TIPC will do the same ...
 405 *
 406 * TIPC sets the returned events as follows:
 407 *
 408 * socket state         flags set
 409 * ------------         ---------
 410 * unconnected          no read flags
 411 *                      no write flags
 412 *
 413 * connecting           POLLIN/POLLRDNORM if ACK/NACK in rx queue
 414 *                      no write flags
 415 *
 416 * connected            POLLIN/POLLRDNORM if data in rx queue
 417 *                      POLLOUT if port is not congested
 418 *
 419 * disconnecting        POLLIN/POLLRDNORM/POLLHUP
 420 *                      no write flags
 421 *
 422 * listening            POLLIN if SYN in rx queue
 423 *                      no write flags
 424 *
 425 * ready                POLLIN/POLLRDNORM if data in rx queue
 426 * [connectionless]     POLLOUT (since port cannot be congested)
 427 *
 428 * IMPORTANT: The fact that a read or write operation is indicated does NOT
 429 * imply that the operation will succeed, merely that it should be performed
 430 * and will not block.
 431 */
 432static unsigned int poll(struct file *file, struct socket *sock,
 433                         poll_table *wait)
 434{
 435        struct sock *sk = sock->sk;
 436        u32 mask = 0;
 437
 438        poll_wait(file, sk_sleep(sk), wait);
 439
 440        switch ((int)sock->state) {
 441        case SS_READY:
 442        case SS_CONNECTED:
 443                if (!tipc_sk_port(sk)->congested)
 444                        mask |= POLLOUT;
 445                /* fall thru' */
 446        case SS_CONNECTING:
 447        case SS_LISTENING:
 448                if (!skb_queue_empty(&sk->sk_receive_queue))
 449                        mask |= (POLLIN | POLLRDNORM);
 450                break;
 451        case SS_DISCONNECTING:
 452                mask = (POLLIN | POLLRDNORM | POLLHUP);
 453                break;
 454        }
 455
 456        return mask;
 457}
 458
 459/**
 460 * dest_name_check - verify user is permitted to send to specified port name
 461 * @dest: destination address
 462 * @m: descriptor for message to be sent
 463 *
 464 * Prevents restricted configuration commands from being issued by
 465 * unauthorized users.
 466 *
 467 * Returns 0 if permission is granted, otherwise errno
 468 */
 469static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
 470{
 471        struct tipc_cfg_msg_hdr hdr;
 472
 473        if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
 474                return 0;
 475        if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
 476                return 0;
 477        if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
 478                return -EACCES;
 479
 480        if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr)))
 481                return -EMSGSIZE;
 482        if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
 483                return -EFAULT;
 484        if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
 485                return -EACCES;
 486
 487        return 0;
 488}
 489
 490/**
 491 * send_msg - send message in connectionless manner
 492 * @iocb: if NULL, indicates that socket lock is already held
 493 * @sock: socket structure
 494 * @m: message to send
 495 * @total_len: length of message
 496 *
 497 * Message must have an destination specified explicitly.
 498 * Used for SOCK_RDM and SOCK_DGRAM messages,
 499 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 500 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 501 *
 502 * Returns the number of bytes sent on success, or errno otherwise
 503 */
 504static int send_msg(struct kiocb *iocb, struct socket *sock,
 505                    struct msghdr *m, size_t total_len)
 506{
 507        struct sock *sk = sock->sk;
 508        struct tipc_port *tport = tipc_sk_port(sk);
 509        struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
 510        int needs_conn;
 511        long timeout_val;
 512        int res = -EINVAL;
 513
 514        if (unlikely(!dest))
 515                return -EDESTADDRREQ;
 516        if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 517                     (dest->family != AF_TIPC)))
 518                return -EINVAL;
 519        if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
 520            (m->msg_iovlen > (unsigned int)INT_MAX))
 521                return -EMSGSIZE;
 522
 523        if (iocb)
 524                lock_sock(sk);
 525
 526        needs_conn = (sock->state != SS_READY);
 527        if (unlikely(needs_conn)) {
 528                if (sock->state == SS_LISTENING) {
 529                        res = -EPIPE;
 530                        goto exit;
 531                }
 532                if (sock->state != SS_UNCONNECTED) {
 533                        res = -EISCONN;
 534                        goto exit;
 535                }
 536                if ((tport->published) ||
 537                    ((sock->type == SOCK_STREAM) && (total_len != 0))) {
 538                        res = -EOPNOTSUPP;
 539                        goto exit;
 540                }
 541                if (dest->addrtype == TIPC_ADDR_NAME) {
 542                        tport->conn_type = dest->addr.name.name.type;
 543                        tport->conn_instance = dest->addr.name.name.instance;
 544                }
 545
 546                /* Abort any pending connection attempts (very unlikely) */
 547                reject_rx_queue(sk);
 548        }
 549
 550        timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 551
 552        do {
 553                if (dest->addrtype == TIPC_ADDR_NAME) {
 554                        res = dest_name_check(dest, m);
 555                        if (res)
 556                                break;
 557                        res = tipc_send2name(tport->ref,
 558                                             &dest->addr.name.name,
 559                                             dest->addr.name.domain,
 560                                             m->msg_iovlen,
 561                                             m->msg_iov,
 562                                             total_len);
 563                } else if (dest->addrtype == TIPC_ADDR_ID) {
 564                        res = tipc_send2port(tport->ref,
 565                                             &dest->addr.id,
 566                                             m->msg_iovlen,
 567                                             m->msg_iov,
 568                                             total_len);
 569                } else if (dest->addrtype == TIPC_ADDR_MCAST) {
 570                        if (needs_conn) {
 571                                res = -EOPNOTSUPP;
 572                                break;
 573                        }
 574                        res = dest_name_check(dest, m);
 575                        if (res)
 576                                break;
 577                        res = tipc_multicast(tport->ref,
 578                                             &dest->addr.nameseq,
 579                                             m->msg_iovlen,
 580                                             m->msg_iov,
 581                                             total_len);
 582                }
 583                if (likely(res != -ELINKCONG)) {
 584                        if (needs_conn && (res >= 0))
 585                                sock->state = SS_CONNECTING;
 586                        break;
 587                }
 588                if (timeout_val <= 0L) {
 589                        res = timeout_val ? timeout_val : -EWOULDBLOCK;
 590                        break;
 591                }
 592                release_sock(sk);
 593                timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
 594                                               !tport->congested, timeout_val);
 595                lock_sock(sk);
 596        } while (1);
 597
 598exit:
 599        if (iocb)
 600                release_sock(sk);
 601        return res;
 602}
 603
 604/**
 605 * send_packet - send a connection-oriented message
 606 * @iocb: if NULL, indicates that socket lock is already held
 607 * @sock: socket structure
 608 * @m: message to send
 609 * @total_len: length of message
 610 *
 611 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
 612 *
 613 * Returns the number of bytes sent on success, or errno otherwise
 614 */
 615static int send_packet(struct kiocb *iocb, struct socket *sock,
 616                       struct msghdr *m, size_t total_len)
 617{
 618        struct sock *sk = sock->sk;
 619        struct tipc_port *tport = tipc_sk_port(sk);
 620        struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
 621        long timeout_val;
 622        int res;
 623
 624        /* Handle implied connection establishment */
 625        if (unlikely(dest))
 626                return send_msg(iocb, sock, m, total_len);
 627
 628        if ((total_len > TIPC_MAX_USER_MSG_SIZE) ||
 629            (m->msg_iovlen > (unsigned int)INT_MAX))
 630                return -EMSGSIZE;
 631
 632        if (iocb)
 633                lock_sock(sk);
 634
 635        timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 636
 637        do {
 638                if (unlikely(sock->state != SS_CONNECTED)) {
 639                        if (sock->state == SS_DISCONNECTING)
 640                                res = -EPIPE;
 641                        else
 642                                res = -ENOTCONN;
 643                        break;
 644                }
 645
 646                res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov,
 647                                total_len);
 648                if (likely(res != -ELINKCONG))
 649                        break;
 650                if (timeout_val <= 0L) {
 651                        res = timeout_val ? timeout_val : -EWOULDBLOCK;
 652                        break;
 653                }
 654                release_sock(sk);
 655                timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
 656                        (!tport->congested || !tport->connected), timeout_val);
 657                lock_sock(sk);
 658        } while (1);
 659
 660        if (iocb)
 661                release_sock(sk);
 662        return res;
 663}
 664
 665/**
 666 * send_stream - send stream-oriented data
 667 * @iocb: (unused)
 668 * @sock: socket structure
 669 * @m: data to send
 670 * @total_len: total length of data to be sent
 671 *
 672 * Used for SOCK_STREAM data.
 673 *
 674 * Returns the number of bytes sent on success (or partial success),
 675 * or errno if no data sent
 676 */
 677static int send_stream(struct kiocb *iocb, struct socket *sock,
 678                       struct msghdr *m, size_t total_len)
 679{
 680        struct sock *sk = sock->sk;
 681        struct tipc_port *tport = tipc_sk_port(sk);
 682        struct msghdr my_msg;
 683        struct iovec my_iov;
 684        struct iovec *curr_iov;
 685        int curr_iovlen;
 686        char __user *curr_start;
 687        u32 hdr_size;
 688        int curr_left;
 689        int bytes_to_send;
 690        int bytes_sent;
 691        int res;
 692
 693        lock_sock(sk);
 694
 695        /* Handle special cases where there is no connection */
 696        if (unlikely(sock->state != SS_CONNECTED)) {
 697                if (sock->state == SS_UNCONNECTED) {
 698                        res = send_packet(NULL, sock, m, total_len);
 699                        goto exit;
 700                } else if (sock->state == SS_DISCONNECTING) {
 701                        res = -EPIPE;
 702                        goto exit;
 703                } else {
 704                        res = -ENOTCONN;
 705                        goto exit;
 706                }
 707        }
 708
 709        if (unlikely(m->msg_name)) {
 710                res = -EISCONN;
 711                goto exit;
 712        }
 713
 714        if ((total_len > (unsigned int)INT_MAX) ||
 715            (m->msg_iovlen > (unsigned int)INT_MAX)) {
 716                res = -EMSGSIZE;
 717                goto exit;
 718        }
 719
 720        /*
 721         * Send each iovec entry using one or more messages
 722         *
 723         * Note: This algorithm is good for the most likely case
 724         * (i.e. one large iovec entry), but could be improved to pass sets
 725         * of small iovec entries into send_packet().
 726         */
 727        curr_iov = m->msg_iov;
 728        curr_iovlen = m->msg_iovlen;
 729        my_msg.msg_iov = &my_iov;
 730        my_msg.msg_iovlen = 1;
 731        my_msg.msg_flags = m->msg_flags;
 732        my_msg.msg_name = NULL;
 733        bytes_sent = 0;
 734
 735        hdr_size = msg_hdr_sz(&tport->phdr);
 736
 737        while (curr_iovlen--) {
 738                curr_start = curr_iov->iov_base;
 739                curr_left = curr_iov->iov_len;
 740
 741                while (curr_left) {
 742                        bytes_to_send = tport->max_pkt - hdr_size;
 743                        if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
 744                                bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
 745                        if (curr_left < bytes_to_send)
 746                                bytes_to_send = curr_left;
 747                        my_iov.iov_base = curr_start;
 748                        my_iov.iov_len = bytes_to_send;
 749                        res = send_packet(NULL, sock, &my_msg, bytes_to_send);
 750                        if (res < 0) {
 751                                if (bytes_sent)
 752                                        res = bytes_sent;
 753                                goto exit;
 754                        }
 755                        curr_left -= bytes_to_send;
 756                        curr_start += bytes_to_send;
 757                        bytes_sent += bytes_to_send;
 758                }
 759
 760                curr_iov++;
 761        }
 762        res = bytes_sent;
 763exit:
 764        release_sock(sk);
 765        return res;
 766}
 767
 768/**
 769 * auto_connect - complete connection setup to a remote port
 770 * @sock: socket structure
 771 * @msg: peer's response message
 772 *
 773 * Returns 0 on success, errno otherwise
 774 */
 775static int auto_connect(struct socket *sock, struct tipc_msg *msg)
 776{
 777        struct tipc_sock *tsock = tipc_sk(sock->sk);
 778
 779        if (msg_errcode(msg)) {
 780                sock->state = SS_DISCONNECTING;
 781                return -ECONNREFUSED;
 782        }
 783
 784        tsock->peer_name.ref = msg_origport(msg);
 785        tsock->peer_name.node = msg_orignode(msg);
 786        tipc_connect2port(tsock->p->ref, &tsock->peer_name);
 787        tipc_set_portimportance(tsock->p->ref, msg_importance(msg));
 788        sock->state = SS_CONNECTED;
 789        return 0;
 790}
 791
 792/**
 793 * set_orig_addr - capture sender's address for received message
 794 * @m: descriptor for message info
 795 * @msg: received message header
 796 *
 797 * Note: Address is not captured if not requested by receiver.
 798 */
 799static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
 800{
 801        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name;
 802
 803        if (addr) {
 804                addr->family = AF_TIPC;
 805                addr->addrtype = TIPC_ADDR_ID;
 806                addr->addr.id.ref = msg_origport(msg);
 807                addr->addr.id.node = msg_orignode(msg);
 808                addr->addr.name.domain = 0;     /* could leave uninitialized */
 809                addr->scope = 0;                /* could leave uninitialized */
 810                m->msg_namelen = sizeof(struct sockaddr_tipc);
 811        }
 812}
 813
 814/**
 815 * anc_data_recv - optionally capture ancillary data for received message
 816 * @m: descriptor for message info
 817 * @msg: received message header
 818 * @tport: TIPC port associated with message
 819 *
 820 * Note: Ancillary data is not captured if not requested by receiver.
 821 *
 822 * Returns 0 if successful, otherwise errno
 823 */
 824static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 825                                struct tipc_port *tport)
 826{
 827        u32 anc_data[3];
 828        u32 err;
 829        u32 dest_type;
 830        int has_name;
 831        int res;
 832
 833        if (likely(m->msg_controllen == 0))
 834                return 0;
 835
 836        /* Optionally capture errored message object(s) */
 837        err = msg ? msg_errcode(msg) : 0;
 838        if (unlikely(err)) {
 839                anc_data[0] = err;
 840                anc_data[1] = msg_data_sz(msg);
 841                res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
 842                if (res)
 843                        return res;
 844                if (anc_data[1]) {
 845                        res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
 846                                       msg_data(msg));
 847                        if (res)
 848                                return res;
 849                }
 850        }
 851
 852        /* Optionally capture message destination object */
 853        dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
 854        switch (dest_type) {
 855        case TIPC_NAMED_MSG:
 856                has_name = 1;
 857                anc_data[0] = msg_nametype(msg);
 858                anc_data[1] = msg_namelower(msg);
 859                anc_data[2] = msg_namelower(msg);
 860                break;
 861        case TIPC_MCAST_MSG:
 862                has_name = 1;
 863                anc_data[0] = msg_nametype(msg);
 864                anc_data[1] = msg_namelower(msg);
 865                anc_data[2] = msg_nameupper(msg);
 866                break;
 867        case TIPC_CONN_MSG:
 868                has_name = (tport->conn_type != 0);
 869                anc_data[0] = tport->conn_type;
 870                anc_data[1] = tport->conn_instance;
 871                anc_data[2] = tport->conn_instance;
 872                break;
 873        default:
 874                has_name = 0;
 875        }
 876        if (has_name) {
 877                res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
 878                if (res)
 879                        return res;
 880        }
 881
 882        return 0;
 883}
 884
 885/**
 886 * recv_msg - receive packet-oriented message
 887 * @iocb: (unused)
 888 * @m: descriptor for message info
 889 * @buf_len: total size of user buffer area
 890 * @flags: receive flags
 891 *
 892 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 893 * If the complete message doesn't fit in user area, truncate it.
 894 *
 895 * Returns size of returned message data, errno otherwise
 896 */
 897static int recv_msg(struct kiocb *iocb, struct socket *sock,
 898                    struct msghdr *m, size_t buf_len, int flags)
 899{
 900        struct sock *sk = sock->sk;
 901        struct tipc_port *tport = tipc_sk_port(sk);
 902        struct sk_buff *buf;
 903        struct tipc_msg *msg;
 904        long timeout;
 905        unsigned int sz;
 906        u32 err;
 907        int res;
 908
 909        /* Catch invalid receive requests */
 910        if (unlikely(!buf_len))
 911                return -EINVAL;
 912
 913        lock_sock(sk);
 914
 915        if (unlikely(sock->state == SS_UNCONNECTED)) {
 916                res = -ENOTCONN;
 917                goto exit;
 918        }
 919
 920        timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 921restart:
 922
 923        /* Look for a message in receive queue; wait if necessary */
 924        while (skb_queue_empty(&sk->sk_receive_queue)) {
 925                if (sock->state == SS_DISCONNECTING) {
 926                        res = -ENOTCONN;
 927                        goto exit;
 928                }
 929                if (timeout <= 0L) {
 930                        res = timeout ? timeout : -EWOULDBLOCK;
 931                        goto exit;
 932                }
 933                release_sock(sk);
 934                timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
 935                                                           tipc_rx_ready(sock),
 936                                                           timeout);
 937                lock_sock(sk);
 938        }
 939
 940        /* Look at first message in receive queue */
 941        buf = skb_peek(&sk->sk_receive_queue);
 942        msg = buf_msg(buf);
 943        sz = msg_data_sz(msg);
 944        err = msg_errcode(msg);
 945
 946        /* Complete connection setup for an implied connect */
 947        if (unlikely(sock->state == SS_CONNECTING)) {
 948                res = auto_connect(sock, msg);
 949                if (res)
 950                        goto exit;
 951        }
 952
 953        /* Discard an empty non-errored message & try again */
 954        if ((!sz) && (!err)) {
 955                advance_rx_queue(sk);
 956                goto restart;
 957        }
 958
 959        /* Capture sender's address (optional) */
 960        set_orig_addr(m, msg);
 961
 962        /* Capture ancillary data (optional) */
 963        res = anc_data_recv(m, msg, tport);
 964        if (res)
 965                goto exit;
 966
 967        /* Capture message data (if valid) & compute return value (always) */
 968        if (!err) {
 969                if (unlikely(buf_len < sz)) {
 970                        sz = buf_len;
 971                        m->msg_flags |= MSG_TRUNC;
 972                }
 973                res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
 974                                              m->msg_iov, sz);
 975                if (res)
 976                        goto exit;
 977                res = sz;
 978        } else {
 979                if ((sock->state == SS_READY) ||
 980                    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
 981                        res = 0;
 982                else
 983                        res = -ECONNRESET;
 984        }
 985
 986        /* Consume received message (optional) */
 987        if (likely(!(flags & MSG_PEEK))) {
 988                if ((sock->state != SS_READY) &&
 989                    (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
 990                        tipc_acknowledge(tport->ref, tport->conn_unacked);
 991                advance_rx_queue(sk);
 992        }
 993exit:
 994        release_sock(sk);
 995        return res;
 996}
 997
 998/**
 999 * recv_stream - receive stream-oriented data
1000 * @iocb: (unused)
1001 * @m: descriptor for message info
1002 * @buf_len: total size of user buffer area
1003 * @flags: receive flags
1004 *
1005 * Used for SOCK_STREAM messages only.  If not enough data is available
1006 * will optionally wait for more; never truncates data.
1007 *
1008 * Returns size of returned message data, errno otherwise
1009 */
1010static int recv_stream(struct kiocb *iocb, struct socket *sock,
1011                       struct msghdr *m, size_t buf_len, int flags)
1012{
1013        struct sock *sk = sock->sk;
1014        struct tipc_port *tport = tipc_sk_port(sk);
1015        struct sk_buff *buf;
1016        struct tipc_msg *msg;
1017        long timeout;
1018        unsigned int sz;
1019        int sz_to_copy, target, needed;
1020        int sz_copied = 0;
1021        u32 err;
1022        int res = 0;
1023
1024        /* Catch invalid receive attempts */
1025        if (unlikely(!buf_len))
1026                return -EINVAL;
1027
1028        lock_sock(sk);
1029
1030        if (unlikely((sock->state == SS_UNCONNECTED) ||
1031                     (sock->state == SS_CONNECTING))) {
1032                res = -ENOTCONN;
1033                goto exit;
1034        }
1035
1036        target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1037        timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1038
1039restart:
1040        /* Look for a message in receive queue; wait if necessary */
1041        while (skb_queue_empty(&sk->sk_receive_queue)) {
1042                if (sock->state == SS_DISCONNECTING) {
1043                        res = -ENOTCONN;
1044                        goto exit;
1045                }
1046                if (timeout <= 0L) {
1047                        res = timeout ? timeout : -EWOULDBLOCK;
1048                        goto exit;
1049                }
1050                release_sock(sk);
1051                timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
1052                                                           tipc_rx_ready(sock),
1053                                                           timeout);
1054                lock_sock(sk);
1055        }
1056
1057        /* Look at first message in receive queue */
1058        buf = skb_peek(&sk->sk_receive_queue);
1059        msg = buf_msg(buf);
1060        sz = msg_data_sz(msg);
1061        err = msg_errcode(msg);
1062
1063        /* Discard an empty non-errored message & try again */
1064        if ((!sz) && (!err)) {
1065                advance_rx_queue(sk);
1066                goto restart;
1067        }
1068
1069        /* Optionally capture sender's address & ancillary data of first msg */
1070        if (sz_copied == 0) {
1071                set_orig_addr(m, msg);
1072                res = anc_data_recv(m, msg, tport);
1073                if (res)
1074                        goto exit;
1075        }
1076
1077        /* Capture message data (if valid) & compute return value (always) */
1078        if (!err) {
1079                u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
1080
1081                sz -= offset;
1082                needed = (buf_len - sz_copied);
1083                sz_to_copy = (sz <= needed) ? sz : needed;
1084
1085                res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
1086                                              m->msg_iov, sz_to_copy);
1087                if (res)
1088                        goto exit;
1089
1090                sz_copied += sz_to_copy;
1091
1092                if (sz_to_copy < sz) {
1093                        if (!(flags & MSG_PEEK))
1094                                TIPC_SKB_CB(buf)->handle =
1095                                (void *)(unsigned long)(offset + sz_to_copy);
1096                        goto exit;
1097                }
1098        } else {
1099                if (sz_copied != 0)
1100                        goto exit; /* can't add error msg to valid data */
1101
1102                if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
1103                        res = 0;
1104                else
1105                        res = -ECONNRESET;
1106        }
1107
1108        /* Consume received message (optional) */
1109        if (likely(!(flags & MSG_PEEK))) {
1110                if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1111                        tipc_acknowledge(tport->ref, tport->conn_unacked);
1112                advance_rx_queue(sk);
1113        }
1114
1115        /* Loop around if more data is required */
1116        if ((sz_copied < buf_len) &&    /* didn't get all requested data */
1117            (!skb_queue_empty(&sk->sk_receive_queue) ||
1118            (sz_copied < target)) &&    /* and more is ready or required */
1119            (!(flags & MSG_PEEK)) &&    /* and aren't just peeking at data */
1120            (!err))                     /* and haven't reached a FIN */
1121                goto restart;
1122
1123exit:
1124        release_sock(sk);
1125        return sz_copied ? sz_copied : res;
1126}
1127
1128/**
1129 * rx_queue_full - determine if receive queue can accept another message
1130 * @msg: message to be added to queue
1131 * @queue_size: current size of queue
1132 * @base: nominal maximum size of queue
1133 *
1134 * Returns 1 if queue is unable to accept message, 0 otherwise
1135 */
1136static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1137{
1138        u32 threshold;
1139        u32 imp = msg_importance(msg);
1140
1141        if (imp == TIPC_LOW_IMPORTANCE)
1142                threshold = base;
1143        else if (imp == TIPC_MEDIUM_IMPORTANCE)
1144                threshold = base * 2;
1145        else if (imp == TIPC_HIGH_IMPORTANCE)
1146                threshold = base * 100;
1147        else
1148                return 0;
1149
1150        if (msg_connected(msg))
1151                threshold *= 4;
1152
1153        return queue_size >= threshold;
1154}
1155
1156/**
1157 * filter_rcv - validate incoming message
1158 * @sk: socket
1159 * @buf: message
1160 *
1161 * Enqueues message on receive queue if acceptable; optionally handles
1162 * disconnect indication for a connected socket.
1163 *
1164 * Called with socket lock already taken; port lock may also be taken.
1165 *
1166 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1167 */
1168static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1169{
1170        struct socket *sock = sk->sk_socket;
1171        struct tipc_msg *msg = buf_msg(buf);
1172        u32 recv_q_len;
1173
1174        /* Reject message if it is wrong sort of message for socket */
1175        if (msg_type(msg) > TIPC_DIRECT_MSG)
1176                return TIPC_ERR_NO_PORT;
1177
1178        if (sock->state == SS_READY) {
1179                if (msg_connected(msg))
1180                        return TIPC_ERR_NO_PORT;
1181        } else {
1182                if (msg_mcast(msg))
1183                        return TIPC_ERR_NO_PORT;
1184                if (sock->state == SS_CONNECTED) {
1185                        if (!msg_connected(msg) ||
1186                            !tipc_port_peer_msg(tipc_sk_port(sk), msg))
1187                                return TIPC_ERR_NO_PORT;
1188                } else if (sock->state == SS_CONNECTING) {
1189                        if (!msg_connected(msg) && (msg_errcode(msg) == 0))
1190                                return TIPC_ERR_NO_PORT;
1191                } else if (sock->state == SS_LISTENING) {
1192                        if (msg_connected(msg) || msg_errcode(msg))
1193                                return TIPC_ERR_NO_PORT;
1194                } else if (sock->state == SS_DISCONNECTING) {
1195                        return TIPC_ERR_NO_PORT;
1196                } else /* (sock->state == SS_UNCONNECTED) */ {
1197                        if (msg_connected(msg) || msg_errcode(msg))
1198                                return TIPC_ERR_NO_PORT;
1199                }
1200        }
1201
1202        /* Reject message if there isn't room to queue it */
1203        recv_q_len = (u32)atomic_read(&tipc_queue_size);
1204        if (unlikely(recv_q_len >= OVERLOAD_LIMIT_BASE)) {
1205                if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE))
1206                        return TIPC_ERR_OVERLOAD;
1207        }
1208        recv_q_len = skb_queue_len(&sk->sk_receive_queue);
1209        if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
1210                if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
1211                        return TIPC_ERR_OVERLOAD;
1212        }
1213
1214        /* Enqueue message (finally!) */
1215        TIPC_SKB_CB(buf)->handle = 0;
1216        atomic_inc(&tipc_queue_size);
1217        __skb_queue_tail(&sk->sk_receive_queue, buf);
1218
1219        /* Initiate connection termination for an incoming 'FIN' */
1220        if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
1221                sock->state = SS_DISCONNECTING;
1222                tipc_disconnect_port(tipc_sk_port(sk));
1223        }
1224
1225        if (waitqueue_active(sk_sleep(sk)))
1226                wake_up_interruptible(sk_sleep(sk));
1227        return TIPC_OK;
1228}
1229
1230/**
1231 * backlog_rcv - handle incoming message from backlog queue
1232 * @sk: socket
1233 * @buf: message
1234 *
1235 * Caller must hold socket lock, but not port lock.
1236 *
1237 * Returns 0
1238 */
1239static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1240{
1241        u32 res;
1242
1243        res = filter_rcv(sk, buf);
1244        if (res)
1245                tipc_reject_msg(buf, res);
1246        return 0;
1247}
1248
1249/**
1250 * dispatch - handle incoming message
1251 * @tport: TIPC port that received message
1252 * @buf: message
1253 *
1254 * Called with port lock already taken.
1255 *
1256 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1257 */
1258static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1259{
1260        struct sock *sk = (struct sock *)tport->usr_handle;
1261        u32 res;
1262
1263        /*
1264         * Process message if socket is unlocked; otherwise add to backlog queue
1265         *
1266         * This code is based on sk_receive_skb(), but must be distinct from it
1267         * since a TIPC-specific filter/reject mechanism is utilized
1268         */
1269        bh_lock_sock(sk);
1270        if (!sock_owned_by_user(sk)) {
1271                res = filter_rcv(sk, buf);
1272        } else {
1273                if (sk_add_backlog(sk, buf, sk->sk_rcvbuf))
1274                        res = TIPC_ERR_OVERLOAD;
1275                else
1276                        res = TIPC_OK;
1277        }
1278        bh_unlock_sock(sk);
1279
1280        return res;
1281}
1282
1283/**
1284 * wakeupdispatch - wake up port after congestion
1285 * @tport: port to wakeup
1286 *
1287 * Called with port lock already taken.
1288 */
1289static void wakeupdispatch(struct tipc_port *tport)
1290{
1291        struct sock *sk = (struct sock *)tport->usr_handle;
1292
1293        if (waitqueue_active(sk_sleep(sk)))
1294                wake_up_interruptible(sk_sleep(sk));
1295}
1296
1297/**
1298 * connect - establish a connection to another TIPC port
1299 * @sock: socket structure
1300 * @dest: socket address for destination port
1301 * @destlen: size of socket address data structure
1302 * @flags: file-related flags associated with socket
1303 *
1304 * Returns 0 on success, errno otherwise
1305 */
1306static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1307                   int flags)
1308{
1309        struct sock *sk = sock->sk;
1310        struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1311        struct msghdr m = {NULL,};
1312        struct sk_buff *buf;
1313        struct tipc_msg *msg;
1314        unsigned int timeout;
1315        int res;
1316
1317        lock_sock(sk);
1318
1319        /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1320        if (sock->state == SS_READY) {
1321                res = -EOPNOTSUPP;
1322                goto exit;
1323        }
1324
1325        /* For now, TIPC does not support the non-blocking form of connect() */
1326        if (flags & O_NONBLOCK) {
1327                res = -EOPNOTSUPP;
1328                goto exit;
1329        }
1330
1331        /* Issue Posix-compliant error code if socket is in the wrong state */
1332        if (sock->state == SS_LISTENING) {
1333                res = -EOPNOTSUPP;
1334                goto exit;
1335        }
1336        if (sock->state == SS_CONNECTING) {
1337                res = -EALREADY;
1338                goto exit;
1339        }
1340        if (sock->state != SS_UNCONNECTED) {
1341                res = -EISCONN;
1342                goto exit;
1343        }
1344
1345        /*
1346         * Reject connection attempt using multicast address
1347         *
1348         * Note: send_msg() validates the rest of the address fields,
1349         *       so there's no need to do it here
1350         */
1351        if (dst->addrtype == TIPC_ADDR_MCAST) {
1352                res = -EINVAL;
1353                goto exit;
1354        }
1355
1356        /* Reject any messages already in receive queue (very unlikely) */
1357        reject_rx_queue(sk);
1358
1359        /* Send a 'SYN-' to destination */
1360        m.msg_name = dest;
1361        m.msg_namelen = destlen;
1362        res = send_msg(NULL, sock, &m, 0);
1363        if (res < 0)
1364                goto exit;
1365
1366        /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1367        timeout = tipc_sk(sk)->conn_timeout;
1368        release_sock(sk);
1369        res = wait_event_interruptible_timeout(*sk_sleep(sk),
1370                        (!skb_queue_empty(&sk->sk_receive_queue) ||
1371                        (sock->state != SS_CONNECTING)),
1372                        timeout ? (long)msecs_to_jiffies(timeout)
1373                                : MAX_SCHEDULE_TIMEOUT);
1374        lock_sock(sk);
1375
1376        if (res > 0) {
1377                buf = skb_peek(&sk->sk_receive_queue);
1378                if (buf != NULL) {
1379                        msg = buf_msg(buf);
1380                        res = auto_connect(sock, msg);
1381                        if (!res) {
1382                                if (!msg_data_sz(msg))
1383                                        advance_rx_queue(sk);
1384                        }
1385                } else {
1386                        if (sock->state == SS_CONNECTED)
1387                                res = -EISCONN;
1388                        else
1389                                res = -ECONNREFUSED;
1390                }
1391        } else {
1392                if (res == 0)
1393                        res = -ETIMEDOUT;
1394                else
1395                        ; /* leave "res" unchanged */
1396                sock->state = SS_DISCONNECTING;
1397        }
1398
1399exit:
1400        release_sock(sk);
1401        return res;
1402}
1403
1404/**
1405 * listen - allow socket to listen for incoming connections
1406 * @sock: socket structure
1407 * @len: (unused)
1408 *
1409 * Returns 0 on success, errno otherwise
1410 */
1411static int listen(struct socket *sock, int len)
1412{
1413        struct sock *sk = sock->sk;
1414        int res;
1415
1416        lock_sock(sk);
1417
1418        if (sock->state != SS_UNCONNECTED)
1419                res = -EINVAL;
1420        else {
1421                sock->state = SS_LISTENING;
1422                res = 0;
1423        }
1424
1425        release_sock(sk);
1426        return res;
1427}
1428
1429/**
1430 * accept - wait for connection request
1431 * @sock: listening socket
1432 * @newsock: new socket that is to be connected
1433 * @flags: file-related flags associated with socket
1434 *
1435 * Returns 0 on success, errno otherwise
1436 */
1437static int accept(struct socket *sock, struct socket *new_sock, int flags)
1438{
1439        struct sock *sk = sock->sk;
1440        struct sk_buff *buf;
1441        int res;
1442
1443        lock_sock(sk);
1444
1445        if (sock->state != SS_LISTENING) {
1446                res = -EINVAL;
1447                goto exit;
1448        }
1449
1450        while (skb_queue_empty(&sk->sk_receive_queue)) {
1451                if (flags & O_NONBLOCK) {
1452                        res = -EWOULDBLOCK;
1453                        goto exit;
1454                }
1455                release_sock(sk);
1456                res = wait_event_interruptible(*sk_sleep(sk),
1457                                (!skb_queue_empty(&sk->sk_receive_queue)));
1458                lock_sock(sk);
1459                if (res)
1460                        goto exit;
1461        }
1462
1463        buf = skb_peek(&sk->sk_receive_queue);
1464
1465        res = tipc_create(sock_net(sock->sk), new_sock, 0, 0);
1466        if (!res) {
1467                struct sock *new_sk = new_sock->sk;
1468                struct tipc_sock *new_tsock = tipc_sk(new_sk);
1469                struct tipc_port *new_tport = new_tsock->p;
1470                u32 new_ref = new_tport->ref;
1471                struct tipc_msg *msg = buf_msg(buf);
1472
1473                lock_sock(new_sk);
1474
1475                /*
1476                 * Reject any stray messages received by new socket
1477                 * before the socket lock was taken (very, very unlikely)
1478                 */
1479                reject_rx_queue(new_sk);
1480
1481                /* Connect new socket to it's peer */
1482                new_tsock->peer_name.ref = msg_origport(msg);
1483                new_tsock->peer_name.node = msg_orignode(msg);
1484                tipc_connect2port(new_ref, &new_tsock->peer_name);
1485                new_sock->state = SS_CONNECTED;
1486
1487                tipc_set_portimportance(new_ref, msg_importance(msg));
1488                if (msg_named(msg)) {
1489                        new_tport->conn_type = msg_nametype(msg);
1490                        new_tport->conn_instance = msg_nameinst(msg);
1491                }
1492
1493                /*
1494                 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1495                 * Respond to 'SYN+' by queuing it on new socket.
1496                 */
1497                if (!msg_data_sz(msg)) {
1498                        struct msghdr m = {NULL,};
1499
1500                        advance_rx_queue(sk);
1501                        send_packet(NULL, new_sock, &m, 0);
1502                } else {
1503                        __skb_dequeue(&sk->sk_receive_queue);
1504                        __skb_queue_head(&new_sk->sk_receive_queue, buf);
1505                }
1506                release_sock(new_sk);
1507        }
1508exit:
1509        release_sock(sk);
1510        return res;
1511}
1512
1513/**
1514 * shutdown - shutdown socket connection
1515 * @sock: socket structure
1516 * @how: direction to close (must be SHUT_RDWR)
1517 *
1518 * Terminates connection (if necessary), then purges socket's receive queue.
1519 *
1520 * Returns 0 on success, errno otherwise
1521 */
1522static int shutdown(struct socket *sock, int how)
1523{
1524        struct sock *sk = sock->sk;
1525        struct tipc_port *tport = tipc_sk_port(sk);
1526        struct sk_buff *buf;
1527        int res;
1528
1529        if (how != SHUT_RDWR)
1530                return -EINVAL;
1531
1532        lock_sock(sk);
1533
1534        switch (sock->state) {
1535        case SS_CONNECTING:
1536        case SS_CONNECTED:
1537
1538restart:
1539                /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1540                buf = __skb_dequeue(&sk->sk_receive_queue);
1541                if (buf) {
1542                        atomic_dec(&tipc_queue_size);
1543                        if (TIPC_SKB_CB(buf)->handle != 0) {
1544                                kfree_skb(buf);
1545                                goto restart;
1546                        }
1547                        tipc_disconnect(tport->ref);
1548                        tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1549                } else {
1550                        tipc_shutdown(tport->ref);
1551                }
1552
1553                sock->state = SS_DISCONNECTING;
1554
1555                /* fall through */
1556
1557        case SS_DISCONNECTING:
1558
1559                /* Discard any unreceived messages; wake up sleeping tasks */
1560                discard_rx_queue(sk);
1561                if (waitqueue_active(sk_sleep(sk)))
1562                        wake_up_interruptible(sk_sleep(sk));
1563                res = 0;
1564                break;
1565
1566        default:
1567                res = -ENOTCONN;
1568        }
1569
1570        release_sock(sk);
1571        return res;
1572}
1573
1574/**
1575 * setsockopt - set socket option
1576 * @sock: socket structure
1577 * @lvl: option level
1578 * @opt: option identifier
1579 * @ov: pointer to new option value
1580 * @ol: length of option value
1581 *
1582 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
1583 * (to ease compatibility).
1584 *
1585 * Returns 0 on success, errno otherwise
1586 */
1587static int setsockopt(struct socket *sock,
1588                      int lvl, int opt, char __user *ov, unsigned int ol)
1589{
1590        struct sock *sk = sock->sk;
1591        struct tipc_port *tport = tipc_sk_port(sk);
1592        u32 value;
1593        int res;
1594
1595        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1596                return 0;
1597        if (lvl != SOL_TIPC)
1598                return -ENOPROTOOPT;
1599        if (ol < sizeof(value))
1600                return -EINVAL;
1601        res = get_user(value, (u32 __user *)ov);
1602        if (res)
1603                return res;
1604
1605        lock_sock(sk);
1606
1607        switch (opt) {
1608        case TIPC_IMPORTANCE:
1609                res = tipc_set_portimportance(tport->ref, value);
1610                break;
1611        case TIPC_SRC_DROPPABLE:
1612                if (sock->type != SOCK_STREAM)
1613                        res = tipc_set_portunreliable(tport->ref, value);
1614                else
1615                        res = -ENOPROTOOPT;
1616                break;
1617        case TIPC_DEST_DROPPABLE:
1618                res = tipc_set_portunreturnable(tport->ref, value);
1619                break;
1620        case TIPC_CONN_TIMEOUT:
1621                tipc_sk(sk)->conn_timeout = value;
1622                /* no need to set "res", since already 0 at this point */
1623                break;
1624        default:
1625                res = -EINVAL;
1626        }
1627
1628        release_sock(sk);
1629
1630        return res;
1631}
1632
1633/**
1634 * getsockopt - get socket option
1635 * @sock: socket structure
1636 * @lvl: option level
1637 * @opt: option identifier
1638 * @ov: receptacle for option value
1639 * @ol: receptacle for length of option value
1640 *
1641 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
1642 * (to ease compatibility).
1643 *
1644 * Returns 0 on success, errno otherwise
1645 */
1646static int getsockopt(struct socket *sock,
1647                      int lvl, int opt, char __user *ov, int __user *ol)
1648{
1649        struct sock *sk = sock->sk;
1650        struct tipc_port *tport = tipc_sk_port(sk);
1651        int len;
1652        u32 value;
1653        int res;
1654
1655        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1656                return put_user(0, ol);
1657        if (lvl != SOL_TIPC)
1658                return -ENOPROTOOPT;
1659        res = get_user(len, ol);
1660        if (res)
1661                return res;
1662
1663        lock_sock(sk);
1664
1665        switch (opt) {
1666        case TIPC_IMPORTANCE:
1667                res = tipc_portimportance(tport->ref, &value);
1668                break;
1669        case TIPC_SRC_DROPPABLE:
1670                res = tipc_portunreliable(tport->ref, &value);
1671                break;
1672        case TIPC_DEST_DROPPABLE:
1673                res = tipc_portunreturnable(tport->ref, &value);
1674                break;
1675        case TIPC_CONN_TIMEOUT:
1676                value = tipc_sk(sk)->conn_timeout;
1677                /* no need to set "res", since already 0 at this point */
1678                break;
1679        case TIPC_NODE_RECVQ_DEPTH:
1680                value = (u32)atomic_read(&tipc_queue_size);
1681                break;
1682        case TIPC_SOCK_RECVQ_DEPTH:
1683                value = skb_queue_len(&sk->sk_receive_queue);
1684                break;
1685        default:
1686                res = -EINVAL;
1687        }
1688
1689        release_sock(sk);
1690
1691        if (res)
1692                return res;     /* "get" failed */
1693
1694        if (len < sizeof(value))
1695                return -EINVAL;
1696
1697        if (copy_to_user(ov, &value, sizeof(value)))
1698                return -EFAULT;
1699
1700        return put_user(sizeof(value), ol);
1701}
1702
1703/* Protocol switches for the various types of TIPC sockets */
1704
1705static const struct proto_ops msg_ops = {
1706        .owner          = THIS_MODULE,
1707        .family         = AF_TIPC,
1708        .release        = release,
1709        .bind           = bind,
1710        .connect        = connect,
1711        .socketpair     = sock_no_socketpair,
1712        .accept         = sock_no_accept,
1713        .getname        = get_name,
1714        .poll           = poll,
1715        .ioctl          = sock_no_ioctl,
1716        .listen         = sock_no_listen,
1717        .shutdown       = shutdown,
1718        .setsockopt     = setsockopt,
1719        .getsockopt     = getsockopt,
1720        .sendmsg        = send_msg,
1721        .recvmsg        = recv_msg,
1722        .mmap           = sock_no_mmap,
1723        .sendpage       = sock_no_sendpage
1724};
1725
1726static const struct proto_ops packet_ops = {
1727        .owner          = THIS_MODULE,
1728        .family         = AF_TIPC,
1729        .release        = release,
1730        .bind           = bind,
1731        .connect        = connect,
1732        .socketpair     = sock_no_socketpair,
1733        .accept         = accept,
1734        .getname        = get_name,
1735        .poll           = poll,
1736        .ioctl          = sock_no_ioctl,
1737        .listen         = listen,
1738        .shutdown       = shutdown,
1739        .setsockopt     = setsockopt,
1740        .getsockopt     = getsockopt,
1741        .sendmsg        = send_packet,
1742        .recvmsg        = recv_msg,
1743        .mmap           = sock_no_mmap,
1744        .sendpage       = sock_no_sendpage
1745};
1746
1747static const struct proto_ops stream_ops = {
1748        .owner          = THIS_MODULE,
1749        .family         = AF_TIPC,
1750        .release        = release,
1751        .bind           = bind,
1752        .connect        = connect,
1753        .socketpair     = sock_no_socketpair,
1754        .accept         = accept,
1755        .getname        = get_name,
1756        .poll           = poll,
1757        .ioctl          = sock_no_ioctl,
1758        .listen         = listen,
1759        .shutdown       = shutdown,
1760        .setsockopt     = setsockopt,
1761        .getsockopt     = getsockopt,
1762        .sendmsg        = send_stream,
1763        .recvmsg        = recv_stream,
1764        .mmap           = sock_no_mmap,
1765        .sendpage       = sock_no_sendpage
1766};
1767
1768static const struct net_proto_family tipc_family_ops = {
1769        .owner          = THIS_MODULE,
1770        .family         = AF_TIPC,
1771        .create         = tipc_create
1772};
1773
1774static struct proto tipc_proto = {
1775        .name           = "TIPC",
1776        .owner          = THIS_MODULE,
1777        .obj_size       = sizeof(struct tipc_sock)
1778};
1779
1780/**
1781 * tipc_socket_init - initialize TIPC socket interface
1782 *
1783 * Returns 0 on success, errno otherwise
1784 */
1785int tipc_socket_init(void)
1786{
1787        int res;
1788
1789        res = proto_register(&tipc_proto, 1);
1790        if (res) {
1791                pr_err("Failed to register TIPC protocol type\n");
1792                goto out;
1793        }
1794
1795        res = sock_register(&tipc_family_ops);
1796        if (res) {
1797                pr_err("Failed to register TIPC socket type\n");
1798                proto_unregister(&tipc_proto);
1799                goto out;
1800        }
1801
1802        sockets_enabled = 1;
1803 out:
1804        return res;
1805}
1806
1807/**
1808 * tipc_socket_stop - stop TIPC socket interface
1809 */
1810void tipc_socket_stop(void)
1811{
1812        if (!sockets_enabled)
1813                return;
1814
1815        sockets_enabled = 0;
1816        sock_unregister(tipc_family_ops.family);
1817        proto_unregister(&tipc_proto);
1818}
1819
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.