linux/net/tipc/socket.c
<<
>>
Prefs
   1/*
   2 * net/tipc/socket.c: TIPC socket API
   3 *
   4 * Copyright (c) 2001-2007, Ericsson AB
   5 * Copyright (c) 2004-2008, Wind River Systems
   6 * All rights reserved.
   7 *
   8 * Redistribution and use in source and binary forms, with or without
   9 * modification, are permitted provided that the following conditions are met:
  10 *
  11 * 1. Redistributions of source code must retain the above copyright
  12 *    notice, this list of conditions and the following disclaimer.
  13 * 2. Redistributions in binary form must reproduce the above copyright
  14 *    notice, this list of conditions and the following disclaimer in the
  15 *    documentation and/or other materials provided with the distribution.
  16 * 3. Neither the names of the copyright holders nor the names of its
  17 *    contributors may be used to endorse or promote products derived from
  18 *    this software without specific prior written permission.
  19 *
  20 * Alternatively, this software may be distributed under the terms of the
  21 * GNU General Public License ("GPL") version 2 as published by the Free
  22 * Software Foundation.
  23 *
  24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34 * POSSIBILITY OF SUCH DAMAGE.
  35 */
  36
  37#include <linux/module.h>
  38#include <linux/types.h>
  39#include <linux/net.h>
  40#include <linux/socket.h>
  41#include <linux/errno.h>
  42#include <linux/mm.h>
  43#include <linux/poll.h>
  44#include <linux/fcntl.h>
  45#include <linux/gfp.h>
  46#include <asm/string.h>
  47#include <asm/atomic.h>
  48#include <net/sock.h>
  49
  50#include <linux/tipc.h>
  51#include <linux/tipc_config.h>
  52#include <net/tipc/tipc_msg.h>
  53#include <net/tipc/tipc_port.h>
  54
  55#include "core.h"
  56
  57#define SS_LISTENING    -1      /* socket is listening */
  58#define SS_READY        -2      /* socket is connectionless */
  59
  60#define OVERLOAD_LIMIT_BASE     5000
  61#define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
  62
  63struct tipc_sock {
  64        struct sock sk;
  65        struct tipc_port *p;
  66        struct tipc_portid peer_name;
  67};
  68
  69#define tipc_sk(sk) ((struct tipc_sock *)(sk))
  70#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p))
  71
  72static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
  73static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
  74static void wakeupdispatch(struct tipc_port *tport);
  75
  76static const struct proto_ops packet_ops;
  77static const struct proto_ops stream_ops;
  78static const struct proto_ops msg_ops;
  79
  80static struct proto tipc_proto;
  81
  82static int sockets_enabled = 0;
  83
  84static atomic_t tipc_queue_size = ATOMIC_INIT(0);
  85
  86/*
  87 * Revised TIPC socket locking policy:
  88 *
  89 * Most socket operations take the standard socket lock when they start
  90 * and hold it until they finish (or until they need to sleep).  Acquiring
  91 * this lock grants the owner exclusive access to the fields of the socket
  92 * data structures, with the exception of the backlog queue.  A few socket
  93 * operations can be done without taking the socket lock because they only
  94 * read socket information that never changes during the life of the socket.
  95 *
  96 * Socket operations may acquire the lock for the associated TIPC port if they
  97 * need to perform an operation on the port.  If any routine needs to acquire
  98 * both the socket lock and the port lock it must take the socket lock first
  99 * to avoid the risk of deadlock.
 100 *
  101 * The dispatcher handling incoming messages cannot grab the socket lock in
  102 * the standard fashion, since when invoked it runs at the BH level and cannot
  102 * block.
 103 * Instead, it checks to see if the socket lock is currently owned by someone,
 104 * and either handles the message itself or adds it to the socket's backlog
 105 * queue; in the latter case the queued message is processed once the process
 106 * owning the socket lock releases it.
 107 *
 108 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
 109 * the problem of a blocked socket operation preventing any other operations
 110 * from occurring.  However, applications must be careful if they have
 111 * multiple threads trying to send (or receive) on the same socket, as these
 112 * operations might interfere with each other.  For example, doing a connect
 113 * and a receive at the same time might allow the receive to consume the
 114 * ACK message meant for the connect.  While additional work could be done
 115 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 116 *
 117 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 118 * that another operation that must be performed in a non-blocking manner is
 119 * not delayed for very long because the lock has already been taken.
 120 *
 121 * NOTE: This code assumes that certain fields of a port/socket pair are
 122 * constant over its lifetime; such fields can be examined without taking
 123 * the socket lock and/or port lock, and do not need to be re-read even
 124 * after resuming processing after waiting.  These fields include:
 125 *   - socket type
 126 *   - pointer to socket sk structure (aka tipc_sock structure)
 127 *   - pointer to port structure
 128 *   - port reference
 129 */
 130
 131/**
 132 * advance_rx_queue - discard first buffer in socket receive queue
 133 *
 134 * Caller must hold socket lock
 135 */
 136
 137static void advance_rx_queue(struct sock *sk)
 138{
 139        buf_discard(__skb_dequeue(&sk->sk_receive_queue));
 140        atomic_dec(&tipc_queue_size);
 141}
 142
 143/**
 144 * discard_rx_queue - discard all buffers in socket receive queue
 145 *
 146 * Caller must hold socket lock
 147 */
 148
 149static void discard_rx_queue(struct sock *sk)
 150{
 151        struct sk_buff *buf;
 152
 153        while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
 154                atomic_dec(&tipc_queue_size);
 155                buf_discard(buf);
 156        }
 157}
 158
 159/**
 160 * reject_rx_queue - reject all buffers in socket receive queue
 161 *
 162 * Caller must hold socket lock
 163 */
 164
 165static void reject_rx_queue(struct sock *sk)
 166{
 167        struct sk_buff *buf;
 168
 169        while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
 170                tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 171                atomic_dec(&tipc_queue_size);
 172        }
 173}
 174
 175/**
 176 * tipc_create - create a TIPC socket
 177 * @net: network namespace (must be default network)
 178 * @sock: pre-allocated socket structure
 179 * @protocol: protocol indicator (must be 0)
 180 * @kern: caused by kernel or by userspace?
 181 *
 182 * This routine creates additional data structures used by the TIPC socket,
 183 * initializes them, and links them together.
 184 *
 185 * Returns 0 on success, errno otherwise
 186 */
 187
 188static int tipc_create(struct net *net, struct socket *sock, int protocol,
 189                       int kern)
 190{
 191        const struct proto_ops *ops;
 192        socket_state state;
 193        struct sock *sk;
 194        struct tipc_port *tp_ptr;
 195
 196        /* Validate arguments */
 197
 198        if (!net_eq(net, &init_net))
 199                return -EAFNOSUPPORT;
 200
 201        if (unlikely(protocol != 0))
 202                return -EPROTONOSUPPORT;
 203
 204        switch (sock->type) {
 205        case SOCK_STREAM:
 206                ops = &stream_ops;
 207                state = SS_UNCONNECTED;
 208                break;
 209        case SOCK_SEQPACKET:
 210                ops = &packet_ops;
 211                state = SS_UNCONNECTED;
 212                break;
 213        case SOCK_DGRAM:
 214        case SOCK_RDM:
 215                ops = &msg_ops;
 216                state = SS_READY;
 217                break;
 218        default:
 219                return -EPROTOTYPE;
 220        }
 221
 222        /* Allocate socket's protocol area */
 223
 224        sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
 225        if (sk == NULL)
 226                return -ENOMEM;
 227
 228        /* Allocate TIPC port for socket to use */
 229
 230        tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
 231                                     TIPC_LOW_IMPORTANCE);
 232        if (unlikely(!tp_ptr)) {
 233                sk_free(sk);
 234                return -ENOMEM;
 235        }
 236
 237        /* Finish initializing socket data structures */
 238
 239        sock->ops = ops;
 240        sock->state = state;
 241
 242        sock_init_data(sock, sk);
 243        sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
 244        sk->sk_backlog_rcv = backlog_rcv;
 245        tipc_sk(sk)->p = tp_ptr;
 246
 247        spin_unlock_bh(tp_ptr->lock);
 248
 249        if (sock->state == SS_READY) {
 250                tipc_set_portunreturnable(tp_ptr->ref, 1);
 251                if (sock->type == SOCK_DGRAM)
 252                        tipc_set_portunreliable(tp_ptr->ref, 1);
 253        }
 254
 255        atomic_inc(&tipc_user_count);
 256        return 0;
 257}
 258
 259/**
 260 * release - destroy a TIPC socket
 261 * @sock: socket to destroy
 262 *
 263 * This routine cleans up any messages that are still queued on the socket.
 264 * For DGRAM and RDM socket types, all queued messages are rejected.
 265 * For SEQPACKET and STREAM socket types, the first message is rejected
 266 * and any others are discarded.  (If the first message on a STREAM socket
 267 * is partially-read, it is discarded and the next one is rejected instead.)
 268 *
 269 * NOTE: Rejected messages are not necessarily returned to the sender!  They
 270 * are returned or discarded according to the "destination droppable" setting
 271 * specified for the message by the sender.
 272 *
 273 * Returns 0 on success, errno otherwise
 274 */
 275
 276static int release(struct socket *sock)
 277{
 278        struct sock *sk = sock->sk;
 279        struct tipc_port *tport;
 280        struct sk_buff *buf;
 281        int res;
 282
 283        /*
 284         * Exit if socket isn't fully initialized (occurs when a failed accept()
 285         * releases a pre-allocated child socket that was never used)
 286         */
 287
 288        if (sk == NULL)
 289                return 0;
 290
 291        tport = tipc_sk_port(sk);
 292        lock_sock(sk);
 293
 294        /*
 295         * Reject all unreceived messages, except on an active connection
 296         * (which disconnects locally & sends a 'FIN+' to peer)
 297         */
 298
 299        while (sock->state != SS_DISCONNECTING) {
 300                buf = __skb_dequeue(&sk->sk_receive_queue);
 301                if (buf == NULL)
 302                        break;
 303                atomic_dec(&tipc_queue_size);
 304                if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf)))
 305                        buf_discard(buf);
 306                else {
 307                        if ((sock->state == SS_CONNECTING) ||
 308                            (sock->state == SS_CONNECTED)) {
 309                                sock->state = SS_DISCONNECTING;
 310                                tipc_disconnect(tport->ref);
 311                        }
 312                        tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
 313                }
 314        }
 315
 316        /*
 317         * Delete TIPC port; this ensures no more messages are queued
 318         * (also disconnects an active connection & sends a 'FIN-' to peer)
 319         */
 320
 321        res = tipc_deleteport(tport->ref);
 322
 323        /* Discard any remaining (connection-based) messages in receive queue */
 324
 325        discard_rx_queue(sk);
 326
 327        /* Reject any messages that accumulated in backlog queue */
 328
 329        sock->state = SS_DISCONNECTING;
 330        release_sock(sk);
 331
 332        sock_put(sk);
 333        sock->sk = NULL;
 334
 335        atomic_dec(&tipc_user_count);
 336        return res;
 337}
 338
 339/**
 340 * bind - associate or disassocate TIPC name(s) with a socket
 341 * @sock: socket structure
 342 * @uaddr: socket address describing name(s) and desired operation
 343 * @uaddr_len: size of socket address data structure
 344 *
 345 * Name and name sequence binding is indicated using a positive scope value;
 346 * a negative scope value unbinds the specified name.  Specifying no name
 347 * (i.e. a socket address length of 0) unbinds all names from the socket.
 348 *
 349 * Returns 0 on success, errno otherwise
 350 *
 351 * NOTE: This routine doesn't need to take the socket lock since it doesn't
 352 *       access any non-constant socket information.
 353 */
 354
 355static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
 356{
 357        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 358        u32 portref = tipc_sk_port(sock->sk)->ref;
 359
 360        if (unlikely(!uaddr_len))
 361                return tipc_withdraw(portref, 0, NULL);
 362
 363        if (uaddr_len < sizeof(struct sockaddr_tipc))
 364                return -EINVAL;
 365        if (addr->family != AF_TIPC)
 366                return -EAFNOSUPPORT;
 367
 368        if (addr->addrtype == TIPC_ADDR_NAME)
 369                addr->addr.nameseq.upper = addr->addr.nameseq.lower;
 370        else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
 371                return -EAFNOSUPPORT;
 372
 373        return (addr->scope > 0) ?
 374                tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
 375                tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
 376}
 377
 378/**
 379 * get_name - get port ID of socket or peer socket
 380 * @sock: socket structure
 381 * @uaddr: area for returned socket address
 382 * @uaddr_len: area for returned length of socket address
 383 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID
 384 *
 385 * Returns 0 on success, errno otherwise
 386 *
 387 * NOTE: This routine doesn't need to take the socket lock since it only
 388 *       accesses socket information that is unchanging (or which changes in
 389 *       a completely predictable manner).
 390 */
 391
 392static int get_name(struct socket *sock, struct sockaddr *uaddr,
 393                    int *uaddr_len, int peer)
 394{
 395        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
 396        struct tipc_sock *tsock = tipc_sk(sock->sk);
 397
 398        if (peer) {
 399                if ((sock->state != SS_CONNECTED) &&
 400                        ((peer != 2) || (sock->state != SS_DISCONNECTING)))
 401                        return -ENOTCONN;
 402                addr->addr.id.ref = tsock->peer_name.ref;
 403                addr->addr.id.node = tsock->peer_name.node;
 404        } else {
 405                tipc_ownidentity(tsock->p->ref, &addr->addr.id);
 406        }
 407
 408        *uaddr_len = sizeof(*addr);
 409        addr->addrtype = TIPC_ADDR_ID;
 410        addr->family = AF_TIPC;
 411        addr->scope = 0;
 412        addr->addr.name.domain = 0;
 413
 414        return 0;
 415}
 416
 417/**
 418 * poll - read and possibly block on pollmask
 419 * @file: file structure associated with the socket
 420 * @sock: socket for which to calculate the poll bits
 421 * @wait: ???
 422 *
 423 * Returns pollmask value
 424 *
 425 * COMMENTARY:
 426 * It appears that the usual socket locking mechanisms are not useful here
 427 * since the pollmask info is potentially out-of-date the moment this routine
 428 * exits.  TCP and other protocols seem to rely on higher level poll routines
 429 * to handle any preventable race conditions, so TIPC will do the same ...
 430 *
 431 * TIPC sets the returned events as follows:
 432 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
 433 *    or if a connection-oriented socket is does not have an active connection
 434 *    (i.e. a read operation will not block).
 435 * b) POLLOUT is set except when a socket's connection has been terminated
 436 *    (i.e. a write operation will not block).
 437 * c) POLLHUP is set when a socket's connection has been terminated.
 438 *
 439 * IMPORTANT: The fact that a read or write operation will not block does NOT
 440 * imply that the operation will succeed!
 441 */
 442
 443static unsigned int poll(struct file *file, struct socket *sock,
 444                         poll_table *wait)
 445{
 446        struct sock *sk = sock->sk;
 447        u32 mask;
 448
 449        poll_wait(file, sk_sleep(sk), wait);
 450
 451        if (!skb_queue_empty(&sk->sk_receive_queue) ||
 452            (sock->state == SS_UNCONNECTED) ||
 453            (sock->state == SS_DISCONNECTING))
 454                mask = (POLLRDNORM | POLLIN);
 455        else
 456                mask = 0;
 457
 458        if (sock->state == SS_DISCONNECTING)
 459                mask |= POLLHUP;
 460        else
 461                mask |= POLLOUT;
 462
 463        return mask;
 464}
 465
 466/**
 467 * dest_name_check - verify user is permitted to send to specified port name
 468 * @dest: destination address
 469 * @m: descriptor for message to be sent
 470 *
 471 * Prevents restricted configuration commands from being issued by
 472 * unauthorized users.
 473 *
 474 * Returns 0 if permission is granted, otherwise errno
 475 */
 476
 477static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
 478{
 479        struct tipc_cfg_msg_hdr hdr;
 480
 481        if (likely(dest->addr.name.name.type >= TIPC_RESERVED_TYPES))
 482                return 0;
 483        if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
 484                return 0;
 485        if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
 486                return -EACCES;
 487
 488        if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr)))
 489                return -EFAULT;
 490        if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN)))
 491                return -EACCES;
 492
 493        return 0;
 494}
 495
 496/**
 497 * send_msg - send message in connectionless manner
 498 * @iocb: if NULL, indicates that socket lock is already held
 499 * @sock: socket structure
 500 * @m: message to send
 501 * @total_len: length of message
 502 *
 503 * Message must have an destination specified explicitly.
 504 * Used for SOCK_RDM and SOCK_DGRAM messages,
 505 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections.
 506 * (Note: 'SYN+' is prohibited on SOCK_STREAM.)
 507 *
 508 * Returns the number of bytes sent on success, or errno otherwise
 509 */
 510
 511static int send_msg(struct kiocb *iocb, struct socket *sock,
 512                    struct msghdr *m, size_t total_len)
 513{
 514        struct sock *sk = sock->sk;
 515        struct tipc_port *tport = tipc_sk_port(sk);
 516        struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
 517        int needs_conn;
 518        int res = -EINVAL;
 519
 520        if (unlikely(!dest))
 521                return -EDESTADDRREQ;
 522        if (unlikely((m->msg_namelen < sizeof(*dest)) ||
 523                     (dest->family != AF_TIPC)))
 524                return -EINVAL;
 525
 526        if (iocb)
 527                lock_sock(sk);
 528
 529        needs_conn = (sock->state != SS_READY);
 530        if (unlikely(needs_conn)) {
 531                if (sock->state == SS_LISTENING) {
 532                        res = -EPIPE;
 533                        goto exit;
 534                }
 535                if (sock->state != SS_UNCONNECTED) {
 536                        res = -EISCONN;
 537                        goto exit;
 538                }
 539                if ((tport->published) ||
 540                    ((sock->type == SOCK_STREAM) && (total_len != 0))) {
 541                        res = -EOPNOTSUPP;
 542                        goto exit;
 543                }
 544                if (dest->addrtype == TIPC_ADDR_NAME) {
 545                        tport->conn_type = dest->addr.name.name.type;
 546                        tport->conn_instance = dest->addr.name.name.instance;
 547                }
 548
 549                /* Abort any pending connection attempts (very unlikely) */
 550
 551                reject_rx_queue(sk);
 552        }
 553
 554        do {
 555                if (dest->addrtype == TIPC_ADDR_NAME) {
 556                        if ((res = dest_name_check(dest, m)))
 557                                break;
 558                        res = tipc_send2name(tport->ref,
 559                                             &dest->addr.name.name,
 560                                             dest->addr.name.domain,
 561                                             m->msg_iovlen,
 562                                             m->msg_iov);
 563                }
 564                else if (dest->addrtype == TIPC_ADDR_ID) {
 565                        res = tipc_send2port(tport->ref,
 566                                             &dest->addr.id,
 567                                             m->msg_iovlen,
 568                                             m->msg_iov);
 569                }
 570                else if (dest->addrtype == TIPC_ADDR_MCAST) {
 571                        if (needs_conn) {
 572                                res = -EOPNOTSUPP;
 573                                break;
 574                        }
 575                        if ((res = dest_name_check(dest, m)))
 576                                break;
 577                        res = tipc_multicast(tport->ref,
 578                                             &dest->addr.nameseq,
 579                                             0,
 580                                             m->msg_iovlen,
 581                                             m->msg_iov);
 582                }
 583                if (likely(res != -ELINKCONG)) {
 584                        if (needs_conn && (res >= 0)) {
 585                                sock->state = SS_CONNECTING;
 586                        }
 587                        break;
 588                }
 589                if (m->msg_flags & MSG_DONTWAIT) {
 590                        res = -EWOULDBLOCK;
 591                        break;
 592                }
 593                release_sock(sk);
 594                res = wait_event_interruptible(*sk_sleep(sk),
 595                                               !tport->congested);
 596                lock_sock(sk);
 597                if (res)
 598                        break;
 599        } while (1);
 600
 601exit:
 602        if (iocb)
 603                release_sock(sk);
 604        return res;
 605}
 606
 607/**
 608 * send_packet - send a connection-oriented message
 609 * @iocb: if NULL, indicates that socket lock is already held
 610 * @sock: socket structure
 611 * @m: message to send
 612 * @total_len: length of message
 613 *
 614 * Used for SOCK_SEQPACKET messages and SOCK_STREAM data.
 615 *
 616 * Returns the number of bytes sent on success, or errno otherwise
 617 */
 618
 619static int send_packet(struct kiocb *iocb, struct socket *sock,
 620                       struct msghdr *m, size_t total_len)
 621{
 622        struct sock *sk = sock->sk;
 623        struct tipc_port *tport = tipc_sk_port(sk);
 624        struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
 625        int res;
 626
 627        /* Handle implied connection establishment */
 628
 629        if (unlikely(dest))
 630                return send_msg(iocb, sock, m, total_len);
 631
 632        if (iocb)
 633                lock_sock(sk);
 634
 635        do {
 636                if (unlikely(sock->state != SS_CONNECTED)) {
 637                        if (sock->state == SS_DISCONNECTING)
 638                                res = -EPIPE;
 639                        else
 640                                res = -ENOTCONN;
 641                        break;
 642                }
 643
 644                res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov);
 645                if (likely(res != -ELINKCONG)) {
 646                        break;
 647                }
 648                if (m->msg_flags & MSG_DONTWAIT) {
 649                        res = -EWOULDBLOCK;
 650                        break;
 651                }
 652                release_sock(sk);
 653                res = wait_event_interruptible(*sk_sleep(sk),
 654                        (!tport->congested || !tport->connected));
 655                lock_sock(sk);
 656                if (res)
 657                        break;
 658        } while (1);
 659
 660        if (iocb)
 661                release_sock(sk);
 662        return res;
 663}
 664
 665/**
 666 * send_stream - send stream-oriented data
 667 * @iocb: (unused)
 668 * @sock: socket structure
 669 * @m: data to send
 670 * @total_len: total length of data to be sent
 671 *
 672 * Used for SOCK_STREAM data.
 673 *
 674 * Returns the number of bytes sent on success (or partial success),
 675 * or errno if no data sent
 676 */
 677
 678static int send_stream(struct kiocb *iocb, struct socket *sock,
 679                       struct msghdr *m, size_t total_len)
 680{
 681        struct sock *sk = sock->sk;
 682        struct tipc_port *tport = tipc_sk_port(sk);
 683        struct msghdr my_msg;
 684        struct iovec my_iov;
 685        struct iovec *curr_iov;
 686        int curr_iovlen;
 687        char __user *curr_start;
 688        u32 hdr_size;
 689        int curr_left;
 690        int bytes_to_send;
 691        int bytes_sent;
 692        int res;
 693
 694        lock_sock(sk);
 695
 696        /* Handle special cases where there is no connection */
 697
 698        if (unlikely(sock->state != SS_CONNECTED)) {
 699                if (sock->state == SS_UNCONNECTED) {
 700                        res = send_packet(NULL, sock, m, total_len);
 701                        goto exit;
 702                } else if (sock->state == SS_DISCONNECTING) {
 703                        res = -EPIPE;
 704                        goto exit;
 705                } else {
 706                        res = -ENOTCONN;
 707                        goto exit;
 708                }
 709        }
 710
 711        if (unlikely(m->msg_name)) {
 712                res = -EISCONN;
 713                goto exit;
 714        }
 715
 716        /*
 717         * Send each iovec entry using one or more messages
 718         *
 719         * Note: This algorithm is good for the most likely case
 720         * (i.e. one large iovec entry), but could be improved to pass sets
 721         * of small iovec entries into send_packet().
 722         */
 723
 724        curr_iov = m->msg_iov;
 725        curr_iovlen = m->msg_iovlen;
 726        my_msg.msg_iov = &my_iov;
 727        my_msg.msg_iovlen = 1;
 728        my_msg.msg_flags = m->msg_flags;
 729        my_msg.msg_name = NULL;
 730        bytes_sent = 0;
 731
 732        hdr_size = msg_hdr_sz(&tport->phdr);
 733
 734        while (curr_iovlen--) {
 735                curr_start = curr_iov->iov_base;
 736                curr_left = curr_iov->iov_len;
 737
 738                while (curr_left) {
 739                        bytes_to_send = tport->max_pkt - hdr_size;
 740                        if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
 741                                bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
 742                        if (curr_left < bytes_to_send)
 743                                bytes_to_send = curr_left;
 744                        my_iov.iov_base = curr_start;
 745                        my_iov.iov_len = bytes_to_send;
 746                        if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) {
 747                                if (bytes_sent)
 748                                        res = bytes_sent;
 749                                goto exit;
 750                        }
 751                        curr_left -= bytes_to_send;
 752                        curr_start += bytes_to_send;
 753                        bytes_sent += bytes_to_send;
 754                }
 755
 756                curr_iov++;
 757        }
 758        res = bytes_sent;
 759exit:
 760        release_sock(sk);
 761        return res;
 762}
 763
 764/**
 765 * auto_connect - complete connection setup to a remote port
 766 * @sock: socket structure
 767 * @msg: peer's response message
 768 *
 769 * Returns 0 on success, errno otherwise
 770 */
 771
 772static int auto_connect(struct socket *sock, struct tipc_msg *msg)
 773{
 774        struct tipc_sock *tsock = tipc_sk(sock->sk);
 775
 776        if (msg_errcode(msg)) {
 777                sock->state = SS_DISCONNECTING;
 778                return -ECONNREFUSED;
 779        }
 780
 781        tsock->peer_name.ref = msg_origport(msg);
 782        tsock->peer_name.node = msg_orignode(msg);
 783        tipc_connect2port(tsock->p->ref, &tsock->peer_name);
 784        tipc_set_portimportance(tsock->p->ref, msg_importance(msg));
 785        sock->state = SS_CONNECTED;
 786        return 0;
 787}
 788
 789/**
 790 * set_orig_addr - capture sender's address for received message
 791 * @m: descriptor for message info
 792 * @msg: received message header
 793 *
 794 * Note: Address is not captured if not requested by receiver.
 795 */
 796
 797static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
 798{
 799        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)m->msg_name;
 800
 801        if (addr) {
 802                addr->family = AF_TIPC;
 803                addr->addrtype = TIPC_ADDR_ID;
 804                addr->addr.id.ref = msg_origport(msg);
 805                addr->addr.id.node = msg_orignode(msg);
 806                addr->addr.name.domain = 0;     /* could leave uninitialized */
 807                addr->scope = 0;                /* could leave uninitialized */
 808                m->msg_namelen = sizeof(struct sockaddr_tipc);
 809        }
 810}
 811
 812/**
 813 * anc_data_recv - optionally capture ancillary data for received message
 814 * @m: descriptor for message info
 815 * @msg: received message header
 816 * @tport: TIPC port associated with message
 817 *
 818 * Note: Ancillary data is not captured if not requested by receiver.
 819 *
 820 * Returns 0 if successful, otherwise errno
 821 */
 822
 823static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 824                                struct tipc_port *tport)
 825{
 826        u32 anc_data[3];
 827        u32 err;
 828        u32 dest_type;
 829        int has_name;
 830        int res;
 831
 832        if (likely(m->msg_controllen == 0))
 833                return 0;
 834
 835        /* Optionally capture errored message object(s) */
 836
 837        err = msg ? msg_errcode(msg) : 0;
 838        if (unlikely(err)) {
 839                anc_data[0] = err;
 840                anc_data[1] = msg_data_sz(msg);
 841                if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
 842                        return res;
 843                if (anc_data[1] &&
 844                    (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
 845                                    msg_data(msg))))
 846                        return res;
 847        }
 848
 849        /* Optionally capture message destination object */
 850
 851        dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
 852        switch (dest_type) {
 853        case TIPC_NAMED_MSG:
 854                has_name = 1;
 855                anc_data[0] = msg_nametype(msg);
 856                anc_data[1] = msg_namelower(msg);
 857                anc_data[2] = msg_namelower(msg);
 858                break;
 859        case TIPC_MCAST_MSG:
 860                has_name = 1;
 861                anc_data[0] = msg_nametype(msg);
 862                anc_data[1] = msg_namelower(msg);
 863                anc_data[2] = msg_nameupper(msg);
 864                break;
 865        case TIPC_CONN_MSG:
 866                has_name = (tport->conn_type != 0);
 867                anc_data[0] = tport->conn_type;
 868                anc_data[1] = tport->conn_instance;
 869                anc_data[2] = tport->conn_instance;
 870                break;
 871        default:
 872                has_name = 0;
 873        }
 874        if (has_name &&
 875            (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
 876                return res;
 877
 878        return 0;
 879}
 880
 881/**
 882 * recv_msg - receive packet-oriented message
 883 * @iocb: (unused)
 884 * @m: descriptor for message info
 885 * @buf_len: total size of user buffer area
 886 * @flags: receive flags
 887 *
 888 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 889 * If the complete message doesn't fit in user area, truncate it.
 890 *
 891 * Returns size of returned message data, errno otherwise
 892 */
 893
 894static int recv_msg(struct kiocb *iocb, struct socket *sock,
 895                    struct msghdr *m, size_t buf_len, int flags)
 896{
 897        struct sock *sk = sock->sk;
 898        struct tipc_port *tport = tipc_sk_port(sk);
 899        struct sk_buff *buf;
 900        struct tipc_msg *msg;
 901        unsigned int sz;
 902        u32 err;
 903        int res;
 904
 905        /* Catch invalid receive requests */
 906
 907        if (m->msg_iovlen != 1)
 908                return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
 909
 910        if (unlikely(!buf_len))
 911                return -EINVAL;
 912
 913        lock_sock(sk);
 914
 915        if (unlikely(sock->state == SS_UNCONNECTED)) {
 916                res = -ENOTCONN;
 917                goto exit;
 918        }
 919
 920restart:
 921
 922        /* Look for a message in receive queue; wait if necessary */
 923
 924        while (skb_queue_empty(&sk->sk_receive_queue)) {
 925                if (sock->state == SS_DISCONNECTING) {
 926                        res = -ENOTCONN;
 927                        goto exit;
 928                }
 929                if (flags & MSG_DONTWAIT) {
 930                        res = -EWOULDBLOCK;
 931                        goto exit;
 932                }
 933                release_sock(sk);
 934                res = wait_event_interruptible(*sk_sleep(sk),
 935                        (!skb_queue_empty(&sk->sk_receive_queue) ||
 936                         (sock->state == SS_DISCONNECTING)));
 937                lock_sock(sk);
 938                if (res)
 939                        goto exit;
 940        }
 941
 942        /* Look at first message in receive queue */
 943
 944        buf = skb_peek(&sk->sk_receive_queue);
 945        msg = buf_msg(buf);
 946        sz = msg_data_sz(msg);
 947        err = msg_errcode(msg);
 948
 949        /* Complete connection setup for an implied connect */
 950
 951        if (unlikely(sock->state == SS_CONNECTING)) {
 952                res = auto_connect(sock, msg);
 953                if (res)
 954                        goto exit;
 955        }
 956
 957        /* Discard an empty non-errored message & try again */
 958
 959        if ((!sz) && (!err)) {
 960                advance_rx_queue(sk);
 961                goto restart;
 962        }
 963
 964        /* Capture sender's address (optional) */
 965
 966        set_orig_addr(m, msg);
 967
 968        /* Capture ancillary data (optional) */
 969
 970        res = anc_data_recv(m, msg, tport);
 971        if (res)
 972                goto exit;
 973
 974        /* Capture message data (if valid) & compute return value (always) */
 975
 976        if (!err) {
 977                if (unlikely(buf_len < sz)) {
 978                        sz = buf_len;
 979                        m->msg_flags |= MSG_TRUNC;
 980                }
 981                if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg),
 982                                          sz))) {
 983                        res = -EFAULT;
 984                        goto exit;
 985                }
 986                res = sz;
 987        } else {
 988                if ((sock->state == SS_READY) ||
 989                    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
 990                        res = 0;
 991                else
 992                        res = -ECONNRESET;
 993        }
 994
 995        /* Consume received message (optional) */
 996
 997        if (likely(!(flags & MSG_PEEK))) {
 998                if ((sock->state != SS_READY) &&
 999                    (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1000                        tipc_acknowledge(tport->ref, tport->conn_unacked);
1001                advance_rx_queue(sk);
1002        }
1003exit:
1004        release_sock(sk);
1005        return res;
1006}
1007
1008/**
1009 * recv_stream - receive stream-oriented data
1010 * @iocb: (unused)
1011 * @m: descriptor for message info
1012 * @buf_len: total size of user buffer area
1013 * @flags: receive flags
1014 *
1015 * Used for SOCK_STREAM messages only.  If not enough data is available
1016 * will optionally wait for more; never truncates data.
1017 *
1018 * Returns size of returned message data, errno otherwise
1019 */
1020
1021static int recv_stream(struct kiocb *iocb, struct socket *sock,
1022                       struct msghdr *m, size_t buf_len, int flags)
1023{
1024        struct sock *sk = sock->sk;
1025        struct tipc_port *tport = tipc_sk_port(sk);
1026        struct sk_buff *buf;
1027        struct tipc_msg *msg;
1028        unsigned int sz;
1029        int sz_to_copy;
1030        int sz_copied = 0;
1031        int needed;
1032        char __user *crs = m->msg_iov->iov_base;
1033        unsigned char *buf_crs;
1034        u32 err;
1035        int res = 0;
1036
1037        /* Catch invalid receive attempts */
1038
1039        if (m->msg_iovlen != 1)
1040                return -EOPNOTSUPP;   /* Don't do multiple iovec entries yet */
1041
1042        if (unlikely(!buf_len))
1043                return -EINVAL;
1044
1045        lock_sock(sk);
1046
1047        if (unlikely((sock->state == SS_UNCONNECTED) ||
1048                     (sock->state == SS_CONNECTING))) {
1049                res = -ENOTCONN;
1050                goto exit;
1051        }
1052
1053restart:
1054
1055        /* Look for a message in receive queue; wait if necessary */
1056
1057        while (skb_queue_empty(&sk->sk_receive_queue)) {
1058                if (sock->state == SS_DISCONNECTING) {
1059                        res = -ENOTCONN;
1060                        goto exit;
1061                }
1062                if (flags & MSG_DONTWAIT) {
1063                        res = -EWOULDBLOCK;
1064                        goto exit;
1065                }
1066                release_sock(sk);
1067                res = wait_event_interruptible(*sk_sleep(sk),
1068                        (!skb_queue_empty(&sk->sk_receive_queue) ||
1069                         (sock->state == SS_DISCONNECTING)));
1070                lock_sock(sk);
1071                if (res)
1072                        goto exit;
1073        }
1074
1075        /* Look at first message in receive queue */
1076
1077        buf = skb_peek(&sk->sk_receive_queue);
1078        msg = buf_msg(buf);
1079        sz = msg_data_sz(msg);
1080        err = msg_errcode(msg);
1081
1082        /* Discard an empty non-errored message & try again */
1083
1084        if ((!sz) && (!err)) {
1085                advance_rx_queue(sk);
1086                goto restart;
1087        }
1088
1089        /* Optionally capture sender's address & ancillary data of first msg */
1090
1091        if (sz_copied == 0) {
1092                set_orig_addr(m, msg);
1093                res = anc_data_recv(m, msg, tport);
1094                if (res)
1095                        goto exit;
1096        }
1097
1098        /* Capture message data (if valid) & compute return value (always) */
1099
1100        if (!err) {
1101                buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
1102                sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1103
1104                needed = (buf_len - sz_copied);
1105                sz_to_copy = (sz <= needed) ? sz : needed;
1106                if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) {
1107                        res = -EFAULT;
1108                        goto exit;
1109                }
1110                sz_copied += sz_to_copy;
1111
1112                if (sz_to_copy < sz) {
1113                        if (!(flags & MSG_PEEK))
1114                                TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy;
1115                        goto exit;
1116                }
1117
1118                crs += sz_to_copy;
1119        } else {
1120                if (sz_copied != 0)
1121                        goto exit; /* can't add error msg to valid data */
1122
1123                if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
1124                        res = 0;
1125                else
1126                        res = -ECONNRESET;
1127        }
1128
1129        /* Consume received message (optional) */
1130
1131        if (likely(!(flags & MSG_PEEK))) {
1132                if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1133                        tipc_acknowledge(tport->ref, tport->conn_unacked);
1134                advance_rx_queue(sk);
1135        }
1136
1137        /* Loop around if more data is required */
1138
1139        if ((sz_copied < buf_len) &&    /* didn't get all requested data */
1140            (!skb_queue_empty(&sk->sk_receive_queue) ||
1141             (flags & MSG_WAITALL)) &&  /* and more is ready or required */
1142            (!(flags & MSG_PEEK)) &&    /* and aren't just peeking at data */
1143            (!err))                     /* and haven't reached a FIN */
1144                goto restart;
1145
1146exit:
1147        release_sock(sk);
1148        return sz_copied ? sz_copied : res;
1149}
1150
1151/**
1152 * rx_queue_full - determine if receive queue can accept another message
1153 * @msg: message to be added to queue
1154 * @queue_size: current size of queue
1155 * @base: nominal maximum size of queue
1156 *
1157 * Returns 1 if queue is unable to accept message, 0 otherwise
1158 */
1159
1160static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1161{
1162        u32 threshold;
1163        u32 imp = msg_importance(msg);
1164
1165        if (imp == TIPC_LOW_IMPORTANCE)
1166                threshold = base;
1167        else if (imp == TIPC_MEDIUM_IMPORTANCE)
1168                threshold = base * 2;
1169        else if (imp == TIPC_HIGH_IMPORTANCE)
1170                threshold = base * 100;
1171        else
1172                return 0;
1173
1174        if (msg_connected(msg))
1175                threshold *= 4;
1176
1177        return (queue_size >= threshold);
1178}
1179
1180/**
1181 * filter_rcv - validate incoming message
1182 * @sk: socket
1183 * @buf: message
1184 *
1185 * Enqueues message on receive queue if acceptable; optionally handles
1186 * disconnect indication for a connected socket.
1187 *
1188 * Called with socket lock already taken; port lock may also be taken.
1189 *
1190 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1191 */
1192
1193static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1194{
1195        struct socket *sock = sk->sk_socket;
1196        struct tipc_msg *msg = buf_msg(buf);
1197        u32 recv_q_len;
1198
1199        /* Reject message if it is wrong sort of message for socket */
1200
1201        /*
1202         * WOULD IT BE BETTER TO JUST DISCARD THESE MESSAGES INSTEAD?
1203         * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY
1204         * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC
1205         */
1206
1207        if (sock->state == SS_READY) {
1208                if (msg_connected(msg)) {
1209                        msg_dbg(msg, "dispatch filter 1\n");
1210                        return TIPC_ERR_NO_PORT;
1211                }
1212        } else {
1213                if (msg_mcast(msg)) {
1214                        msg_dbg(msg, "dispatch filter 2\n");
1215                        return TIPC_ERR_NO_PORT;
1216                }
1217                if (sock->state == SS_CONNECTED) {
1218                        if (!msg_connected(msg)) {
1219                                msg_dbg(msg, "dispatch filter 3\n");
1220                                return TIPC_ERR_NO_PORT;
1221                        }
1222                }
1223                else if (sock->state == SS_CONNECTING) {
1224                        if (!msg_connected(msg) && (msg_errcode(msg) == 0)) {
1225                                msg_dbg(msg, "dispatch filter 4\n");
1226                                return TIPC_ERR_NO_PORT;
1227                        }
1228                }
1229                else if (sock->state == SS_LISTENING) {
1230                        if (msg_connected(msg) || msg_errcode(msg)) {
1231                                msg_dbg(msg, "dispatch filter 5\n");
1232                                return TIPC_ERR_NO_PORT;
1233                        }
1234                }
1235                else if (sock->state == SS_DISCONNECTING) {
1236                        msg_dbg(msg, "dispatch filter 6\n");
1237                        return TIPC_ERR_NO_PORT;
1238                }
1239                else /* (sock->state == SS_UNCONNECTED) */ {
1240                        if (msg_connected(msg) || msg_errcode(msg)) {
1241                                msg_dbg(msg, "dispatch filter 7\n");
1242                                return TIPC_ERR_NO_PORT;
1243                        }
1244                }
1245        }
1246
1247        /* Reject message if there isn't room to queue it */
1248
1249        recv_q_len = (u32)atomic_read(&tipc_queue_size);
1250        if (unlikely(recv_q_len >= OVERLOAD_LIMIT_BASE)) {
1251                if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE))
1252                        return TIPC_ERR_OVERLOAD;
1253        }
1254        recv_q_len = skb_queue_len(&sk->sk_receive_queue);
1255        if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
1256                if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
1257                        return TIPC_ERR_OVERLOAD;
1258        }
1259
1260        /* Enqueue message (finally!) */
1261
1262        msg_dbg(msg, "<DISP<: ");
1263        TIPC_SKB_CB(buf)->handle = msg_data(msg);
1264        atomic_inc(&tipc_queue_size);
1265        __skb_queue_tail(&sk->sk_receive_queue, buf);
1266
1267        /* Initiate connection termination for an incoming 'FIN' */
1268
1269        if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
1270                sock->state = SS_DISCONNECTING;
1271                tipc_disconnect_port(tipc_sk_port(sk));
1272        }
1273
1274        if (waitqueue_active(sk_sleep(sk)))
1275                wake_up_interruptible(sk_sleep(sk));
1276        return TIPC_OK;
1277}
1278
1279/**
1280 * backlog_rcv - handle incoming message from backlog queue
1281 * @sk: socket
1282 * @buf: message
1283 *
1284 * Caller must hold socket lock, but not port lock.
1285 *
1286 * Returns 0
1287 */
1288
1289static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1290{
1291        u32 res;
1292
1293        res = filter_rcv(sk, buf);
1294        if (res)
1295                tipc_reject_msg(buf, res);
1296        return 0;
1297}
1298
1299/**
1300 * dispatch - handle incoming message
1301 * @tport: TIPC port that received message
1302 * @buf: message
1303 *
1304 * Called with port lock already taken.
1305 *
1306 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1307 */
1308
1309static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1310{
1311        struct sock *sk = (struct sock *)tport->usr_handle;
1312        u32 res;
1313
1314        /*
1315         * Process message if socket is unlocked; otherwise add to backlog queue
1316         *
1317         * This code is based on sk_receive_skb(), but must be distinct from it
1318         * since a TIPC-specific filter/reject mechanism is utilized
1319         */
1320
1321        bh_lock_sock(sk);
1322        if (!sock_owned_by_user(sk)) {
1323                res = filter_rcv(sk, buf);
1324        } else {
1325                if (sk_add_backlog(sk, buf))
1326                        res = TIPC_ERR_OVERLOAD;
1327                else
1328                        res = TIPC_OK;
1329        }
1330        bh_unlock_sock(sk);
1331
1332        return res;
1333}
1334
1335/**
1336 * wakeupdispatch - wake up port after congestion
1337 * @tport: port to wakeup
1338 *
1339 * Called with port lock already taken.
1340 */
1341
1342static void wakeupdispatch(struct tipc_port *tport)
1343{
1344        struct sock *sk = (struct sock *)tport->usr_handle;
1345
1346        if (waitqueue_active(sk_sleep(sk)))
1347                wake_up_interruptible(sk_sleep(sk));
1348}
1349
/**
 * connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Sends an empty message to @dest and then waits, for at most the
 * socket's receive timeout, for a response to arrive or for the socket
 * to leave the connecting state.
 *
 * Returns 0 on success, errno otherwise
 */

static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
		   int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	struct sk_buff *reply;
	struct tipc_msg *hdr;
	int res;

	lock_sock(sk);

	/* For now, TIPC does not allow use of connect() with DGRAM/RDM types */

	if (sock->state == SS_READY) {
		res = -EOPNOTSUPP;
		goto exit;
	}

	/* For now, TIPC does not support the non-blocking form of connect() */

	if (flags & O_NONBLOCK) {
		res = -EWOULDBLOCK;
		goto exit;
	}

	/* Issue Posix-compliant error code if socket is in the wrong state */

	if (sock->state == SS_LISTENING) {
		res = -EOPNOTSUPP;
		goto exit;
	}
	if (sock->state == SS_CONNECTING) {
		res = -EALREADY;
		goto exit;
	}
	if (sock->state != SS_UNCONNECTED) {
		res = -EISCONN;
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: send_msg() validates the rest of the address fields,
	 *       so there's no need to do it here
	 */

	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	/* Reject any messages already in receive queue (very unlikely) */

	reject_rx_queue(sk);

	/* Send a 'SYN-' (a message without data) to destination */

	m.msg_name = dest;
	m.msg_namelen = destlen;
	res = send_msg(NULL, sock, &m, 0);
	if (res < 0)
		goto exit;

	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */

	release_sock(sk);
	res = wait_event_interruptible_timeout(*sk_sleep(sk),
			(!skb_queue_empty(&sk->sk_receive_queue) ||
			(sock->state != SS_CONNECTING)),
			sk->sk_rcvtimeo);
	lock_sock(sk);

	/* Timed out (0) or interrupted (< 0): give up on the connection */

	if (res <= 0) {
		if (res == 0)
			res = -ETIMEDOUT;
		sock->state = SS_DISCONNECTING;
		goto exit;
	}

	/* Woken without a queued message: judge by the socket state alone */

	reply = skb_peek(&sk->sk_receive_queue);
	if (reply == NULL) {
		res = (sock->state == SS_CONNECTED) ? -EISCONN : -ECONNREFUSED;
		goto exit;
	}

	/* Finish connection setup; a data-less response is then discarded */

	hdr = buf_msg(reply);
	res = auto_connect(sock, hdr);
	if (!res && !msg_data_sz(hdr))
		advance_rx_queue(sk);

exit:
	release_sock(sk);
	return res;
}
1463
1464/**
1465 * listen - allow socket to listen for incoming connections
1466 * @sock: socket structure
1467 * @len: (unused)
1468 *
1469 * Returns 0 on success, errno otherwise
1470 */
1471
1472static int listen(struct socket *sock, int len)
1473{
1474        struct sock *sk = sock->sk;
1475        int res;
1476
1477        lock_sock(sk);
1478
1479        if (sock->state == SS_READY)
1480                res = -EOPNOTSUPP;
1481        else if (sock->state != SS_UNCONNECTED)
1482                res = -EINVAL;
1483        else {
1484                sock->state = SS_LISTENING;
1485                res = 0;
1486        }
1487
1488        release_sock(sk);
1489        return res;
1490}
1491
1492/**
1493 * accept - wait for connection request
1494 * @sock: listening socket
1495 * @newsock: new socket that is to be connected
1496 * @flags: file-related flags associated with socket
1497 *
1498 * Returns 0 on success, errno otherwise
1499 */
1500
1501static int accept(struct socket *sock, struct socket *new_sock, int flags)
1502{
1503        struct sock *sk = sock->sk;
1504        struct sk_buff *buf;
1505        int res;
1506
1507        lock_sock(sk);
1508
1509        if (sock->state == SS_READY) {
1510                res = -EOPNOTSUPP;
1511                goto exit;
1512        }
1513        if (sock->state != SS_LISTENING) {
1514                res = -EINVAL;
1515                goto exit;
1516        }
1517
1518        while (skb_queue_empty(&sk->sk_receive_queue)) {
1519                if (flags & O_NONBLOCK) {
1520                        res = -EWOULDBLOCK;
1521                        goto exit;
1522                }
1523                release_sock(sk);
1524                res = wait_event_interruptible(*sk_sleep(sk),
1525                                (!skb_queue_empty(&sk->sk_receive_queue)));
1526                lock_sock(sk);
1527                if (res)
1528                        goto exit;
1529        }
1530
1531        buf = skb_peek(&sk->sk_receive_queue);
1532
1533        res = tipc_create(sock_net(sock->sk), new_sock, 0, 0);
1534        if (!res) {
1535                struct sock *new_sk = new_sock->sk;
1536                struct tipc_sock *new_tsock = tipc_sk(new_sk);
1537                struct tipc_port *new_tport = new_tsock->p;
1538                u32 new_ref = new_tport->ref;
1539                struct tipc_msg *msg = buf_msg(buf);
1540
1541                lock_sock(new_sk);
1542
1543                /*
1544                 * Reject any stray messages received by new socket
1545                 * before the socket lock was taken (very, very unlikely)
1546                 */
1547
1548                reject_rx_queue(new_sk);
1549
1550                /* Connect new socket to it's peer */
1551
1552                new_tsock->peer_name.ref = msg_origport(msg);
1553                new_tsock->peer_name.node = msg_orignode(msg);
1554                tipc_connect2port(new_ref, &new_tsock->peer_name);
1555                new_sock->state = SS_CONNECTED;
1556
1557                tipc_set_portimportance(new_ref, msg_importance(msg));
1558                if (msg_named(msg)) {
1559                        new_tport->conn_type = msg_nametype(msg);
1560                        new_tport->conn_instance = msg_nameinst(msg);
1561                }
1562
1563                /*
1564                 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1565                 * Respond to 'SYN+' by queuing it on new socket.
1566                 */
1567
1568                msg_dbg(msg,"<ACC<: ");
1569                if (!msg_data_sz(msg)) {
1570                        struct msghdr m = {NULL,};
1571
1572                        advance_rx_queue(sk);
1573                        send_packet(NULL, new_sock, &m, 0);
1574                } else {
1575                        __skb_dequeue(&sk->sk_receive_queue);
1576                        __skb_queue_head(&new_sk->sk_receive_queue, buf);
1577                }
1578                release_sock(new_sk);
1579        }
1580exit:
1581        release_sock(sk);
1582        return res;
1583}
1584
1585/**
1586 * shutdown - shutdown socket connection
1587 * @sock: socket structure
1588 * @how: direction to close (must be SHUT_RDWR)
1589 *
1590 * Terminates connection (if necessary), then purges socket's receive queue.
1591 *
1592 * Returns 0 on success, errno otherwise
1593 */
1594
1595static int shutdown(struct socket *sock, int how)
1596{
1597        struct sock *sk = sock->sk;
1598        struct tipc_port *tport = tipc_sk_port(sk);
1599        struct sk_buff *buf;
1600        int res;
1601
1602        if (how != SHUT_RDWR)
1603                return -EINVAL;
1604
1605        lock_sock(sk);
1606
1607        switch (sock->state) {
1608        case SS_CONNECTING:
1609        case SS_CONNECTED:
1610
1611                /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1612restart:
1613                buf = __skb_dequeue(&sk->sk_receive_queue);
1614                if (buf) {
1615                        atomic_dec(&tipc_queue_size);
1616                        if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) {
1617                                buf_discard(buf);
1618                                goto restart;
1619                        }
1620                        tipc_disconnect(tport->ref);
1621                        tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1622                } else {
1623                        tipc_shutdown(tport->ref);
1624                }
1625
1626                sock->state = SS_DISCONNECTING;
1627
1628                /* fall through */
1629
1630        case SS_DISCONNECTING:
1631
1632                /* Discard any unreceived messages; wake up sleeping tasks */
1633
1634                discard_rx_queue(sk);
1635                if (waitqueue_active(sk_sleep(sk)))
1636                        wake_up_interruptible(sk_sleep(sk));
1637                res = 0;
1638                break;
1639
1640        default:
1641                res = -ENOTCONN;
1642        }
1643
1644        release_sock(sk);
1645        return res;
1646}
1647
1648/**
1649 * setsockopt - set socket option
1650 * @sock: socket structure
1651 * @lvl: option level
1652 * @opt: option identifier
1653 * @ov: pointer to new option value
1654 * @ol: length of option value
1655 *
1656 * For stream sockets only, accepts and ignores all IPPROTO_TCP options
1657 * (to ease compatibility).
1658 *
1659 * Returns 0 on success, errno otherwise
1660 */
1661
1662static int setsockopt(struct socket *sock,
1663                      int lvl, int opt, char __user *ov, unsigned int ol)
1664{
1665        struct sock *sk = sock->sk;
1666        struct tipc_port *tport = tipc_sk_port(sk);
1667        u32 value;
1668        int res;
1669
1670        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1671                return 0;
1672        if (lvl != SOL_TIPC)
1673                return -ENOPROTOOPT;
1674        if (ol < sizeof(value))
1675                return -EINVAL;
1676        if ((res = get_user(value, (u32 __user *)ov)))
1677                return res;
1678
1679        lock_sock(sk);
1680
1681        switch (opt) {
1682        case TIPC_IMPORTANCE:
1683                res = tipc_set_portimportance(tport->ref, value);
1684                break;
1685        case TIPC_SRC_DROPPABLE:
1686                if (sock->type != SOCK_STREAM)
1687                        res = tipc_set_portunreliable(tport->ref, value);
1688                else
1689                        res = -ENOPROTOOPT;
1690                break;
1691        case TIPC_DEST_DROPPABLE:
1692                res = tipc_set_portunreturnable(tport->ref, value);
1693                break;
1694        case TIPC_CONN_TIMEOUT:
1695                sk->sk_rcvtimeo = msecs_to_jiffies(value);
1696                /* no need to set "res", since already 0 at this point */
1697                break;
1698        default:
1699                res = -EINVAL;
1700        }
1701
1702        release_sock(sk);
1703
1704        return res;
1705}
1706
1707/**
1708 * getsockopt - get socket option
1709 * @sock: socket structure
1710 * @lvl: option level
1711 * @opt: option identifier
1712 * @ov: receptacle for option value
1713 * @ol: receptacle for length of option value
1714 *
1715 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options
1716 * (to ease compatibility).
1717 *
1718 * Returns 0 on success, errno otherwise
1719 */
1720
1721static int getsockopt(struct socket *sock,
1722                      int lvl, int opt, char __user *ov, int __user *ol)
1723{
1724        struct sock *sk = sock->sk;
1725        struct tipc_port *tport = tipc_sk_port(sk);
1726        int len;
1727        u32 value;
1728        int res;
1729
1730        if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM))
1731                return put_user(0, ol);
1732        if (lvl != SOL_TIPC)
1733                return -ENOPROTOOPT;
1734        if ((res = get_user(len, ol)))
1735                return res;
1736
1737        lock_sock(sk);
1738
1739        switch (opt) {
1740        case TIPC_IMPORTANCE:
1741                res = tipc_portimportance(tport->ref, &value);
1742                break;
1743        case TIPC_SRC_DROPPABLE:
1744                res = tipc_portunreliable(tport->ref, &value);
1745                break;
1746        case TIPC_DEST_DROPPABLE:
1747                res = tipc_portunreturnable(tport->ref, &value);
1748                break;
1749        case TIPC_CONN_TIMEOUT:
1750                value = jiffies_to_msecs(sk->sk_rcvtimeo);
1751                /* no need to set "res", since already 0 at this point */
1752                break;
1753         case TIPC_NODE_RECVQ_DEPTH:
1754                value = (u32)atomic_read(&tipc_queue_size);
1755                break;
1756         case TIPC_SOCK_RECVQ_DEPTH:
1757                value = skb_queue_len(&sk->sk_receive_queue);
1758                break;
1759        default:
1760                res = -EINVAL;
1761        }
1762
1763        release_sock(sk);
1764
1765        if (res) {
1766                /* "get" failed */
1767        }
1768        else if (len < sizeof(value)) {
1769                res = -EINVAL;
1770        }
1771        else if (copy_to_user(ov, &value, sizeof(value))) {
1772                res = -EFAULT;
1773        }
1774        else {
1775                res = put_user(sizeof(value), ol);
1776        }
1777
1778        return res;
1779}
1780
1781/**
1782 * Protocol switches for the various types of TIPC sockets
1783 */
1784
1785static const struct proto_ops msg_ops = {
1786        .owner          = THIS_MODULE,
1787        .family         = AF_TIPC,
1788        .release        = release,
1789        .bind           = bind,
1790        .connect        = connect,
1791        .socketpair     = sock_no_socketpair,
1792        .accept         = accept,
1793        .getname        = get_name,
1794        .poll           = poll,
1795        .ioctl          = sock_no_ioctl,
1796        .listen         = listen,
1797        .shutdown       = shutdown,
1798        .setsockopt     = setsockopt,
1799        .getsockopt     = getsockopt,
1800        .sendmsg        = send_msg,
1801        .recvmsg        = recv_msg,
1802        .mmap           = sock_no_mmap,
1803        .sendpage       = sock_no_sendpage
1804};
1805
1806static const struct proto_ops packet_ops = {
1807        .owner          = THIS_MODULE,
1808        .family         = AF_TIPC,
1809        .release        = release,
1810        .bind           = bind,
1811        .connect        = connect,
1812        .socketpair     = sock_no_socketpair,
1813        .accept         = accept,
1814        .getname        = get_name,
1815        .poll           = poll,
1816        .ioctl          = sock_no_ioctl,
1817        .listen         = listen,
1818        .shutdown       = shutdown,
1819        .setsockopt     = setsockopt,
1820        .getsockopt     = getsockopt,
1821        .sendmsg        = send_packet,
1822        .recvmsg        = recv_msg,
1823        .mmap           = sock_no_mmap,
1824        .sendpage       = sock_no_sendpage
1825};
1826
1827static const struct proto_ops stream_ops = {
1828        .owner          = THIS_MODULE,
1829        .family         = AF_TIPC,
1830        .release        = release,
1831        .bind           = bind,
1832        .connect        = connect,
1833        .socketpair     = sock_no_socketpair,
1834        .accept         = accept,
1835        .getname        = get_name,
1836        .poll           = poll,
1837        .ioctl          = sock_no_ioctl,
1838        .listen         = listen,
1839        .shutdown       = shutdown,
1840        .setsockopt     = setsockopt,
1841        .getsockopt     = getsockopt,
1842        .sendmsg        = send_stream,
1843        .recvmsg        = recv_stream,
1844        .mmap           = sock_no_mmap,
1845        .sendpage       = sock_no_sendpage
1846};
1847
1848static const struct net_proto_family tipc_family_ops = {
1849        .owner          = THIS_MODULE,
1850        .family         = AF_TIPC,
1851        .create         = tipc_create
1852};
1853
1854static struct proto tipc_proto = {
1855        .name           = "TIPC",
1856        .owner          = THIS_MODULE,
1857        .obj_size       = sizeof(struct tipc_sock)
1858};
1859
1860/**
1861 * tipc_socket_init - initialize TIPC socket interface
1862 *
1863 * Returns 0 on success, errno otherwise
1864 */
1865int tipc_socket_init(void)
1866{
1867        int res;
1868
1869        res = proto_register(&tipc_proto, 1);
1870        if (res) {
1871                err("Failed to register TIPC protocol type\n");
1872                goto out;
1873        }
1874
1875        res = sock_register(&tipc_family_ops);
1876        if (res) {
1877                err("Failed to register TIPC socket type\n");
1878                proto_unregister(&tipc_proto);
1879                goto out;
1880        }
1881
1882        sockets_enabled = 1;
1883 out:
1884        return res;
1885}
1886
1887/**
1888 * tipc_socket_stop - stop TIPC socket interface
1889 */
1890
1891void tipc_socket_stop(void)
1892{
1893        if (!sockets_enabled)
1894                return;
1895
1896        sockets_enabled = 0;
1897        sock_unregister(tipc_family_ops.family);
1898        proto_unregister(&tipc_proto);
1899}
1900
1901