linux/net/dccp/proto.c
<<
>>
Prefs
   1/*
   2 *  net/dccp/proto.c
   3 *
   4 *  An implementation of the DCCP protocol
   5 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   6 *
   7 *      This program is free software; you can redistribute it and/or modify it
   8 *      under the terms of the GNU General Public License version 2 as
   9 *      published by the Free Software Foundation.
  10 */
  11
  12#include <linux/dccp.h>
  13#include <linux/module.h>
  14#include <linux/types.h>
  15#include <linux/sched.h>
  16#include <linux/kernel.h>
  17#include <linux/skbuff.h>
  18#include <linux/netdevice.h>
  19#include <linux/in.h>
  20#include <linux/if_arp.h>
  21#include <linux/init.h>
  22#include <linux/random.h>
  23#include <net/checksum.h>
  24
  25#include <net/inet_sock.h>
  26#include <net/sock.h>
  27#include <net/xfrm.h>
  28
  29#include <asm/ioctls.h>
  30#include <asm/semaphore.h>
  31#include <linux/spinlock.h>
  32#include <linux/timer.h>
  33#include <linux/delay.h>
  34#include <linux/poll.h>
  35
  36#include "ccid.h"
  37#include "dccp.h"
  38#include "feat.h"
  39
  40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
  41
  42EXPORT_SYMBOL_GPL(dccp_statistics);
  43
  44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
  45
  46EXPORT_SYMBOL_GPL(dccp_orphan_count);
  47
  48struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
  49        .lhash_lock     = RW_LOCK_UNLOCKED,
  50        .lhash_users    = ATOMIC_INIT(0),
  51        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
  52};
  53
  54EXPORT_SYMBOL_GPL(dccp_hashinfo);
  55
  56/* the maximum queue length for tx in packets. 0 is no limit */
  57int sysctl_dccp_tx_qlen __read_mostly = 5;
  58
  59void dccp_set_state(struct sock *sk, const int state)
  60{
  61        const int oldstate = sk->sk_state;
  62
  63        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
  64                      dccp_state_name(oldstate), dccp_state_name(state));
  65        WARN_ON(state == oldstate);
  66
  67        switch (state) {
  68        case DCCP_OPEN:
  69                if (oldstate != DCCP_OPEN)
  70                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
  71                break;
  72
  73        case DCCP_CLOSED:
  74                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
  75                    oldstate == DCCP_CLOSING)
  76                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
  77
  78                sk->sk_prot->unhash(sk);
  79                if (inet_csk(sk)->icsk_bind_hash != NULL &&
  80                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
  81                        inet_put_port(sk);
  82                /* fall through */
  83        default:
  84                if (oldstate == DCCP_OPEN)
  85                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
  86        }
  87
  88        /* Change state AFTER socket is unhashed to avoid closed
  89         * socket sitting in hash tables.
  90         */
  91        sk->sk_state = state;
  92}
  93
  94EXPORT_SYMBOL_GPL(dccp_set_state);
  95
  96static void dccp_finish_passive_close(struct sock *sk)
  97{
  98        switch (sk->sk_state) {
  99        case DCCP_PASSIVE_CLOSE:
 100                /* Node (client or server) has received Close packet. */
 101                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
 102                dccp_set_state(sk, DCCP_CLOSED);
 103                break;
 104        case DCCP_PASSIVE_CLOSEREQ:
 105                /*
 106                 * Client received CloseReq. We set the `active' flag so that
 107                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
 108                 */
 109                dccp_send_close(sk, 1);
 110                dccp_set_state(sk, DCCP_CLOSING);
 111        }
 112}
 113
 114void dccp_done(struct sock *sk)
 115{
 116        dccp_set_state(sk, DCCP_CLOSED);
 117        dccp_clear_xmit_timers(sk);
 118
 119        sk->sk_shutdown = SHUTDOWN_MASK;
 120
 121        if (!sock_flag(sk, SOCK_DEAD))
 122                sk->sk_state_change(sk);
 123        else
 124                inet_csk_destroy_sock(sk);
 125}
 126
 127EXPORT_SYMBOL_GPL(dccp_done);
 128
 129const char *dccp_packet_name(const int type)
 130{
 131        static const char *dccp_packet_names[] = {
 132                [DCCP_PKT_REQUEST]  = "REQUEST",
 133                [DCCP_PKT_RESPONSE] = "RESPONSE",
 134                [DCCP_PKT_DATA]     = "DATA",
 135                [DCCP_PKT_ACK]      = "ACK",
 136                [DCCP_PKT_DATAACK]  = "DATAACK",
 137                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
 138                [DCCP_PKT_CLOSE]    = "CLOSE",
 139                [DCCP_PKT_RESET]    = "RESET",
 140                [DCCP_PKT_SYNC]     = "SYNC",
 141                [DCCP_PKT_SYNCACK]  = "SYNCACK",
 142        };
 143
 144        if (type >= DCCP_NR_PKT_TYPES)
 145                return "INVALID";
 146        else
 147                return dccp_packet_names[type];
 148}
 149
 150EXPORT_SYMBOL_GPL(dccp_packet_name);
 151
 152const char *dccp_state_name(const int state)
 153{
 154        static char *dccp_state_names[] = {
 155        [DCCP_OPEN]             = "OPEN",
 156        [DCCP_REQUESTING]       = "REQUESTING",
 157        [DCCP_PARTOPEN]         = "PARTOPEN",
 158        [DCCP_LISTEN]           = "LISTEN",
 159        [DCCP_RESPOND]          = "RESPOND",
 160        [DCCP_CLOSING]          = "CLOSING",
 161        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
 162        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
 163        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
 164        [DCCP_TIME_WAIT]        = "TIME_WAIT",
 165        [DCCP_CLOSED]           = "CLOSED",
 166        };
 167
 168        if (state >= DCCP_MAX_STATES)
 169                return "INVALID STATE!";
 170        else
 171                return dccp_state_names[state];
 172}
 173
 174EXPORT_SYMBOL_GPL(dccp_state_name);
 175
 176int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 177{
 178        struct dccp_sock *dp = dccp_sk(sk);
 179        struct dccp_minisock *dmsk = dccp_msk(sk);
 180        struct inet_connection_sock *icsk = inet_csk(sk);
 181
 182        dccp_minisock_init(&dp->dccps_minisock);
 183
 184        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
 185        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
 186        sk->sk_state            = DCCP_CLOSED;
 187        sk->sk_write_space      = dccp_write_space;
 188        icsk->icsk_sync_mss     = dccp_sync_mss;
 189        dp->dccps_mss_cache     = 536;
 190        dp->dccps_rate_last     = jiffies;
 191        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
 192        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
 193        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
 194
 195        dccp_init_xmit_timers(sk);
 196
 197        /*
 198         * FIXME: We're hardcoding the CCID, and doing this at this point makes
 199         * the listening (master) sock get CCID control blocks, which is not
 200         * necessary, but for now, to not mess with the test userspace apps,
 201         * lets leave it here, later the real solution is to do this in a
 202         * setsockopt(CCIDs-I-want/accept). -acme
 203         */
 204        if (likely(ctl_sock_initialized)) {
 205                int rc = dccp_feat_init(dmsk);
 206
 207                if (rc)
 208                        return rc;
 209
 210                if (dmsk->dccpms_send_ack_vector) {
 211                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
 212                        if (dp->dccps_hc_rx_ackvec == NULL)
 213                                return -ENOMEM;
 214                }
 215                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
 216                                                      sk, GFP_KERNEL);
 217                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
 218                                                      sk, GFP_KERNEL);
 219                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
 220                             dp->dccps_hc_tx_ccid == NULL)) {
 221                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
 222                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 223                        if (dmsk->dccpms_send_ack_vector) {
 224                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 225                                dp->dccps_hc_rx_ackvec = NULL;
 226                        }
 227                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 228                        return -ENOMEM;
 229                }
 230        } else {
 231                /* control socket doesn't need feat nego */
 232                INIT_LIST_HEAD(&dmsk->dccpms_pending);
 233                INIT_LIST_HEAD(&dmsk->dccpms_conf);
 234        }
 235
 236        return 0;
 237}
 238
 239EXPORT_SYMBOL_GPL(dccp_init_sock);
 240
 241int dccp_destroy_sock(struct sock *sk)
 242{
 243        struct dccp_sock *dp = dccp_sk(sk);
 244        struct dccp_minisock *dmsk = dccp_msk(sk);
 245
 246        /*
 247         * DCCP doesn't use sk_write_queue, just sk_send_head
 248         * for retransmissions
 249         */
 250        if (sk->sk_send_head != NULL) {
 251                kfree_skb(sk->sk_send_head);
 252                sk->sk_send_head = NULL;
 253        }
 254
 255        /* Clean up a referenced DCCP bind bucket. */
 256        if (inet_csk(sk)->icsk_bind_hash != NULL)
 257                inet_put_port(sk);
 258
 259        kfree(dp->dccps_service_list);
 260        dp->dccps_service_list = NULL;
 261
 262        if (dmsk->dccpms_send_ack_vector) {
 263                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
 264                dp->dccps_hc_rx_ackvec = NULL;
 265        }
 266        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
 267        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
 268        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
 269
 270        /* clean up feature negotiation state */
 271        dccp_feat_clean(dmsk);
 272
 273        return 0;
 274}
 275
 276EXPORT_SYMBOL_GPL(dccp_destroy_sock);
 277
 278static inline int dccp_listen_start(struct sock *sk, int backlog)
 279{
 280        struct dccp_sock *dp = dccp_sk(sk);
 281
 282        dp->dccps_role = DCCP_ROLE_LISTEN;
 283        return inet_csk_listen_start(sk, backlog);
 284}
 285
 286static inline int dccp_need_reset(int state)
 287{
 288        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
 289               state != DCCP_REQUESTING;
 290}
 291
 292int dccp_disconnect(struct sock *sk, int flags)
 293{
 294        struct inet_connection_sock *icsk = inet_csk(sk);
 295        struct inet_sock *inet = inet_sk(sk);
 296        int err = 0;
 297        const int old_state = sk->sk_state;
 298
 299        if (old_state != DCCP_CLOSED)
 300                dccp_set_state(sk, DCCP_CLOSED);
 301
 302        /*
 303         * This corresponds to the ABORT function of RFC793, sec. 3.8
 304         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
 305         */
 306        if (old_state == DCCP_LISTEN) {
 307                inet_csk_listen_stop(sk);
 308        } else if (dccp_need_reset(old_state)) {
 309                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
 310                sk->sk_err = ECONNRESET;
 311        } else if (old_state == DCCP_REQUESTING)
 312                sk->sk_err = ECONNRESET;
 313
 314        dccp_clear_xmit_timers(sk);
 315        __skb_queue_purge(&sk->sk_receive_queue);
 316        if (sk->sk_send_head != NULL) {
 317                __kfree_skb(sk->sk_send_head);
 318                sk->sk_send_head = NULL;
 319        }
 320
 321        inet->dport = 0;
 322
 323        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 324                inet_reset_saddr(sk);
 325
 326        sk->sk_shutdown = 0;
 327        sock_reset_flag(sk, SOCK_DONE);
 328
 329        icsk->icsk_backoff = 0;
 330        inet_csk_delack_init(sk);
 331        __sk_dst_reset(sk);
 332
 333        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
 334
 335        sk->sk_error_report(sk);
 336        return err;
 337}
 338
 339EXPORT_SYMBOL_GPL(dccp_disconnect);
 340
 341/*
 342 *      Wait for a DCCP event.
 343 *
 344 *      Note that we don't need to lock the socket, as the upper poll layers
 345 *      take care of normal races (between the test and the event) and we don't
 346 *      go look at any of the socket buffers directly.
 347 */
 348unsigned int dccp_poll(struct file *file, struct socket *sock,
 349                       poll_table *wait)
 350{
 351        unsigned int mask;
 352        struct sock *sk = sock->sk;
 353
 354        poll_wait(file, sk->sk_sleep, wait);
 355        if (sk->sk_state == DCCP_LISTEN)
 356                return inet_csk_listen_poll(sk);
 357
 358        /* Socket is not locked. We are protected from async events
 359           by poll logic and correct handling of state changes
 360           made by another threads is impossible in any case.
 361         */
 362
 363        mask = 0;
 364        if (sk->sk_err)
 365                mask = POLLERR;
 366
 367        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
 368                mask |= POLLHUP;
 369        if (sk->sk_shutdown & RCV_SHUTDOWN)
 370                mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 371
 372        /* Connected? */
 373        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
 374                if (atomic_read(&sk->sk_rmem_alloc) > 0)
 375                        mask |= POLLIN | POLLRDNORM;
 376
 377                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
 378                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 379                                mask |= POLLOUT | POLLWRNORM;
 380                        } else {  /* send SIGIO later */
 381                                set_bit(SOCK_ASYNC_NOSPACE,
 382                                        &sk->sk_socket->flags);
 383                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 384
 385                                /* Race breaker. If space is freed after
 386                                 * wspace test but before the flags are set,
 387                                 * IO signal will be lost.
 388                                 */
 389                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 390                                        mask |= POLLOUT | POLLWRNORM;
 391                        }
 392                }
 393        }
 394        return mask;
 395}
 396
 397EXPORT_SYMBOL_GPL(dccp_poll);
 398
 399int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 400{
 401        int rc = -ENOTCONN;
 402
 403        lock_sock(sk);
 404
 405        if (sk->sk_state == DCCP_LISTEN)
 406                goto out;
 407
 408        switch (cmd) {
 409        case SIOCINQ: {
 410                struct sk_buff *skb;
 411                unsigned long amount = 0;
 412
 413                skb = skb_peek(&sk->sk_receive_queue);
 414                if (skb != NULL) {
 415                        /*
 416                         * We will only return the amount of this packet since
 417                         * that is all that will be read.
 418                         */
 419                        amount = skb->len;
 420                }
 421                rc = put_user(amount, (int __user *)arg);
 422        }
 423                break;
 424        default:
 425                rc = -ENOIOCTLCMD;
 426                break;
 427        }
 428out:
 429        release_sock(sk);
 430        return rc;
 431}
 432
 433EXPORT_SYMBOL_GPL(dccp_ioctl);
 434
 435static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
 436                                   char __user *optval, int optlen)
 437{
 438        struct dccp_sock *dp = dccp_sk(sk);
 439        struct dccp_service_list *sl = NULL;
 440
 441        if (service == DCCP_SERVICE_INVALID_VALUE ||
 442            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
 443                return -EINVAL;
 444
 445        if (optlen > sizeof(service)) {
 446                sl = kmalloc(optlen, GFP_KERNEL);
 447                if (sl == NULL)
 448                        return -ENOMEM;
 449
 450                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
 451                if (copy_from_user(sl->dccpsl_list,
 452                                   optval + sizeof(service),
 453                                   optlen - sizeof(service)) ||
 454                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
 455                        kfree(sl);
 456                        return -EFAULT;
 457                }
 458        }
 459
 460        lock_sock(sk);
 461        dp->dccps_service = service;
 462
 463        kfree(dp->dccps_service_list);
 464
 465        dp->dccps_service_list = sl;
 466        release_sock(sk);
 467        return 0;
 468}
 469
 470/* byte 1 is feature.  the rest is the preference list */
 471static int dccp_setsockopt_change(struct sock *sk, int type,
 472                                  struct dccp_so_feat __user *optval)
 473{
 474        struct dccp_so_feat opt;
 475        u8 *val;
 476        int rc;
 477
 478        if (copy_from_user(&opt, optval, sizeof(opt)))
 479                return -EFAULT;
 480
 481        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
 482        if (!val)
 483                return -ENOMEM;
 484
 485        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
 486                rc = -EFAULT;
 487                goto out_free_val;
 488        }
 489
 490        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
 491                              val, opt.dccpsf_len, GFP_KERNEL);
 492        if (rc)
 493                goto out_free_val;
 494
 495out:
 496        return rc;
 497
 498out_free_val:
 499        kfree(val);
 500        goto out;
 501}
 502
 503static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 504                char __user *optval, int optlen)
 505{
 506        struct dccp_sock *dp = dccp_sk(sk);
 507        int val, err = 0;
 508
 509        if (optlen < sizeof(int))
 510                return -EINVAL;
 511
 512        if (get_user(val, (int __user *)optval))
 513                return -EFAULT;
 514
 515        if (optname == DCCP_SOCKOPT_SERVICE)
 516                return dccp_setsockopt_service(sk, val, optval, optlen);
 517
 518        lock_sock(sk);
 519        switch (optname) {
 520        case DCCP_SOCKOPT_PACKET_SIZE:
 521                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 522                err = 0;
 523                break;
 524        case DCCP_SOCKOPT_CHANGE_L:
 525                if (optlen != sizeof(struct dccp_so_feat))
 526                        err = -EINVAL;
 527                else
 528                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
 529                                                     (struct dccp_so_feat __user *)
 530                                                     optval);
 531                break;
 532        case DCCP_SOCKOPT_CHANGE_R:
 533                if (optlen != sizeof(struct dccp_so_feat))
 534                        err = -EINVAL;
 535                else
 536                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
 537                                                     (struct dccp_so_feat __user *)
 538                                                     optval);
 539                break;
 540        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 541                if (dp->dccps_role != DCCP_ROLE_SERVER)
 542                        err = -EOPNOTSUPP;
 543                else
 544                        dp->dccps_server_timewait = (val != 0);
 545                break;
 546        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
 547                if (val < 0 || val > 15)
 548                        err = -EINVAL;
 549                else
 550                        dp->dccps_pcslen = val;
 551                break;
 552        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
 553                if (val < 0 || val > 15)
 554                        err = -EINVAL;
 555                else {
 556                        dp->dccps_pcrlen = val;
 557                        /* FIXME: add feature negotiation,
 558                         * ChangeL(MinimumChecksumCoverage, val) */
 559                }
 560                break;
 561        default:
 562                err = -ENOPROTOOPT;
 563                break;
 564        }
 565
 566        release_sock(sk);
 567        return err;
 568}
 569
 570int dccp_setsockopt(struct sock *sk, int level, int optname,
 571                    char __user *optval, int optlen)
 572{
 573        if (level != SOL_DCCP)
 574                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
 575                                                             optname, optval,
 576                                                             optlen);
 577        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 578}
 579
 580EXPORT_SYMBOL_GPL(dccp_setsockopt);
 581
 582#ifdef CONFIG_COMPAT
 583int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
 584                           char __user *optval, int optlen)
 585{
 586        if (level != SOL_DCCP)
 587                return inet_csk_compat_setsockopt(sk, level, optname,
 588                                                  optval, optlen);
 589        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
 590}
 591
 592EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
 593#endif
 594
 595static int dccp_getsockopt_service(struct sock *sk, int len,
 596                                   __be32 __user *optval,
 597                                   int __user *optlen)
 598{
 599        const struct dccp_sock *dp = dccp_sk(sk);
 600        const struct dccp_service_list *sl;
 601        int err = -ENOENT, slen = 0, total_len = sizeof(u32);
 602
 603        lock_sock(sk);
 604        if ((sl = dp->dccps_service_list) != NULL) {
 605                slen = sl->dccpsl_nr * sizeof(u32);
 606                total_len += slen;
 607        }
 608
 609        err = -EINVAL;
 610        if (total_len > len)
 611                goto out;
 612
 613        err = 0;
 614        if (put_user(total_len, optlen) ||
 615            put_user(dp->dccps_service, optval) ||
 616            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
 617                err = -EFAULT;
 618out:
 619        release_sock(sk);
 620        return err;
 621}
 622
 623static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 624                    char __user *optval, int __user *optlen)
 625{
 626        struct dccp_sock *dp;
 627        int val, len;
 628
 629        if (get_user(len, optlen))
 630                return -EFAULT;
 631
 632        if (len < (int)sizeof(int))
 633                return -EINVAL;
 634
 635        dp = dccp_sk(sk);
 636
 637        switch (optname) {
 638        case DCCP_SOCKOPT_PACKET_SIZE:
 639                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
 640                return 0;
 641        case DCCP_SOCKOPT_SERVICE:
 642                return dccp_getsockopt_service(sk, len,
 643                                               (__be32 __user *)optval, optlen);
 644        case DCCP_SOCKOPT_GET_CUR_MPS:
 645                val = dp->dccps_mss_cache;
 646                break;
 647        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
 648                val = dp->dccps_server_timewait;
 649                break;
 650        case DCCP_SOCKOPT_SEND_CSCOV:
 651                val = dp->dccps_pcslen;
 652                break;
 653        case DCCP_SOCKOPT_RECV_CSCOV:
 654                val = dp->dccps_pcrlen;
 655                break;
 656        case 128 ... 191:
 657                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 658                                             len, (u32 __user *)optval, optlen);
 659        case 192 ... 255:
 660                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
 661                                             len, (u32 __user *)optval, optlen);
 662        default:
 663                return -ENOPROTOOPT;
 664        }
 665
 666        len = sizeof(val);
 667        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
 668                return -EFAULT;
 669
 670        return 0;
 671}
 672
 673int dccp_getsockopt(struct sock *sk, int level, int optname,
 674                    char __user *optval, int __user *optlen)
 675{
 676        if (level != SOL_DCCP)
 677                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
 678                                                             optname, optval,
 679                                                             optlen);
 680        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 681}
 682
 683EXPORT_SYMBOL_GPL(dccp_getsockopt);
 684
 685#ifdef CONFIG_COMPAT
 686int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 687                           char __user *optval, int __user *optlen)
 688{
 689        if (level != SOL_DCCP)
 690                return inet_csk_compat_getsockopt(sk, level, optname,
 691                                                  optval, optlen);
 692        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
 693}
 694
 695EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 696#endif
 697
 698int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 699                 size_t len)
 700{
 701        const struct dccp_sock *dp = dccp_sk(sk);
 702        const int flags = msg->msg_flags;
 703        const int noblock = flags & MSG_DONTWAIT;
 704        struct sk_buff *skb;
 705        int rc, size;
 706        long timeo;
 707
 708        if (len > dp->dccps_mss_cache)
 709                return -EMSGSIZE;
 710
 711        lock_sock(sk);
 712
 713        if (sysctl_dccp_tx_qlen &&
 714            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
 715                rc = -EAGAIN;
 716                goto out_release;
 717        }
 718
 719        timeo = sock_sndtimeo(sk, noblock);
 720
 721        /*
 722         * We have to use sk_stream_wait_connect here to set sk_write_pending,
 723         * so that the trick in dccp_rcv_request_sent_state_process.
 724         */
 725        /* Wait for a connection to finish. */
 726        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
 727                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
 728                        goto out_release;
 729
 730        size = sk->sk_prot->max_header + len;
 731        release_sock(sk);
 732        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
 733        lock_sock(sk);
 734        if (skb == NULL)
 735                goto out_release;
 736
 737        skb_reserve(skb, sk->sk_prot->max_header);
 738        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 739        if (rc != 0)
 740                goto out_discard;
 741
 742        skb_queue_tail(&sk->sk_write_queue, skb);
 743        dccp_write_xmit(sk,0);
 744out_release:
 745        release_sock(sk);
 746        return rc ? : len;
 747out_discard:
 748        kfree_skb(skb);
 749        goto out_release;
 750}
 751
 752EXPORT_SYMBOL_GPL(dccp_sendmsg);
 753
 754int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 755                 size_t len, int nonblock, int flags, int *addr_len)
 756{
 757        const struct dccp_hdr *dh;
 758        long timeo;
 759
 760        lock_sock(sk);
 761
 762        if (sk->sk_state == DCCP_LISTEN) {
 763                len = -ENOTCONN;
 764                goto out;
 765        }
 766
 767        timeo = sock_rcvtimeo(sk, nonblock);
 768
 769        do {
 770                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 771
 772                if (skb == NULL)
 773                        goto verify_sock_status;
 774
 775                dh = dccp_hdr(skb);
 776
 777                switch (dh->dccph_type) {
 778                case DCCP_PKT_DATA:
 779                case DCCP_PKT_DATAACK:
 780                        goto found_ok_skb;
 781
 782                case DCCP_PKT_CLOSE:
 783                case DCCP_PKT_CLOSEREQ:
 784                        if (!(flags & MSG_PEEK))
 785                                dccp_finish_passive_close(sk);
 786                        /* fall through */
 787                case DCCP_PKT_RESET:
 788                        dccp_pr_debug("found fin (%s) ok!\n",
 789                                      dccp_packet_name(dh->dccph_type));
 790                        len = 0;
 791                        goto found_fin_ok;
 792                default:
 793                        dccp_pr_debug("packet_type=%s\n",
 794                                      dccp_packet_name(dh->dccph_type));
 795                        sk_eat_skb(sk, skb, 0);
 796                }
 797verify_sock_status:
 798                if (sock_flag(sk, SOCK_DONE)) {
 799                        len = 0;
 800                        break;
 801                }
 802
 803                if (sk->sk_err) {
 804                        len = sock_error(sk);
 805                        break;
 806                }
 807
 808                if (sk->sk_shutdown & RCV_SHUTDOWN) {
 809                        len = 0;
 810                        break;
 811                }
 812
 813                if (sk->sk_state == DCCP_CLOSED) {
 814                        if (!sock_flag(sk, SOCK_DONE)) {
 815                                /* This occurs when user tries to read
 816                                 * from never connected socket.
 817                                 */
 818                                len = -ENOTCONN;
 819                                break;
 820                        }
 821                        len = 0;
 822                        break;
 823                }
 824
 825                if (!timeo) {
 826                        len = -EAGAIN;
 827                        break;
 828                }
 829
 830                if (signal_pending(current)) {
 831                        len = sock_intr_errno(timeo);
 832                        break;
 833                }
 834
 835                sk_wait_data(sk, &timeo);
 836                continue;
 837        found_ok_skb:
 838                if (len > skb->len)
 839                        len = skb->len;
 840                else if (len < skb->len)
 841                        msg->msg_flags |= MSG_TRUNC;
 842
 843                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
 844                        /* Exception. Bailout! */
 845                        len = -EFAULT;
 846                        break;
 847                }
 848        found_fin_ok:
 849                if (!(flags & MSG_PEEK))
 850                        sk_eat_skb(sk, skb, 0);
 851                break;
 852        } while (1);
 853out:
 854        release_sock(sk);
 855        return len;
 856}
 857
 858EXPORT_SYMBOL_GPL(dccp_recvmsg);
 859
 860int inet_dccp_listen(struct socket *sock, int backlog)
 861{
 862        struct sock *sk = sock->sk;
 863        unsigned char old_state;
 864        int err;
 865
 866        lock_sock(sk);
 867
 868        err = -EINVAL;
 869        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
 870                goto out;
 871
 872        old_state = sk->sk_state;
 873        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
 874                goto out;
 875
 876        /* Really, if the socket is already in listen state
 877         * we can only allow the backlog to be adjusted.
 878         */
 879        if (old_state != DCCP_LISTEN) {
 880                /*
 881                 * FIXME: here it probably should be sk->sk_prot->listen_start
 882                 * see tcp_listen_start
 883                 */
 884                err = dccp_listen_start(sk, backlog);
 885                if (err)
 886                        goto out;
 887        }
 888        sk->sk_max_ack_backlog = backlog;
 889        err = 0;
 890
 891out:
 892        release_sock(sk);
 893        return err;
 894}
 895
 896EXPORT_SYMBOL_GPL(inet_dccp_listen);
 897
 898static void dccp_terminate_connection(struct sock *sk)
 899{
 900        u8 next_state = DCCP_CLOSED;
 901
 902        switch (sk->sk_state) {
 903        case DCCP_PASSIVE_CLOSE:
 904        case DCCP_PASSIVE_CLOSEREQ:
 905                dccp_finish_passive_close(sk);
 906                break;
 907        case DCCP_PARTOPEN:
 908                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
 909                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 910                /* fall through */
 911        case DCCP_OPEN:
 912                dccp_send_close(sk, 1);
 913
 914                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
 915                    !dccp_sk(sk)->dccps_server_timewait)
 916                        next_state = DCCP_ACTIVE_CLOSEREQ;
 917                else
 918                        next_state = DCCP_CLOSING;
 919                /* fall through */
 920        default:
 921                dccp_set_state(sk, next_state);
 922        }
 923}
 924
 925void dccp_close(struct sock *sk, long timeout)
 926{
 927        struct dccp_sock *dp = dccp_sk(sk);
 928        struct sk_buff *skb;
 929        u32 data_was_unread = 0;
 930        int state;
 931
 932        lock_sock(sk);
 933
 934        sk->sk_shutdown = SHUTDOWN_MASK;
 935
 936        if (sk->sk_state == DCCP_LISTEN) {
 937                dccp_set_state(sk, DCCP_CLOSED);
 938
 939                /* Special case. */
 940                inet_csk_listen_stop(sk);
 941
 942                goto adjudge_to_death;
 943        }
 944
 945        sk_stop_timer(sk, &dp->dccps_xmit_timer);
 946
 947        /*
 948         * We need to flush the recv. buffs.  We do this only on the
 949         * descriptor close, not protocol-sourced closes, because the
 950          *reader process may not have drained the data yet!
 951         */
 952        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 953                data_was_unread += skb->len;
 954                __kfree_skb(skb);
 955        }
 956
 957        if (data_was_unread) {
 958                /* Unread data was tossed, send an appropriate Reset Code */
 959                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
 960                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
 961                dccp_set_state(sk, DCCP_CLOSED);
 962        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 963                /* Check zero linger _after_ checking for unread data. */
 964                sk->sk_prot->disconnect(sk, 0);
 965        } else if (sk->sk_state != DCCP_CLOSED) {
 966                dccp_terminate_connection(sk);
 967        }
 968
 969        sk_stream_wait_close(sk, timeout);
 970
 971adjudge_to_death:
 972        state = sk->sk_state;
 973        sock_hold(sk);
 974        sock_orphan(sk);
 975        atomic_inc(sk->sk_prot->orphan_count);
 976
 977        /*
 978         * It is the last release_sock in its life. It will remove backlog.
 979         */
 980        release_sock(sk);
 981        /*
 982         * Now socket is owned by kernel and we acquire BH lock
 983         * to finish close. No need to check for user refs.
 984         */
 985        local_bh_disable();
 986        bh_lock_sock(sk);
 987        BUG_TRAP(!sock_owned_by_user(sk));
 988
 989        /* Have we already been destroyed by a softirq or backlog? */
 990        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
 991                goto out;
 992
 993        if (sk->sk_state == DCCP_CLOSED)
 994                inet_csk_destroy_sock(sk);
 995
 996        /* Otherwise, socket is reprieved until protocol close. */
 997
 998out:
 999        bh_unlock_sock(sk);
1000        local_bh_enable();
1001        sock_put(sk);
1002}
1003
1004EXPORT_SYMBOL_GPL(dccp_close);
1005
/**
 * dccp_shutdown - shutdown hook for DCCP sockets
 * @sk: socket the request was made on (unused)
 * @how: shutdown mode requested by the caller; only logged
 *
 * Currently a stub: it emits a debug message and changes nothing.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1012
1013static int __init dccp_mib_init(void)
1014{
1015        int rc = -ENOMEM;
1016
1017        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1018        if (dccp_statistics[0] == NULL)
1019                goto out;
1020
1021        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1022        if (dccp_statistics[1] == NULL)
1023                goto out_free_one;
1024
1025        rc = 0;
1026out:
1027        return rc;
1028out_free_one:
1029        free_percpu(dccp_statistics[0]);
1030        dccp_statistics[0] = NULL;
1031        goto out;
1032
1033}
1034
1035static void dccp_mib_exit(void)
1036{
1037        free_percpu(dccp_statistics[0]);
1038        free_percpu(dccp_statistics[1]);
1039        dccp_statistics[0] = dccp_statistics[1] = NULL;
1040}
1041
1042static int thash_entries;
1043module_param(thash_entries, int, 0444);
1044MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1045
1046#ifdef CONFIG_IP_DCCP_DEBUG
1047int dccp_debug;
1048module_param(dccp_debug, bool, 0444);
1049MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1050
1051EXPORT_SYMBOL_GPL(dccp_debug);
1052#endif
1053
1054static int __init dccp_init(void)
1055{
1056        unsigned long goal;
1057        int ehash_order, bhash_order, i;
1058        int rc = -ENOBUFS;
1059
1060        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1061                     FIELD_SIZEOF(struct sk_buff, cb));
1062
1063        dccp_hashinfo.bind_bucket_cachep =
1064                kmem_cache_create("dccp_bind_bucket",
1065                                  sizeof(struct inet_bind_bucket), 0,
1066                                  SLAB_HWCACHE_ALIGN, NULL);
1067        if (!dccp_hashinfo.bind_bucket_cachep)
1068                goto out;
1069
1070        /*
1071         * Size and allocate the main established and bind bucket
1072         * hash tables.
1073         *
1074         * The methodology is similar to that of the buffer cache.
1075         */
1076        if (num_physpages >= (128 * 1024))
1077                goal = num_physpages >> (21 - PAGE_SHIFT);
1078        else
1079                goal = num_physpages >> (23 - PAGE_SHIFT);
1080
1081        if (thash_entries)
1082                goal = (thash_entries *
1083                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1084        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1085                ;
1086        do {
1087                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1088                                        sizeof(struct inet_ehash_bucket);
1089                while (dccp_hashinfo.ehash_size &
1090                       (dccp_hashinfo.ehash_size - 1))
1091                        dccp_hashinfo.ehash_size--;
1092                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1093                        __get_free_pages(GFP_ATOMIC, ehash_order);
1094        } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1095
1096        if (!dccp_hashinfo.ehash) {
1097                DCCP_CRIT("Failed to allocate DCCP established hash table");
1098                goto out_free_bind_bucket_cachep;
1099        }
1100
1101        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1102                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1103                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1104        }
1105
1106        if (inet_ehash_locks_alloc(&dccp_hashinfo))
1107                        goto out_free_dccp_ehash;
1108
1109        bhash_order = ehash_order;
1110
1111        do {
1112                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1113                                        sizeof(struct inet_bind_hashbucket);
1114                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1115                    bhash_order > 0)
1116                        continue;
1117                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1118                        __get_free_pages(GFP_ATOMIC, bhash_order);
1119        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1120
1121        if (!dccp_hashinfo.bhash) {
1122                DCCP_CRIT("Failed to allocate DCCP bind hash table");
1123                goto out_free_dccp_locks;
1124        }
1125
1126        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1127                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1128                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1129        }
1130
1131        rc = dccp_mib_init();
1132        if (rc)
1133                goto out_free_dccp_bhash;
1134
1135        rc = dccp_ackvec_init();
1136        if (rc)
1137                goto out_free_dccp_mib;
1138
1139        rc = dccp_sysctl_init();
1140        if (rc)
1141                goto out_ackvec_exit;
1142
1143        dccp_timestamping_init();
1144out:
1145        return rc;
1146out_ackvec_exit:
1147        dccp_ackvec_exit();
1148out_free_dccp_mib:
1149        dccp_mib_exit();
1150out_free_dccp_bhash:
1151        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1152        dccp_hashinfo.bhash = NULL;
1153out_free_dccp_locks:
1154        inet_ehash_locks_free(&dccp_hashinfo);
1155out_free_dccp_ehash:
1156        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1157        dccp_hashinfo.ehash = NULL;
1158out_free_bind_bucket_cachep:
1159        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1160        dccp_hashinfo.bind_bucket_cachep = NULL;
1161        goto out;
1162}
1163
1164static void __exit dccp_fini(void)
1165{
1166        dccp_mib_exit();
1167        free_pages((unsigned long)dccp_hashinfo.bhash,
1168                   get_order(dccp_hashinfo.bhash_size *
1169                             sizeof(struct inet_bind_hashbucket)));
1170        free_pages((unsigned long)dccp_hashinfo.ehash,
1171                   get_order(dccp_hashinfo.ehash_size *
1172                             sizeof(struct inet_ehash_bucket)));
1173        inet_ehash_locks_free(&dccp_hashinfo);
1174        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1175        dccp_ackvec_exit();
1176        dccp_sysctl_exit();
1177}
1178
1179module_init(dccp_init);
1180module_exit(dccp_fini);
1181
1182MODULE_LICENSE("GPL");
1183MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1184MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
1185
/* lxr.linux.no is kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */