linux/net/mptcp/sockopt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Multipath TCP
   3 *
   4 * Copyright (c) 2021, Red Hat.
   5 */
   6
   7#define pr_fmt(fmt) "MPTCP: " fmt
   8
   9#include <linux/kernel.h>
  10#include <linux/module.h>
  11#include <net/sock.h>
  12#include <net/protocol.h>
  13#include <net/tcp.h>
  14#include <net/mptcp.h>
  15#include "protocol.h"
  16
  17static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
  18{
  19        sock_owned_by_me((const struct sock *)msk);
  20
  21        if (likely(!__mptcp_check_fallback(msk)))
  22                return NULL;
  23
  24        return msk->first;
  25}
  26
  27static u32 sockopt_seq_reset(const struct sock *sk)
  28{
  29        sock_owned_by_me(sk);
  30
  31        /* Highbits contain state.  Allows to distinguish sockopt_seq
  32         * of listener and established:
  33         * s0 = new_listener()
  34         * sockopt(s0) - seq is 1
  35         * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
  36         * sockopt(s0) - seq increments to 2 on s0
  37         * sockopt(s1) // seq increments to 2 on s1 (different option)
  38         * new ssk completes join, inherits options from s0 // seq 2
  39         * Needs sync from mptcp join logic, but ssk->seq == msk->seq
  40         *
  41         * Set High order bits to sk_state so ssk->seq == msk->seq test
  42         * will fail.
  43         */
  44
  45        return (u32)sk->sk_state << 24u;
  46}
  47
  48static void sockopt_seq_inc(struct mptcp_sock *msk)
  49{
  50        u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
  51
  52        msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
  53}
  54
  55static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
  56                                unsigned int optlen, int *val)
  57{
  58        if (optlen < sizeof(int))
  59                return -EINVAL;
  60
  61        if (copy_from_sockptr(val, optval, sizeof(*val)))
  62                return -EFAULT;
  63
  64        return 0;
  65}
  66
  67static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
  68{
  69        struct mptcp_subflow_context *subflow;
  70        struct sock *sk = (struct sock *)msk;
  71
  72        lock_sock(sk);
  73        sockopt_seq_inc(msk);
  74
  75        mptcp_for_each_subflow(msk, subflow) {
  76                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
  77                bool slow = lock_sock_fast(ssk);
  78
  79                switch (optname) {
  80                case SO_DEBUG:
  81                        sock_valbool_flag(ssk, SOCK_DBG, !!val);
  82                        break;
  83                case SO_KEEPALIVE:
  84                        if (ssk->sk_prot->keepalive)
  85                                ssk->sk_prot->keepalive(ssk, !!val);
  86                        sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
  87                        break;
  88                case SO_PRIORITY:
  89                        ssk->sk_priority = val;
  90                        break;
  91                case SO_SNDBUF:
  92                case SO_SNDBUFFORCE:
  93                        ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
  94                        WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
  95                        break;
  96                case SO_RCVBUF:
  97                case SO_RCVBUFFORCE:
  98                        ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
  99                        WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 100                        break;
 101                case SO_MARK:
 102                        if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
 103                                ssk->sk_mark = sk->sk_mark;
 104                                sk_dst_reset(ssk);
 105                        }
 106                        break;
 107                case SO_INCOMING_CPU:
 108                        WRITE_ONCE(ssk->sk_incoming_cpu, val);
 109                        break;
 110                }
 111
 112                subflow->setsockopt_seq = msk->setsockopt_seq;
 113                unlock_sock_fast(ssk, slow);
 114        }
 115
 116        release_sock(sk);
 117}
 118
 119static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
 120{
 121        sockptr_t optval = KERNEL_SOCKPTR(&val);
 122        struct sock *sk = (struct sock *)msk;
 123        int ret;
 124
 125        ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 126                              optval, sizeof(val));
 127        if (ret)
 128                return ret;
 129
 130        mptcp_sol_socket_sync_intval(msk, optname, val);
 131        return 0;
 132}
 133
 134static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
 135{
 136        struct sock *sk = (struct sock *)msk;
 137
 138        WRITE_ONCE(sk->sk_incoming_cpu, val);
 139
 140        mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
 141}
 142
 143static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
 144{
 145        sockptr_t optval = KERNEL_SOCKPTR(&val);
 146        struct mptcp_subflow_context *subflow;
 147        struct sock *sk = (struct sock *)msk;
 148        int ret;
 149
 150        ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 151                              optval, sizeof(val));
 152        if (ret)
 153                return ret;
 154
 155        lock_sock(sk);
 156        mptcp_for_each_subflow(msk, subflow) {
 157                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 158                bool slow = lock_sock_fast(ssk);
 159
 160                sock_set_timestamp(sk, optname, !!val);
 161                unlock_sock_fast(ssk, slow);
 162        }
 163
 164        release_sock(sk);
 165        return 0;
 166}
 167
 168static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 169                                           sockptr_t optval,
 170                                           unsigned int optlen)
 171{
 172        int val, ret;
 173
 174        ret = mptcp_get_int_option(msk, optval, optlen, &val);
 175        if (ret)
 176                return ret;
 177
 178        switch (optname) {
 179        case SO_KEEPALIVE:
 180                mptcp_sol_socket_sync_intval(msk, optname, val);
 181                return 0;
 182        case SO_DEBUG:
 183        case SO_MARK:
 184        case SO_PRIORITY:
 185        case SO_SNDBUF:
 186        case SO_SNDBUFFORCE:
 187        case SO_RCVBUF:
 188        case SO_RCVBUFFORCE:
 189                return mptcp_sol_socket_intval(msk, optname, val);
 190        case SO_INCOMING_CPU:
 191                mptcp_so_incoming_cpu(msk, val);
 192                return 0;
 193        case SO_TIMESTAMP_OLD:
 194        case SO_TIMESTAMP_NEW:
 195        case SO_TIMESTAMPNS_OLD:
 196        case SO_TIMESTAMPNS_NEW:
 197                return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
 198        }
 199
 200        return -ENOPROTOOPT;
 201}
 202
 203static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
 204                                                    int optname,
 205                                                    sockptr_t optval,
 206                                                    unsigned int optlen)
 207{
 208        struct mptcp_subflow_context *subflow;
 209        struct sock *sk = (struct sock *)msk;
 210        struct so_timestamping timestamping;
 211        int ret;
 212
 213        if (optlen == sizeof(timestamping)) {
 214                if (copy_from_sockptr(&timestamping, optval,
 215                                      sizeof(timestamping)))
 216                        return -EFAULT;
 217        } else if (optlen == sizeof(int)) {
 218                memset(&timestamping, 0, sizeof(timestamping));
 219
 220                if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
 221                        return -EFAULT;
 222        } else {
 223                return -EINVAL;
 224        }
 225
 226        ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
 227                              KERNEL_SOCKPTR(&timestamping),
 228                              sizeof(timestamping));
 229        if (ret)
 230                return ret;
 231
 232        lock_sock(sk);
 233
 234        mptcp_for_each_subflow(msk, subflow) {
 235                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 236                bool slow = lock_sock_fast(ssk);
 237
 238                sock_set_timestamping(sk, optname, timestamping);
 239                unlock_sock_fast(ssk, slow);
 240        }
 241
 242        release_sock(sk);
 243
 244        return 0;
 245}
 246
 247static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
 248                                              unsigned int optlen)
 249{
 250        struct mptcp_subflow_context *subflow;
 251        struct sock *sk = (struct sock *)msk;
 252        struct linger ling;
 253        sockptr_t kopt;
 254        int ret;
 255
 256        if (optlen < sizeof(ling))
 257                return -EINVAL;
 258
 259        if (copy_from_sockptr(&ling, optval, sizeof(ling)))
 260                return -EFAULT;
 261
 262        kopt = KERNEL_SOCKPTR(&ling);
 263        ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
 264        if (ret)
 265                return ret;
 266
 267        lock_sock(sk);
 268        sockopt_seq_inc(msk);
 269        mptcp_for_each_subflow(msk, subflow) {
 270                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 271                bool slow = lock_sock_fast(ssk);
 272
 273                if (!ling.l_onoff) {
 274                        sock_reset_flag(ssk, SOCK_LINGER);
 275                } else {
 276                        ssk->sk_lingertime = sk->sk_lingertime;
 277                        sock_set_flag(ssk, SOCK_LINGER);
 278                }
 279
 280                subflow->setsockopt_seq = msk->setsockopt_seq;
 281                unlock_sock_fast(ssk, slow);
 282        }
 283
 284        release_sock(sk);
 285        return 0;
 286}
 287
 288static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 289                                       sockptr_t optval, unsigned int optlen)
 290{
 291        struct sock *sk = (struct sock *)msk;
 292        struct socket *ssock;
 293        int ret;
 294
 295        switch (optname) {
 296        case SO_REUSEPORT:
 297        case SO_REUSEADDR:
 298        case SO_BINDTODEVICE:
 299        case SO_BINDTOIFINDEX:
 300                lock_sock(sk);
 301                ssock = __mptcp_nmpc_socket(msk);
 302                if (!ssock) {
 303                        release_sock(sk);
 304                        return -EINVAL;
 305                }
 306
 307                ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
 308                if (ret == 0) {
 309                        if (optname == SO_REUSEPORT)
 310                                sk->sk_reuseport = ssock->sk->sk_reuseport;
 311                        else if (optname == SO_REUSEADDR)
 312                                sk->sk_reuse = ssock->sk->sk_reuse;
 313                        else if (optname == SO_BINDTODEVICE)
 314                                sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
 315                        else if (optname == SO_BINDTOIFINDEX)
 316                                sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
 317                }
 318                release_sock(sk);
 319                return ret;
 320        case SO_KEEPALIVE:
 321        case SO_PRIORITY:
 322        case SO_SNDBUF:
 323        case SO_SNDBUFFORCE:
 324        case SO_RCVBUF:
 325        case SO_RCVBUFFORCE:
 326        case SO_MARK:
 327        case SO_INCOMING_CPU:
 328        case SO_DEBUG:
 329        case SO_TIMESTAMP_OLD:
 330        case SO_TIMESTAMP_NEW:
 331        case SO_TIMESTAMPNS_OLD:
 332        case SO_TIMESTAMPNS_NEW:
 333                return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
 334                                                       optlen);
 335        case SO_TIMESTAMPING_OLD:
 336        case SO_TIMESTAMPING_NEW:
 337                return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
 338                                                                optval, optlen);
 339        case SO_LINGER:
 340                return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
 341        case SO_RCVLOWAT:
 342        case SO_RCVTIMEO_OLD:
 343        case SO_RCVTIMEO_NEW:
 344        case SO_BUSY_POLL:
 345        case SO_PREFER_BUSY_POLL:
 346        case SO_BUSY_POLL_BUDGET:
 347                /* No need to copy: only relevant for msk */
 348                return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
 349        case SO_NO_CHECK:
 350        case SO_DONTROUTE:
 351        case SO_BROADCAST:
 352        case SO_BSDCOMPAT:
 353        case SO_PASSCRED:
 354        case SO_PASSSEC:
 355        case SO_RXQ_OVFL:
 356        case SO_WIFI_STATUS:
 357        case SO_NOFCS:
 358        case SO_SELECT_ERR_QUEUE:
 359                return 0;
 360        }
 361
 362        /* SO_OOBINLINE is not supported, let's avoid the related mess
 363         * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
 364         * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
 365         * we must be careful with subflows
 366         *
 367         * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
 368         * explicitly the sk_protocol field
 369         *
 370         * SO_PEEK_OFF is unsupported, as it is for plain TCP
 371         * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
 372         * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
 373         * but likely needs careful design
 374         *
 375         * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
 376         * SO_TXTIME is currently unsupported
 377         */
 378
 379        return -EOPNOTSUPP;
 380}
 381
 382static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 383                               sockptr_t optval, unsigned int optlen)
 384{
 385        struct sock *sk = (struct sock *)msk;
 386        int ret = -EOPNOTSUPP;
 387        struct socket *ssock;
 388
 389        switch (optname) {
 390        case IPV6_V6ONLY:
 391                lock_sock(sk);
 392                ssock = __mptcp_nmpc_socket(msk);
 393                if (!ssock) {
 394                        release_sock(sk);
 395                        return -EINVAL;
 396                }
 397
 398                ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
 399                if (ret == 0)
 400                        sk->sk_ipv6only = ssock->sk->sk_ipv6only;
 401
 402                release_sock(sk);
 403                break;
 404        }
 405
 406        return ret;
 407}
 408
 409static bool mptcp_supported_sockopt(int level, int optname)
 410{
 411        if (level == SOL_IP) {
 412                switch (optname) {
 413                /* should work fine */
 414                case IP_FREEBIND:
 415                case IP_TRANSPARENT:
 416
 417                /* the following are control cmsg related */
 418                case IP_PKTINFO:
 419                case IP_RECVTTL:
 420                case IP_RECVTOS:
 421                case IP_RECVOPTS:
 422                case IP_RETOPTS:
 423                case IP_PASSSEC:
 424                case IP_RECVORIGDSTADDR:
 425                case IP_CHECKSUM:
 426                case IP_RECVFRAGSIZE:
 427
 428                /* common stuff that need some love */
 429                case IP_TOS:
 430                case IP_TTL:
 431                case IP_BIND_ADDRESS_NO_PORT:
 432                case IP_MTU_DISCOVER:
 433                case IP_RECVERR:
 434
 435                /* possibly less common may deserve some love */
 436                case IP_MINTTL:
 437
 438                /* the following is apparently a no-op for plain TCP */
 439                case IP_RECVERR_RFC4884:
 440                        return true;
 441                }
 442
 443                /* IP_OPTIONS is not supported, needs subflow care */
 444                /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
 445                /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
 446                 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
 447                 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
 448                 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
 449                 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
 450                 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
 451                 * with mcast stuff
 452                 */
 453                /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
 454                return false;
 455        }
 456        if (level == SOL_IPV6) {
 457                switch (optname) {
 458                case IPV6_V6ONLY:
 459
 460                /* the following are control cmsg related */
 461                case IPV6_RECVPKTINFO:
 462                case IPV6_2292PKTINFO:
 463                case IPV6_RECVHOPLIMIT:
 464                case IPV6_2292HOPLIMIT:
 465                case IPV6_RECVRTHDR:
 466                case IPV6_2292RTHDR:
 467                case IPV6_RECVHOPOPTS:
 468                case IPV6_2292HOPOPTS:
 469                case IPV6_RECVDSTOPTS:
 470                case IPV6_2292DSTOPTS:
 471                case IPV6_RECVTCLASS:
 472                case IPV6_FLOWINFO:
 473                case IPV6_RECVPATHMTU:
 474                case IPV6_RECVORIGDSTADDR:
 475                case IPV6_RECVFRAGSIZE:
 476
 477                /* the following ones need some love but are quite common */
 478                case IPV6_TCLASS:
 479                case IPV6_TRANSPARENT:
 480                case IPV6_FREEBIND:
 481                case IPV6_PKTINFO:
 482                case IPV6_2292PKTOPTIONS:
 483                case IPV6_UNICAST_HOPS:
 484                case IPV6_MTU_DISCOVER:
 485                case IPV6_MTU:
 486                case IPV6_RECVERR:
 487                case IPV6_FLOWINFO_SEND:
 488                case IPV6_FLOWLABEL_MGR:
 489                case IPV6_MINHOPCOUNT:
 490                case IPV6_DONTFRAG:
 491                case IPV6_AUTOFLOWLABEL:
 492
 493                /* the following one is a no-op for plain TCP */
 494                case IPV6_RECVERR_RFC4884:
 495                        return true;
 496                }
 497
 498                /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
 499                 * not supported
 500                 */
 501                /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
 502                 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
 503                 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
 504                 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
 505                 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
 506                 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
 507                 * are not supported better not deal with mcast
 508                 */
 509                /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
 510
 511                /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
 512                /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
 513                return false;
 514        }
 515        if (level == SOL_TCP) {
 516                switch (optname) {
 517                /* the following are no-op or should work just fine */
 518                case TCP_THIN_DUPACK:
 519                case TCP_DEFER_ACCEPT:
 520
 521                /* the following need some love */
 522                case TCP_MAXSEG:
 523                case TCP_NODELAY:
 524                case TCP_THIN_LINEAR_TIMEOUTS:
 525                case TCP_CONGESTION:
 526                case TCP_ULP:
 527                case TCP_CORK:
 528                case TCP_KEEPIDLE:
 529                case TCP_KEEPINTVL:
 530                case TCP_KEEPCNT:
 531                case TCP_SYNCNT:
 532                case TCP_SAVE_SYN:
 533                case TCP_LINGER2:
 534                case TCP_WINDOW_CLAMP:
 535                case TCP_QUICKACK:
 536                case TCP_USER_TIMEOUT:
 537                case TCP_TIMESTAMP:
 538                case TCP_NOTSENT_LOWAT:
 539                case TCP_TX_DELAY:
 540                        return true;
 541                }
 542
 543                /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
 544
 545                /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
 546                 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
 547                 */
 548                /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
 549                 * are not supported fastopen is currently unsupported
 550                 */
 551                /* TCP_INQ is currently unsupported, needs some recvmsg work */
 552        }
 553        return false;
 554}
 555
 556static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
 557                                               unsigned int optlen)
 558{
 559        struct mptcp_subflow_context *subflow;
 560        struct sock *sk = (struct sock *)msk;
 561        char name[TCP_CA_NAME_MAX];
 562        bool cap_net_admin;
 563        int ret;
 564
 565        if (optlen < 1)
 566                return -EINVAL;
 567
 568        ret = strncpy_from_sockptr(name, optval,
 569                                   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
 570        if (ret < 0)
 571                return -EFAULT;
 572
 573        name[ret] = 0;
 574
 575        cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);
 576
 577        ret = 0;
 578        lock_sock(sk);
 579        sockopt_seq_inc(msk);
 580        mptcp_for_each_subflow(msk, subflow) {
 581                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 582                int err;
 583
 584                lock_sock(ssk);
 585                err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
 586                if (err < 0 && ret == 0)
 587                        ret = err;
 588                subflow->setsockopt_seq = msk->setsockopt_seq;
 589                release_sock(ssk);
 590        }
 591
 592        if (ret == 0)
 593                strcpy(msk->ca_name, name);
 594
 595        release_sock(sk);
 596        return ret;
 597}
 598
 599static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 600                                    sockptr_t optval, unsigned int optlen)
 601{
 602        switch (optname) {
 603        case TCP_ULP:
 604                return -EOPNOTSUPP;
 605        case TCP_CONGESTION:
 606                return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
 607        }
 608
 609        return -EOPNOTSUPP;
 610}
 611
 612int mptcp_setsockopt(struct sock *sk, int level, int optname,
 613                     sockptr_t optval, unsigned int optlen)
 614{
 615        struct mptcp_sock *msk = mptcp_sk(sk);
 616        struct sock *ssk;
 617
 618        pr_debug("msk=%p", msk);
 619
 620        if (level == SOL_SOCKET)
 621                return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 622
 623        if (!mptcp_supported_sockopt(level, optname))
 624                return -ENOPROTOOPT;
 625
 626        /* @@ the meaning of setsockopt() when the socket is connected and
 627         * there are multiple subflows is not yet defined. It is up to the
 628         * MPTCP-level socket to configure the subflows until the subflow
 629         * is in TCP fallback, when TCP socket options are passed through
 630         * to the one remaining subflow.
 631         */
 632        lock_sock(sk);
 633        ssk = __mptcp_tcp_fallback(msk);
 634        release_sock(sk);
 635        if (ssk)
 636                return tcp_setsockopt(ssk, level, optname, optval, optlen);
 637
 638        if (level == SOL_IPV6)
 639                return mptcp_setsockopt_v6(msk, optname, optval, optlen);
 640
 641        if (level == SOL_TCP)
 642                return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
 643
 644        return -EOPNOTSUPP;
 645}
 646
 647static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
 648                                          char __user *optval, int __user *optlen)
 649{
 650        struct sock *sk = (struct sock *)msk;
 651        struct socket *ssock;
 652        int ret = -EINVAL;
 653        struct sock *ssk;
 654
 655        lock_sock(sk);
 656        ssk = msk->first;
 657        if (ssk) {
 658                ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
 659                goto out;
 660        }
 661
 662        ssock = __mptcp_nmpc_socket(msk);
 663        if (!ssock)
 664                goto out;
 665
 666        ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
 667
 668out:
 669        release_sock(sk);
 670        return ret;
 671}
 672
 673static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 674                                    char __user *optval, int __user *optlen)
 675{
 676        switch (optname) {
 677        case TCP_ULP:
 678        case TCP_CONGESTION:
 679        case TCP_INFO:
 680        case TCP_CC_INFO:
 681                return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
 682                                                      optval, optlen);
 683        }
 684        return -EOPNOTSUPP;
 685}
 686
 687int mptcp_getsockopt(struct sock *sk, int level, int optname,
 688                     char __user *optval, int __user *option)
 689{
 690        struct mptcp_sock *msk = mptcp_sk(sk);
 691        struct sock *ssk;
 692
 693        pr_debug("msk=%p", msk);
 694
 695        /* @@ the meaning of setsockopt() when the socket is connected and
 696         * there are multiple subflows is not yet defined. It is up to the
 697         * MPTCP-level socket to configure the subflows until the subflow
 698         * is in TCP fallback, when socket options are passed through
 699         * to the one remaining subflow.
 700         */
 701        lock_sock(sk);
 702        ssk = __mptcp_tcp_fallback(msk);
 703        release_sock(sk);
 704        if (ssk)
 705                return tcp_getsockopt(ssk, level, optname, optval, option);
 706
 707        if (level == SOL_TCP)
 708                return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
 709        return -EOPNOTSUPP;
 710}
 711
 712static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 713{
 714        static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
 715        struct sock *sk = (struct sock *)msk;
 716
 717        if (ssk->sk_prot->keepalive) {
 718                if (sock_flag(sk, SOCK_KEEPOPEN))
 719                        ssk->sk_prot->keepalive(ssk, 1);
 720                else
 721                        ssk->sk_prot->keepalive(ssk, 0);
 722        }
 723
 724        ssk->sk_priority = sk->sk_priority;
 725        ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
 726        ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
 727
 728        if (sk->sk_userlocks & tx_rx_locks) {
 729                ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
 730                if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
 731                        WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
 732                if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
 733                        WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 734        }
 735
 736        if (sock_flag(sk, SOCK_LINGER)) {
 737                ssk->sk_lingertime = sk->sk_lingertime;
 738                sock_set_flag(ssk, SOCK_LINGER);
 739        } else {
 740                sock_reset_flag(ssk, SOCK_LINGER);
 741        }
 742
 743        if (sk->sk_mark != ssk->sk_mark) {
 744                ssk->sk_mark = sk->sk_mark;
 745                sk_dst_reset(ssk);
 746        }
 747
 748        sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
 749
 750        if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
 751                tcp_set_congestion_control(ssk, msk->ca_name, false, true);
 752}
 753
 754static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 755{
 756        bool slow = lock_sock_fast(ssk);
 757
 758        sync_socket_options(msk, ssk);
 759
 760        unlock_sock_fast(ssk, slow);
 761}
 762
 763void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 764{
 765        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 766
 767        msk_owned_by_me(msk);
 768
 769        if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
 770                __mptcp_sockopt_sync(msk, ssk);
 771
 772                subflow->setsockopt_seq = msk->setsockopt_seq;
 773        }
 774}
 775
 776void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
 777{
 778        struct mptcp_subflow_context *subflow;
 779        struct sock *sk = (struct sock *)msk;
 780        u32 seq;
 781
 782        seq = sockopt_seq_reset(sk);
 783
 784        mptcp_for_each_subflow(msk, subflow) {
 785                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 786                u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 787
 788                if (sseq != msk->setsockopt_seq) {
 789                        __mptcp_sockopt_sync(msk, ssk);
 790                        WRITE_ONCE(subflow->setsockopt_seq, seq);
 791                } else if (sseq != seq) {
 792                        WRITE_ONCE(subflow->setsockopt_seq, seq);
 793                }
 794
 795                cond_resched();
 796        }
 797
 798        msk->setsockopt_seq = seq;
 799}
 800