linux/net/socket.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * NET          An implementation of the SOCKET network access protocol.
   4 *
   5 * Version:     @(#)socket.c    1.1.93  18/02/95
   6 *
   7 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   8 *              Ross Biro
   9 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  10 *
  11 * Fixes:
  12 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  13 *                                      shutdown()
  14 *              Alan Cox        :       verify_area() fixes
  15 *              Alan Cox        :       Removed DDI
  16 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  17 *              Alan Cox        :       Moved a load of checks to the very
  18 *                                      top level.
  19 *              Alan Cox        :       Move address structures to/from user
  20 *                                      mode above the protocol layers.
  21 *              Rob Janssen     :       Allow 0 length sends.
  22 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  23 *                                      tty drivers).
  24 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  25 *              Jeff Uphoff     :       Made max number of sockets command-line
  26 *                                      configurable.
  27 *              Matti Aarnio    :       Made the number of sockets dynamic,
  28 *                                      to be allocated when needed, and mr.
  29 *                                      Uphoff's max is used as max to be
  30 *                                      allowed to allocate.
  31 *              Linus           :       Argh. removed all the socket allocation
  32 *                                      altogether: it's in the inode now.
  33 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  34 *                                      for NetROM and future kernel nfsd type
  35 *                                      stuff.
  36 *              Alan Cox        :       sendmsg/recvmsg basics.
  37 *              Tom Dyas        :       Export net symbols.
  38 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  39 *              Alan Cox        :       Added thread locking to sys_* calls
  40 *                                      for sockets. May have errors at the
  41 *                                      moment.
  42 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  43 *              Andi Kleen      :       Some small cleanups, optimizations,
  44 *                                      and fixed a copy_from_user() bug.
  45 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  46 *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  47 *                                      protocol-independent
  48 *
  49 *      This module is effectively the top level interface to the BSD socket
  50 *      paradigm.
  51 *
  52 *      Based upon Swansea University Computer Society NET3.039
  53 */
  54
  55#include <linux/bpf-cgroup.h>
  56#include <linux/ethtool.h>
  57#include <linux/mm.h>
  58#include <linux/socket.h>
  59#include <linux/file.h>
  60#include <linux/splice.h>
  61#include <linux/net.h>
  62#include <linux/interrupt.h>
  63#include <linux/thread_info.h>
  64#include <linux/rcupdate.h>
  65#include <linux/netdevice.h>
  66#include <linux/proc_fs.h>
  67#include <linux/seq_file.h>
  68#include <linux/mutex.h>
  69#include <linux/if_bridge.h>
  70#include <linux/if_vlan.h>
  71#include <linux/ptp_classify.h>
  72#include <linux/init.h>
  73#include <linux/poll.h>
  74#include <linux/cache.h>
  75#include <linux/module.h>
  76#include <linux/highmem.h>
  77#include <linux/mount.h>
  78#include <linux/pseudo_fs.h>
  79#include <linux/security.h>
  80#include <linux/syscalls.h>
  81#include <linux/compat.h>
  82#include <linux/kmod.h>
  83#include <linux/audit.h>
  84#include <linux/wireless.h>
  85#include <linux/nsproxy.h>
  86#include <linux/magic.h>
  87#include <linux/slab.h>
  88#include <linux/xattr.h>
  89#include <linux/nospec.h>
  90#include <linux/indirect_call_wrapper.h>
  91#include <linux/io_uring.h>
  92
  93#include <linux/uaccess.h>
  94#include <asm/unistd.h>
  95
  96#include <net/compat.h>
  97#include <net/wext.h>
  98#include <net/cls_cgroup.h>
  99
 100#include <net/sock.h>
 101#include <linux/netfilter.h>
 102
 103#include <linux/if_tun.h>
 104#include <linux/ipv6_route.h>
 105#include <linux/route.h>
 106#include <linux/termios.h>
 107#include <linux/sockios.h>
 108#include <net/busy_poll.h>
 109#include <linux/errqueue.h>
 110#include <linux/ptp_clock_kernel.h>
 111#include <trace/events/sock.h>
 112
  /*
   * Busy-poll tunables. These globals only exist when the kernel is built
   * with CONFIG_NET_RX_BUSY_POLL.
   * NOTE(review): presumably exported as sysctls (per the sysctl_ prefix);
   * the sysctl table is not visible here - confirm.
   */
  113#ifdef CONFIG_NET_RX_BUSY_POLL
  114unsigned int sysctl_net_busy_read __read_mostly;
  115unsigned int sysctl_net_busy_poll __read_mostly;
  116#endif
 117
  /*
   * Forward declarations of the file_operations callbacks that are wired
   * into socket_file_ops below. compat_sock_ioctl is only declared when
   * CONFIG_COMPAT is enabled.
   */
  118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
  119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
  120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
  121
  122static int sock_close(struct inode *inode, struct file *file);
  123static __poll_t sock_poll(struct file *file,
  124                              struct poll_table_struct *wait);
  125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  126#ifdef CONFIG_COMPAT
  127static long compat_sock_ioctl(struct file *file,
  128                              unsigned int cmd, unsigned long arg);
  129#endif
  130static int sock_fasync(int fd, struct file *filp, int on);
  131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
  132                                struct pipe_inode_info *pipe, size_t len,
  133                                unsigned int flags);
  134static void sock_splice_eof(struct file *file);
 135
 136#ifdef CONFIG_PROC_FS
 137static void sock_show_fdinfo(struct seq_file *m, struct file *f)
 138{
 139        struct socket *sock = f->private_data;
 140        const struct proto_ops *ops = READ_ONCE(sock->ops);
 141
 142        if (ops->show_fdinfo)
 143                ops->show_fdinfo(m, sock);
 144}
 145#else
 146#define sock_show_fdinfo NULL
 147#endif
 148
 149/*
 150 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 151 *      in the operation structures but are done directly via the socketcall() multiplexor.
 152 */
 153
  /*
   * File operations table for socket files. sock_alloc_file() passes it to
   * alloc_file_pseudo(), and sock_from_file() identifies a socket file by
   * comparing file->f_op against the address of this table.
   */
  154static const struct file_operations socket_file_ops = {
  155        .owner =        THIS_MODULE,
  156        .llseek =       no_llseek,
  157        .read_iter =    sock_read_iter,
  158        .write_iter =   sock_write_iter,
  159        .poll =         sock_poll,
  160        .unlocked_ioctl = sock_ioctl,
  161#ifdef CONFIG_COMPAT
  162        .compat_ioctl = compat_sock_ioctl,
  163#endif
  164        .uring_cmd =    io_uring_cmd_sock,
  165        .mmap =         sock_mmap,
  166        .release =      sock_close,
  167        .fasync =       sock_fasync,
  168        .splice_write = splice_to_socket,
  169        .splice_read =  sock_splice_read,
  170        .splice_eof =   sock_splice_eof,
  171        .show_fdinfo =  sock_show_fdinfo,
  172};
 173
  /*
   * Printable names of the protocol families, indexed by PF_* constant.
   * Entries are set with designated initialisers, so any index without an
   * explicit entry is a NULL pointer.
   * NOTE(review): the consumers of this table are outside the visible part
   * of the file.
   */
  174static const char * const pf_family_names[] = {
  175        [PF_UNSPEC]     = "PF_UNSPEC",
  176        [PF_UNIX]       = "PF_UNIX/PF_LOCAL",
  177        [PF_INET]       = "PF_INET",
  178        [PF_AX25]       = "PF_AX25",
  179        [PF_IPX]        = "PF_IPX",
  180        [PF_APPLETALK]  = "PF_APPLETALK",
  181        [PF_NETROM]     = "PF_NETROM",
  182        [PF_BRIDGE]     = "PF_BRIDGE",
  183        [PF_ATMPVC]     = "PF_ATMPVC",
  184        [PF_X25]        = "PF_X25",
  185        [PF_INET6]      = "PF_INET6",
  186        [PF_ROSE]       = "PF_ROSE",
  187        [PF_DECnet]     = "PF_DECnet",
  188        [PF_NETBEUI]    = "PF_NETBEUI",
  189        [PF_SECURITY]   = "PF_SECURITY",
  190        [PF_KEY]        = "PF_KEY",
  191        [PF_NETLINK]    = "PF_NETLINK/PF_ROUTE",
  192        [PF_PACKET]     = "PF_PACKET",
  193        [PF_ASH]        = "PF_ASH",
  194        [PF_ECONET]     = "PF_ECONET",
  195        [PF_ATMSVC]     = "PF_ATMSVC",
  196        [PF_RDS]        = "PF_RDS",
  197        [PF_SNA]        = "PF_SNA",
  198        [PF_IRDA]       = "PF_IRDA",
  199        [PF_PPPOX]      = "PF_PPPOX",
  200        [PF_WANPIPE]    = "PF_WANPIPE",
  201        [PF_LLC]        = "PF_LLC",
  202        [PF_IB]         = "PF_IB",
  203        [PF_MPLS]       = "PF_MPLS",
  204        [PF_CAN]        = "PF_CAN",
  205        [PF_TIPC]       = "PF_TIPC",
  206        [PF_BLUETOOTH]  = "PF_BLUETOOTH",
  207        [PF_IUCV]       = "PF_IUCV",
  208        [PF_RXRPC]      = "PF_RXRPC",
  209        [PF_ISDN]       = "PF_ISDN",
  210        [PF_PHONET]     = "PF_PHONET",
  211        [PF_IEEE802154] = "PF_IEEE802154",
  212        [PF_CAIF]       = "PF_CAIF",
  213        [PF_ALG]        = "PF_ALG",
  214        [PF_NFC]        = "PF_NFC",
  215        [PF_VSOCK]      = "PF_VSOCK",
  216        [PF_KCM]        = "PF_KCM",
  217        [PF_QIPCRTR]    = "PF_QIPCRTR",
  218        [PF_SMC]        = "PF_SMC",
  219        [PF_XDP]        = "PF_XDP",
  220        [PF_MCTP]       = "PF_MCTP",
  221};
 222
 223/*
 224 *      The protocol list. Each protocol is registered in here.
 225 */
 226
  /*
   * net_families[] holds one (possibly NULL) net_proto_family pointer per
   * protocol family number, NPROTO slots in total.
   * NOTE(review): presumably writers serialise on net_family_lock while
   * readers rely on RCU (per the __rcu annotation); the registration and
   * lookup code is not visible here - confirm.
   */
  227static DEFINE_SPINLOCK(net_family_lock);
  228static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 229
 230/*
 231 * Support routines.
 232 * Move socket addresses back and forth across the kernel/user
 233 * divide and look after the messy bits.
 234 */
 235
 236/**
 237 *      move_addr_to_kernel     -       copy a socket address into kernel space
 238 *      @uaddr: Address in user space
 239 *      @kaddr: Address in kernel space
 240 *      @ulen: Length in user space
 241 *
 242 *      The address is copied into kernel space. If the provided address is
 243 *      too long an error code of -EINVAL is returned. If the copy gives
 244 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 245 */
 246
 247int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
 248{
 249        if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 250                return -EINVAL;
 251        if (ulen == 0)
 252                return 0;
 253        if (copy_from_user(kaddr, uaddr, ulen))
 254                return -EFAULT;
 255        return audit_sockaddr(ulen, kaddr);
 256}
 257
 258/**
 259 *      move_addr_to_user       -       copy an address to user space
 260 *      @kaddr: kernel space address
 261 *      @klen: length of address in kernel
 262 *      @uaddr: user space address
 263 *      @ulen: pointer to user length field
 264 *
 265 *      The value pointed to by ulen on entry is the buffer length available.
 266 *      This is overwritten with the buffer space used. -EINVAL is returned
 267 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 268 *      is returned if either the buffer or the length field are not
 269 *      accessible.
 270 *      After copying the data up to the limit the user specifies, the true
 271 *      length of the data is written over the length limit the user
 272 *      specified. Zero is returned for a success.
 273 */
 274
 275static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
 276                             void __user *uaddr, int __user *ulen)
 277{
 278        int err;
 279        int len;
 280
 281        BUG_ON(klen > sizeof(struct sockaddr_storage));
 282        err = get_user(len, ulen);
 283        if (err)
 284                return err;
 285        if (len > klen)
 286                len = klen;
 287        if (len < 0)
 288                return -EINVAL;
 289        if (len) {
 290                if (audit_sockaddr(klen, kaddr))
 291                        return -ENOMEM;
 292                if (copy_to_user(uaddr, kaddr, len))
 293                        return -EFAULT;
 294        }
 295        /*
 296         *      "fromlen shall refer to the value before truncation.."
 297         *                      1003.1g
 298         */
 299        return __put_user(klen, ulen);
 300}
 301
  /*
   * Slab cache for struct socket_alloc objects: created by init_inodecache(),
   * allocated from in sock_alloc_inode() and freed to in sock_free_inode().
   */
  302static struct kmem_cache *sock_inode_cachep __ro_after_init;
 303
 304static struct inode *sock_alloc_inode(struct super_block *sb)
 305{
 306        struct socket_alloc *ei;
 307
 308        ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
 309        if (!ei)
 310                return NULL;
 311        init_waitqueue_head(&ei->socket.wq.wait);
 312        ei->socket.wq.fasync_list = NULL;
 313        ei->socket.wq.flags = 0;
 314
 315        ei->socket.state = SS_UNCONNECTED;
 316        ei->socket.flags = 0;
 317        ei->socket.ops = NULL;
 318        ei->socket.sk = NULL;
 319        ei->socket.file = NULL;
 320
 321        return &ei->vfs_inode;
 322}
 323
 324static void sock_free_inode(struct inode *inode)
 325{
 326        struct socket_alloc *ei;
 327
 328        ei = container_of(inode, struct socket_alloc, vfs_inode);
 329        kmem_cache_free(sock_inode_cachep, ei);
 330}
 331
 332static void init_once(void *foo)
 333{
 334        struct socket_alloc *ei = (struct socket_alloc *)foo;
 335
 336        inode_init_once(&ei->vfs_inode);
 337}
 338
 339static void init_inodecache(void)
 340{
 341        sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 342                                              sizeof(struct socket_alloc),
 343                                              0,
 344                                              (SLAB_HWCACHE_ALIGN |
 345                                               SLAB_RECLAIM_ACCOUNT |
 346                                               SLAB_MEM_SPREAD | SLAB_ACCOUNT),
 347                                              init_once);
 348        BUG_ON(sock_inode_cachep == NULL);
 349}
 350
  /*
   * Superblock operations for sockfs; installed on the pseudo filesystem
   * context by sockfs_init_fs_context().
   */
  351static const struct super_operations sockfs_ops = {
  352        .alloc_inode    = sock_alloc_inode,
  353        .free_inode     = sock_free_inode,
  354        .statfs         = simple_statfs,
  355};
 356
 357/*
 358 * sockfs_dname() is called from d_path().
 359 */
 360static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 361{
 362        return dynamic_dname(buffer, buflen, "socket:[%lu]",
 363                                d_inode(dentry)->i_ino);
 364}
 365
  /* Dentry operations for sockfs; only the name-formatting hook is provided. */
  366static const struct dentry_operations sockfs_dentry_operations = {
  367        .d_dname  = sockfs_dname,
  368};
 369
 370static int sockfs_xattr_get(const struct xattr_handler *handler,
 371                            struct dentry *dentry, struct inode *inode,
 372                            const char *suffix, void *value, size_t size)
 373{
 374        if (value) {
 375                if (dentry->d_name.len + 1 > size)
 376                        return -ERANGE;
 377                memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
 378        }
 379        return dentry->d_name.len + 1;
 380}
 381
  /*
   * Name of the socket-protocol-name extended attribute: the system xattr
   * prefix concatenated with "sockprotoname". The _LEN macro is the string
   * length without the terminating NUL.
   */
  382#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
  383#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
  384#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
  385
  /* Read-only handler for that attribute: only a ->get hook is provided. */
  386static const struct xattr_handler sockfs_xattr_handler = {
  387        .name = XATTR_NAME_SOCKPROTONAME,
  388        .get = sockfs_xattr_get,
  389};
 390
 391static int sockfs_security_xattr_set(const struct xattr_handler *handler,
 392                                     struct mnt_idmap *idmap,
 393                                     struct dentry *dentry, struct inode *inode,
 394                                     const char *suffix, const void *value,
 395                                     size_t size, int flags)
 396{
 397        /* Handled by LSM. */
 398        return -EAGAIN;
 399}
 400
  /* Handler for the security xattr prefix: only a ->set hook is provided. */
  401static const struct xattr_handler sockfs_security_xattr_handler = {
  402        .prefix = XATTR_SECURITY_PREFIX,
  403        .set = sockfs_security_xattr_set,
  404};
  405
  /*
   * NULL-terminated list of the xattr handlers of sockfs; installed on the
   * pseudo filesystem context by sockfs_init_fs_context().
   */
  406static const struct xattr_handler * const sockfs_xattr_handlers[] = {
  407        &sockfs_xattr_handler,
  408        &sockfs_security_xattr_handler,
  409        NULL
  410};
 411
 412static int sockfs_init_fs_context(struct fs_context *fc)
 413{
 414        struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
 415        if (!ctx)
 416                return -ENOMEM;
 417        ctx->ops = &sockfs_ops;
 418        ctx->dops = &sockfs_dentry_operations;
 419        ctx->xattr = sockfs_xattr_handlers;
 420        return 0;
 421}
 422
  /*
   * Mount of the sockfs pseudo filesystem. sock_alloc() takes new inodes
   * from its superblock and sock_alloc_file() creates socket files on it.
   * NOTE(review): the code that assigns sock_mnt is outside this view.
   */
  423static struct vfsmount *sock_mnt __read_mostly;
  424
  /* Filesystem type descriptor for "sockfs". */
  425static struct file_system_type sock_fs_type = {
  426        .name =         "sockfs",
  427        .init_fs_context = sockfs_init_fs_context,
  428        .kill_sb =      kill_anon_super,
  429};
 430
 431/*
 432 *      Obtains the first available file descriptor and sets it up for use.
 433 *
  434 *      These functions create file structures and map them to fd space
 435 *      of the current process. On success it returns file descriptor
 436 *      and file struct implicitly stored in sock->file.
 437 *      Note that another thread may close file descriptor before we return
 438 *      from this function. We use the fact that now we do not refer
 439 *      to socket after mapping. If one day we will need it, this
 440 *      function will increment ref. count on file by 1.
 441 *
  442 *      In any case the returned fd MAY NOT be valid!
 443 *      This race condition is unavoidable
 444 *      with shared fd spaces, we cannot solve it inside kernel,
 445 *      but we take care of internal coherence yet.
 446 */
 447
 448/**
 449 *      sock_alloc_file - Bind a &socket to a &file
 450 *      @sock: socket
 451 *      @flags: file status flags
 452 *      @dname: protocol name
 453 *
 454 *      Returns the &file bound with @sock, implicitly storing it
 455 *      in sock->file. If dname is %NULL, sets to "".
 456 *
 457 *      On failure @sock is released, and an ERR pointer is returned.
 458 *
 459 *      This function uses GFP_KERNEL internally.
 460 */
 461
 462struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 463{
 464        struct file *file;
 465
 466        if (!dname)
 467                dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
 468
 469        file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
 470                                O_RDWR | (flags & O_NONBLOCK),
 471                                &socket_file_ops);
 472        if (IS_ERR(file)) {
 473                sock_release(sock);
 474                return file;
 475        }
 476
 477        file->f_mode |= FMODE_NOWAIT;
 478        sock->file = file;
 479        file->private_data = sock;
 480        stream_open(SOCK_INODE(sock), file);
 481        return file;
 482}
 483EXPORT_SYMBOL(sock_alloc_file);
 484
 485static int sock_map_fd(struct socket *sock, int flags)
 486{
 487        struct file *newfile;
 488        int fd = get_unused_fd_flags(flags);
 489        if (unlikely(fd < 0)) {
 490                sock_release(sock);
 491                return fd;
 492        }
 493
 494        newfile = sock_alloc_file(sock, flags, NULL);
 495        if (!IS_ERR(newfile)) {
 496                fd_install(fd, newfile);
 497                return fd;
 498        }
 499
 500        put_unused_fd(fd);
 501        return PTR_ERR(newfile);
 502}
 503
 504/**
  505 *      sock_from_file - Return the &socket bound to @file.
 506 *      @file: file
 507 *
 508 *      On failure returns %NULL.
 509 */
 510
 511struct socket *sock_from_file(struct file *file)
 512{
 513        if (file->f_op == &socket_file_ops)
 514                return file->private_data;      /* set in sock_alloc_file */
 515
 516        return NULL;
 517}
 518EXPORT_SYMBOL(sock_from_file);
 519
 520/**
 521 *      sockfd_lookup - Go from a file number to its socket slot
 522 *      @fd: file handle
 523 *      @err: pointer to an error code return
 524 *
 525 *      The file handle passed in is locked and the socket it is bound
 526 *      to is returned. If an error occurs the err pointer is overwritten
 527 *      with a negative errno code and NULL is returned. The function checks
 528 *      for both invalid handles and passing a handle which is not a socket.
 529 *
 530 *      On a success the socket object pointer is returned.
 531 */
 532
 533struct socket *sockfd_lookup(int fd, int *err)
 534{
 535        struct file *file;
 536        struct socket *sock;
 537
 538        file = fget(fd);
 539        if (!file) {
 540                *err = -EBADF;
 541                return NULL;
 542        }
 543
 544        sock = sock_from_file(file);
 545        if (!sock) {
 546                *err = -ENOTSOCK;
 547                fput(file);
 548        }
 549        return sock;
 550}
 551EXPORT_SYMBOL(sockfd_lookup);
 552
 553static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 554{
 555        struct fd f = fdget(fd);
 556        struct socket *sock;
 557
 558        *err = -EBADF;
 559        if (f.file) {
 560                sock = sock_from_file(f.file);
 561                if (likely(sock)) {
 562                        *fput_needed = f.flags & FDPUT_FPUT;
 563                        return sock;
 564                }
 565                *err = -ENOTSOCK;
 566                fdput(f);
 567        }
 568        return NULL;
 569}
 570
 571static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
 572                                size_t size)
 573{
 574        ssize_t len;
 575        ssize_t used = 0;
 576
 577        len = security_inode_listsecurity(d_inode(dentry), buffer, size);
 578        if (len < 0)
 579                return len;
 580        used += len;
 581        if (buffer) {
 582                if (size < used)
 583                        return -ERANGE;
 584                buffer += len;
 585        }
 586
 587        len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
 588        used += len;
 589        if (buffer) {
 590                if (size < used)
 591                        return -ERANGE;
 592                memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
 593                buffer += len;
 594        }
 595
 596        return used;
 597}
 598
 599static int sockfs_setattr(struct mnt_idmap *idmap,
 600                          struct dentry *dentry, struct iattr *iattr)
 601{
 602        int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
 603
 604        if (!err && (iattr->ia_valid & ATTR_UID)) {
 605                struct socket *sock = SOCKET_I(d_inode(dentry));
 606
 607                if (sock->sk)
 608                        sock->sk->sk_uid = iattr->ia_uid;
 609                else
 610                        err = -ENOENT;
 611        }
 612
 613        return err;
 614}
 615
  /* Inode operations assigned to every socket inode by sock_alloc(). */
  616static const struct inode_operations sockfs_inode_ops = {
  617        .listxattr = sockfs_listxattr,
  618        .setattr = sockfs_setattr,
  619};
 620
 621/**
 622 *      sock_alloc - allocate a socket
 623 *
 624 *      Allocate a new inode and socket object. The two are bound together
 625 *      and initialised. The socket is then returned. If we are out of inodes
  626 *      NULL is returned. This function uses GFP_KERNEL internally.
 627 */
 628
 629struct socket *sock_alloc(void)
 630{
 631        struct inode *inode;
 632        struct socket *sock;
 633
 634        inode = new_inode_pseudo(sock_mnt->mnt_sb);
 635        if (!inode)
 636                return NULL;
 637
 638        sock = SOCKET_I(inode);
 639
 640        inode->i_ino = get_next_ino();
 641        inode->i_mode = S_IFSOCK | S_IRWXUGO;
 642        inode->i_uid = current_fsuid();
 643        inode->i_gid = current_fsgid();
 644        inode->i_op = &sockfs_inode_ops;
 645
 646        return sock;
 647}
 648EXPORT_SYMBOL(sock_alloc);
 649
 650static void __sock_release(struct socket *sock, struct inode *inode)
 651{
 652        const struct proto_ops *ops = READ_ONCE(sock->ops);
 653
 654        if (ops) {
 655                struct module *owner = ops->owner;
 656
 657                if (inode)
 658                        inode_lock(inode);
 659                ops->release(sock);
 660                sock->sk = NULL;
 661                if (inode)
 662                        inode_unlock(inode);
 663                sock->ops = NULL;
 664                module_put(owner);
 665        }
 666
 667        if (sock->wq.fasync_list)
 668                pr_err("%s: fasync list not empty!\n", __func__);
 669
 670        if (!sock->file) {
 671                iput(SOCK_INODE(sock));
 672                return;
 673        }
 674        sock->file = NULL;
 675}
 676
 677/**
 678 *      sock_release - close a socket
 679 *      @sock: socket to close
 680 *
 681 *      The socket is released from the protocol stack if it has a release
 682 *      callback, and the inode is then released if the socket is bound to
 683 *      an inode not a file.
 684 */
 685void sock_release(struct socket *sock)
 686{
 687        __sock_release(sock, NULL);
 688}
 689EXPORT_SYMBOL(sock_release);
 690
 691void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
 692{
 693        u8 flags = *tx_flags;
 694
 695        if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
 696                flags |= SKBTX_HW_TSTAMP;
 697
 698                /* PTP hardware clocks can provide a free running cycle counter
 699                 * as a time base for virtual clocks. Tell driver to use the
 700                 * free running cycle counter for timestamp if socket is bound
 701                 * to virtual clock.
 702                 */
 703                if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
 704                        flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
 705        }
 706
 707        if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
 708                flags |= SKBTX_SW_TSTAMP;
 709
 710        if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
 711                flags |= SKBTX_SCHED_TSTAMP;
 712
 713        *tx_flags = flags;
 714}
 715EXPORT_SYMBOL(__sock_tx_timestamp);
 716
  /*
   * Declarations of the two sendmsg implementations that
   * sock_sendmsg_nosec() names as candidates in its INDIRECT_CALL_INET()
   * invocation.
   */
  717INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
  718                                           size_t));
  719INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
  720                                            size_t));
 721
 722static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
 723                                                 int flags)
 724{
 725        trace_sock_send_length(sk, ret, 0);
 726}
 727
 728static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 729{
 730        int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
 731                                     inet_sendmsg, sock, msg,
 732                                     msg_data_left(msg));
 733        BUG_ON(ret == -EIOCBQUEUED);
 734
 735        if (trace_sock_send_length_enabled())
 736                call_trace_sock_send_length(sock->sk, ret, 0);
 737        return ret;
 738}
 739
 740static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
 741{
 742        int err = security_socket_sendmsg(sock, msg,
 743                                          msg_data_left(msg));
 744
 745        return err ?: sock_sendmsg_nosec(sock, msg);
 746}
 747
 748/**
 749 *      sock_sendmsg - send a message through @sock
 750 *      @sock: socket
 751 *      @msg: message to send
 752 *
 753 *      Sends @msg through @sock, passing through LSM.
 754 *      Returns the number of bytes sent, or an error code.
 755 */
 756int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 757{
 758        struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
 759        struct sockaddr_storage address;
 760        int save_len = msg->msg_namelen;
 761        int ret;
 762
 763        if (msg->msg_name) {
 764                memcpy(&address, msg->msg_name, msg->msg_namelen);
 765                msg->msg_name = &address;
 766        }
 767
 768        ret = __sock_sendmsg(sock, msg);
 769        msg->msg_name = save_addr;
 770        msg->msg_namelen = save_len;
 771
 772        return ret;
 773}
 774EXPORT_SYMBOL(sock_sendmsg);
 775
 776/**
 777 *      kernel_sendmsg - send a message through @sock (kernel-space)
 778 *      @sock: socket
 779 *      @msg: message header
 780 *      @vec: kernel vec
 781 *      @num: vec array length
 782 *      @size: total message data size
 783 *
 784 *      Builds the message data with @vec and sends it through @sock.
 785 *      Returns the number of bytes sent, or an error code.
 786 */
 787
 788int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 789                   struct kvec *vec, size_t num, size_t size)
 790{
 791        iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
 792        return sock_sendmsg(sock, msg);
 793}
 794EXPORT_SYMBOL(kernel_sendmsg);
 795
  796/**
  797 *      kernel_sendmsg_locked - send a message through @sk's socket (kernel-space)
  798 *      @sk: sock
  799 *      @msg: message header
  800 *      @vec: kernel vec holding the data to send
  801 *      @num: vec array length
  802 *      @size: total message data size
  803 *
  804 *      Builds the message data with @vec and sends it through the socket
  805 *      attached to @sk. Returns the number of bytes sent, or an error code.
  806 *      Caller must hold @sk.
  807 */
 808
 809int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 810                          struct kvec *vec, size_t num, size_t size)
 811{
 812        struct socket *sock = sk->sk_socket;
 813        const struct proto_ops *ops = READ_ONCE(sock->ops);
 814
 815        if (!ops->sendmsg_locked)
 816                return sock_no_sendmsg_locked(sk, msg, size);
 817
 818        iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
 819
 820        return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 821}
 822EXPORT_SYMBOL(kernel_sendmsg_locked);
 823
 824static bool skb_is_err_queue(const struct sk_buff *skb)
 825{
 826        /* pkt_type of skbs enqueued on the error queue are set to
 827         * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
 828         * in recvmsg, since skbs received on a local socket will never
 829         * have a pkt_type of PACKET_OUTGOING.
 830         */
 831        return skb->pkt_type == PACKET_OUTGOING;
 832}
 833
 834/* On transmit, software and hardware timestamps are returned independently.
 835 * As the two skb clones share the hardware timestamp, which may be updated
 836 * before the software timestamp is received, a hardware TX timestamp may be
 837 * returned only if there is no software TX timestamp. Ignore false software
 838 * timestamps, which may be made in the __sock_recv_timestamp() call when the
 839 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
 840 * hardware timestamp.
 841 */
 842static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
 843{
 844        return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
 845}
 846
 847static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
 848{
 849        bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
 850        struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 851        struct net_device *orig_dev;
 852        ktime_t hwtstamp;
 853
 854        rcu_read_lock();
 855        orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
 856        if (orig_dev) {
 857                *if_index = orig_dev->ifindex;
 858                hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
 859        } else {
 860                hwtstamp = shhwtstamps->hwtstamp;
 861        }
 862        rcu_read_unlock();
 863
 864        return hwtstamp;
 865}
 866
 867static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
 868                           int if_index)
 869{
 870        struct scm_ts_pktinfo ts_pktinfo;
 871        struct net_device *orig_dev;
 872
 873        if (!skb_mac_header_was_set(skb))
 874                return;
 875
 876        memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
 877
 878        if (!if_index) {
 879                rcu_read_lock();
 880                orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
 881                if (orig_dev)
 882                        if_index = orig_dev->ifindex;
 883                rcu_read_unlock();
 884        }
 885        ts_pktinfo.if_index = if_index;
 886
 887        ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
 888        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
 889                 sizeof(ts_pktinfo), &ts_pktinfo);
 890}
 891
/*
 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 *
 * Adds the timestamp control messages for @skb to @msg:
 *
 *  - when SOCK_RCVTSTAMP is set, one SO_TIMESTAMP{,NS}_{OLD,NEW} message,
 *    selected by SOCK_RCVTSTAMPNS (timespec vs. timeval) and SOCK_TSTAMP_NEW
 *    (new vs. old layout);
 *  - when sk_tsflags asks for it and at least one timestamp converts to a
 *    timespec, an scm_timestamping message with the software timestamp in
 *    slot 0 and the hardware timestamp in slot 2, optionally accompanied by
 *    SCM_TIMESTAMPING_PKTINFO and SCM_TIMESTAMPING_OPT_STATS.
 */
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
        struct sk_buff *skb)
{
        int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
        int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
        struct scm_timestamping_internal tss;
        int empty = 1, false_tstamp = 0;
        struct skb_shared_hwtstamps *shhwtstamps =
                skb_hwtstamps(skb);
        int if_index;
        ktime_t hwtstamp;
        u32 tsflags;

        /* Race occurred between timestamp enabling and packet
           receiving.  Fill in the current time for now. */
        if (need_software_tstamp && skb->tstamp == 0) {
                __net_timestamp(skb);
                /* remember that this stamp was made up here, see
                 * skb_is_swtx_tstamp()
                 */
                false_tstamp = 1;
        }

        /* Plain SO_TIMESTAMP* message: pick the layout from the socket flags */
        if (need_software_tstamp) {
                if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
                        if (new_tstamp) {
                                struct __kernel_sock_timeval tv;

                                skb_get_new_timestamp(skb, &tv);
                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
                                         sizeof(tv), &tv);
                        } else {
                                struct __kernel_old_timeval tv;

                                skb_get_timestamp(skb, &tv);
                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
                                         sizeof(tv), &tv);
                        }
                } else {
                        if (new_tstamp) {
                                struct __kernel_timespec ts;

                                skb_get_new_timestampns(skb, &ts);
                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
                                         sizeof(ts), &ts);
                        } else {
                                struct __kernel_old_timespec ts;

                                skb_get_timestampns(skb, &ts);
                                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
                                         sizeof(ts), &ts);
                        }
                }
        }

        /* Build the scm_timestamping payload; "empty" stays set until one
         * of the slots has been filled in.
         */
        memset(&tss, 0, sizeof(tss));
        tsflags = READ_ONCE(sk->sk_tsflags);
        /* slot 0: software timestamp */
        if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
            ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
                empty = 0;
        /* slot 2: raw hardware timestamp, skipped when the skb carries a
         * genuine software TX timestamp
         */
        if (shhwtstamps &&
            (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
            !skb_is_swtx_tstamp(skb, false_tstamp)) {
                /* 0 means "not known yet"; put_ts_pktinfo() then looks the
                 * index up itself
                 */
                if_index = 0;
                if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
                        hwtstamp = get_timestamp(sk, skb, &if_index);
                else
                        hwtstamp = shhwtstamps->hwtstamp;

                if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
                        hwtstamp = ptp_convert_timestamp(&hwtstamp,
                                                         READ_ONCE(sk->sk_bind_phc));

                if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
                        empty = 0;

                        /* packet info is only reported for skbs that did
                         * not come from the error queue
                         */
                        if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
                            !skb_is_err_queue(skb))
                                put_ts_pktinfo(msg, skb, if_index);
                }
        }
        if (!empty) {
                if (sock_flag(sk, SOCK_TSTAMP_NEW))
                        put_cmsg_scm_timestamping64(msg, &tss);
                else
                        put_cmsg_scm_timestamping(msg, &tss);

                /* For error-queue skbs flagged with opt_stats, the skb data
                 * itself is passed on as SCM_TIMESTAMPING_OPT_STATS.
                 */
                if (skb_is_err_queue(skb) && skb->len &&
                    SKB_EXT_ERR(skb)->opt_stats)
                        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
                                 skb->len, skb->data);
        }
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 986
 987#ifdef CONFIG_WIRELESS
 988void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 989        struct sk_buff *skb)
 990{
 991        int ack;
 992
 993        if (!sock_flag(sk, SOCK_WIFI_STATUS))
 994                return;
 995        if (!skb->wifi_acked_valid)
 996                return;
 997
 998        ack = skb->wifi_acked;
 999
1000        put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
1001}
1002EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
1003#endif
1004
1005static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
1006                                   struct sk_buff *skb)
1007{
1008        if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
1009                put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
1010                        sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
1011}
1012
1013static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
1014                           struct sk_buff *skb)
1015{
1016        if (sock_flag(sk, SOCK_RCVMARK) && skb) {
1017                /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
1018                __u32 mark = skb->mark;
1019
1020                put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
1021        }
1022}
1023
/*
 * Add the generic receive-side control messages for @skb to @msg: first the
 * timestamp messages, then the receive-queue drop count, then the skb mark.
 * Each helper decides on its own whether anything has to be emitted.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
                       struct sk_buff *skb)
{
        sock_recv_timestamp(msg, sk, skb);
        sock_recv_drops(msg, sk, skb);
        sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
1032
1033INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
1034                                           size_t, int));
1035INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1036                                            size_t, int));
1037
/*
 * Out-of-line wrapper around the sock_recv_length tracepoint. It is marked
 * noinline and is only called by sock_recvmsg_nosec() once
 * trace_sock_recv_length_enabled() has reported the tracepoint as enabled.
 * NOTE(review): presumably this keeps the tracepoint body out of the inlined
 * receive fast path - confirm.
 */
static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
{
        trace_sock_recv_length(sk, ret, flags);
}
1042
1043static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
1044                                     int flags)
1045{
1046        int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1047                                     inet6_recvmsg,
1048                                     inet_recvmsg, sock, msg,
1049                                     msg_data_left(msg), flags);
1050        if (trace_sock_recv_length_enabled())
1051                call_trace_sock_recv_length(sock->sk, ret, flags);
1052        return ret;
1053}
1054
/**
 *      sock_recvmsg - receive a message from @sock
 *      @sock: socket
 *      @msg: message to receive
 *      @flags: message flags
 *
 *      Asks the LSM for permission first and, if it does not object,
 *      receives @msg from @sock. Returns the total number of bytes
 *      received, or an error (including a non-zero LSM verdict).
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
        int err;

        err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
        if (err)
                return err;

        return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1071
/**
 *      kernel_recvmsg - Receive a message from a socket (kernel space)
 *      @sock: The socket to receive the message from
 *      @msg: Received message
 *      @vec: Input s/g array for message data
 *      @num: Size of input s/g array
 *      @size: Number of bytes to read
 *      @flags: Message flags (MSG_DONTWAIT, etc...)
 *
 *      On return the msg structure contains the scatter/gather array passed in the
 *      vec argument. The array is modified so that it consists of the unfilled
 *      portion of the original array.
 *
 *      @msg->msg_control_is_user is cleared and @msg->msg_iter is
 *      re-initialised as a kvec destination iterator over @vec before the
 *      message is handed to sock_recvmsg().
 *
 *      The returned value is the total number of bytes received, or an error.
 */

int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
                   struct kvec *vec, size_t num, size_t size, int flags)
{
        msg->msg_control_is_user = false;
        iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
        return sock_recvmsg(sock, msg, flags);
}
EXPORT_SYMBOL(kernel_recvmsg);
1096
1097static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
1098                                struct pipe_inode_info *pipe, size_t len,
1099                                unsigned int flags)
1100{
1101        struct socket *sock = file->private_data;
1102        const struct proto_ops *ops;
1103
1104        ops = READ_ONCE(sock->ops);
1105        if (unlikely(!ops->splice_read))
1106                return copy_splice_read(file, ppos, pipe, len, flags);
1107
1108        return ops->splice_read(sock, ppos, pipe, len, flags);
1109}
1110
1111static void sock_splice_eof(struct file *file)
1112{
1113        struct socket *sock = file->private_data;
1114        const struct proto_ops *ops;
1115
1116        ops = READ_ONCE(sock->ops);
1117        if (ops->splice_eof)
1118                ops->splice_eof(sock);
1119}
1120
1121static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
1122{
1123        struct file *file = iocb->ki_filp;
1124        struct socket *sock = file->private_data;
1125        struct msghdr msg = {.msg_iter = *to,
1126                             .msg_iocb = iocb};
1127        ssize_t res;
1128
1129        if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
1130                msg.msg_flags = MSG_DONTWAIT;
1131
1132        if (iocb->ki_pos != 0)
1133                return -ESPIPE;
1134
1135        if (!iov_iter_count(to))        /* Match SYS5 behaviour */
1136                return 0;
1137
1138        res = sock_recvmsg(sock, &msg, msg.msg_flags);
1139        *to = msg.msg_iter;
1140        return res;
1141}
1142
1143static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
1144{
1145        struct file *file = iocb->ki_filp;
1146        struct socket *sock = file->private_data;
1147        struct msghdr msg = {.msg_iter = *from,
1148                             .msg_iocb = iocb};
1149        ssize_t res;
1150
1151        if (iocb->ki_pos != 0)
1152                return -ESPIPE;
1153
1154        if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
1155                msg.msg_flags = MSG_DONTWAIT;
1156
1157        if (sock->type == SOCK_SEQPACKET)
1158                msg.msg_flags |= MSG_EOR;
1159
1160        res = __sock_sendmsg(sock, &msg);
1161        *from = msg.msg_iter;
1162        return res;
1163}
1164
/*
 * Atomic setting of ioctl hooks to avoid race
 * with module unload.
 */

/* Serialises updates of br_ioctl_hook against calls made through it. */
static DEFINE_MUTEX(br_ioctl_mutex);
/* Bridge ioctl handler installed via brioctl_set(); NULL when none is set. */
static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
                            unsigned int cmd, struct ifreq *ifr,
                            void __user *uarg);

/*
 * Install @hook as the bridge ioctl handler (or clear it by passing NULL).
 * The update is done under br_ioctl_mutex, the same mutex br_ioctl_call()
 * holds while invoking the hook.
 */
void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
                             unsigned int cmd, struct ifreq *ifr,
                             void __user *uarg))
{
        mutex_lock(&br_ioctl_mutex);
        br_ioctl_hook = hook;
        mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);
1184
1185int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1186                  struct ifreq *ifr, void __user *uarg)
1187{
1188        int err = -ENOPKG;
1189
1190        if (!br_ioctl_hook)
1191                request_module("bridge");
1192
1193        mutex_lock(&br_ioctl_mutex);
1194        if (br_ioctl_hook)
1195                err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1196        mutex_unlock(&br_ioctl_mutex);
1197
1198        return err;
1199}
1200
/* Serialises updates of vlan_ioctl_hook against calls made through it. */
static DEFINE_MUTEX(vlan_ioctl_mutex);
/* VLAN ioctl handler installed via vlan_ioctl_set(); NULL when none is set. */
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);

/*
 * Install @hook as the VLAN ioctl handler (or clear it by passing NULL).
 * The update is done under vlan_ioctl_mutex, the same mutex sock_ioctl()
 * holds while invoking the hook.
 */
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
        mutex_lock(&vlan_ioctl_mutex);
        vlan_ioctl_hook = hook;
        mutex_unlock(&vlan_ioctl_mutex);
}
EXPORT_SYMBOL(vlan_ioctl_set);
1211
1212static long sock_do_ioctl(struct net *net, struct socket *sock,
1213                          unsigned int cmd, unsigned long arg)
1214{
1215        const struct proto_ops *ops = READ_ONCE(sock->ops);
1216        struct ifreq ifr;
1217        bool need_copyout;
1218        int err;
1219        void __user *argp = (void __user *)arg;
1220        void __user *data;
1221
1222        err = ops->ioctl(sock, cmd, arg);
1223
1224        /*
1225         * If this ioctl is unknown try to hand it down
1226         * to the NIC driver.
1227         */
1228        if (err != -ENOIOCTLCMD)
1229                return err;
1230
1231        if (!is_socket_ioctl_cmd(cmd))
1232                return -ENOTTY;
1233
1234        if (get_user_ifreq(&ifr, &data, argp))
1235                return -EFAULT;
1236        err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
1237        if (!err && need_copyout)
1238                if (put_user_ifreq(&ifr, argp))
1239                        return -EFAULT;
1240
1241        return err;
1242}
1243
/*
 *      With an ioctl, arg may well be a user mode pointer, but we don't know
 *      what to do with it - that's up to the protocol still.
 *
 *      Top-level ioctl entry point for socket files. Commands are tried in
 *      this order: the SIOCDEVPRIVATE range (passed straight to dev_ioctl()),
 *      the wireless extension range (only with CONFIG_WEXT_CORE), the
 *      commands handled in the switch below, and finally sock_do_ioctl() for
 *      everything else.
 */

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        const struct proto_ops  *ops;
        struct socket *sock;
        struct sock *sk;
        void __user *argp = (void __user *)arg;
        int pid, err;
        struct net *net;

        sock = file->private_data;
        ops = READ_ONCE(sock->ops);
        sk = sock->sk;
        net = sock_net(sk);
        /* Device-private ioctls: SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15 */
        if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
                struct ifreq ifr;
                void __user *data;
                bool need_copyout;
                if (get_user_ifreq(&ifr, &data, argp))
                        return -EFAULT;
                err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
                /* copy the ifreq back only on success and when requested */
                if (!err && need_copyout)
                        if (put_user_ifreq(&ifr, argp))
                                return -EFAULT;
        } else
#ifdef CONFIG_WEXT_CORE
        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
                err = wext_handle_ioctl(net, cmd, argp);
        } else
#endif
                switch (cmd) {
                /* set the owner of the socket's file from a user-supplied pid */
                case FIOSETOWN:
                case SIOCSPGRP:
                        err = -EFAULT;
                        if (get_user(pid, (int __user *)argp))
                                break;
                        err = f_setown(sock->file, pid, 1);
                        break;
                /* report the owner of the socket's file to user space */
                case FIOGETOWN:
                case SIOCGPGRP:
                        err = put_user(f_getown(sock->file),
                                       (int __user *)argp);
                        break;
                /* bridge ioctls go through the registered bridge hook */
                case SIOCGIFBR:
                case SIOCSIFBR:
                case SIOCBRADDBR:
                case SIOCBRDELBR:
                        err = br_ioctl_call(net, NULL, cmd, NULL, argp);
                        break;
                /* VLAN ioctls: request the 8021q module if no hook is set,
                 * then call the hook under vlan_ioctl_mutex; -ENOPKG if
                 * there is still no hook
                 */
                case SIOCGIFVLAN:
                case SIOCSIFVLAN:
                        err = -ENOPKG;
                        if (!vlan_ioctl_hook)
                                request_module("8021q");

                        mutex_lock(&vlan_ioctl_mutex);
                        if (vlan_ioctl_hook)
                                err = vlan_ioctl_hook(net, argp);
                        mutex_unlock(&vlan_ioctl_mutex);
                        break;
                /* open the socket's network namespace; requires
                 * CAP_NET_ADMIN in the namespace's user namespace
                 */
                case SIOCGSKNS:
                        err = -EPERM;
                        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                                break;

                        err = open_related_ns(&net->ns, get_net_ns);
                        break;
                /* old-layout timestamp queries; the last argument is true
                 * on kernels built without CONFIG_64BIT
                 */
                case SIOCGSTAMP_OLD:
                case SIOCGSTAMPNS_OLD:
                        if (!ops->gettstamp) {
                                err = -ENOIOCTLCMD;
                                break;
                        }
                        err = ops->gettstamp(sock, argp,
                                             cmd == SIOCGSTAMP_OLD,
                                             !IS_ENABLED(CONFIG_64BIT));
                        break;
                /* new-layout timestamp queries */
                case SIOCGSTAMP_NEW:
                case SIOCGSTAMPNS_NEW:
                        if (!ops->gettstamp) {
                                err = -ENOIOCTLCMD;
                                break;
                        }
                        err = ops->gettstamp(sock, argp,
                                             cmd == SIOCGSTAMP_NEW,
                                             false);
                        break;

                case SIOCGIFCONF:
                        err = dev_ifconf(net, argp);
                        break;

                /* everything else: protocol first, then the device layer */
                default:
                        err = sock_do_ioctl(net, sock, cmd, arg);
                        break;
                }
        return err;
}
1346
1347/**
1348 *      sock_create_lite - creates a socket
1349 *      @family: protocol family (AF_INET, ...)
1350 *      @type: communication type (SOCK_STREAM, ...)
1351 *      @protocol: protocol (0, ...)
1352 *      @res: new socket
1353 *
1354 *      Creates a new socket and assigns it to @res, passing through LSM.
1355 *      The new socket initialization is not complete, see kernel_accept().
1356 *      Returns 0 or an error. On failure @res is set to %NULL.
1357 *      This function internally uses GFP_KERNEL.
1358 */
1359
1360int sock_create_lite(int family, int type, int protocol, struct socket **res)
1361{
1362        int err;
1363        struct socket *sock = NULL;
1364
1365        err = security_socket_create(family, type, protocol, 1);
1366        if (err)
1367                goto out;
1368
1369        sock = sock_alloc();
1370        if (!sock) {
1371                err = -ENOMEM;
1372                goto out;
1373        }
1374
1375        sock->type = type;
1376        err = security_socket_post_create(sock, family, type, protocol, 1);
1377        if (err)
1378                goto out_release;
1379
1380out:
1381        *res = sock;
1382        return err;
1383out_release:
1384        sock_release(sock);
1385        sock = NULL;
1386        goto out;
1387}
1388EXPORT_SYMBOL(sock_create_lite);
1389
1390/* No kernel lock held - perfect */
1391static __poll_t sock_poll(struct file *file, poll_table *wait)
1392{
1393        struct socket *sock = file->private_data;
1394        const struct proto_ops *ops = READ_ONCE(sock->ops);
1395        __poll_t events = poll_requested_events(wait), flag = 0;
1396
1397        if (!ops->poll)
1398                return 0;
1399
1400        if (sk_can_busy_loop(sock->sk)) {
1401                /* poll once if requested by the syscall */
1402                if (events & POLL_BUSY_LOOP)
1403                        sk_busy_loop(sock->sk, 1);
1404
1405                /* if this socket can poll_ll, tell the system call */
1406                flag = POLL_BUSY_LOOP;
1407        }
1408
1409        return ops->poll(file, sock, wait) | flag;
1410}
1411
1412static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1413{
1414        struct socket *sock = file->private_data;
1415
1416        return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1417}
1418
/*
 * ->release() for socket files: release the socket embedded in @inode via
 * __sock_release(). @filp is not used. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
        __sock_release(SOCKET_I(inode), inode);
        return 0;
}
1424
1425/*
1426 *      Update the socket async list
1427 *
1428 *      Fasync_list locking strategy.
1429 *
1430 *      1. fasync_list is modified only under process context socket lock
1431 *         i.e. under semaphore.
1432 *      2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1433 *         or under socket lock
1434 */
1435
1436static int sock_fasync(int fd, struct file *filp, int on)
1437{
1438        struct socket *sock = filp->private_data;
1439        struct sock *sk = sock->sk;
1440        struct socket_wq *wq = &sock->wq;
1441
1442        if (sk == NULL)
1443                return -EINVAL;
1444
1445        lock_sock(sk);
1446        fasync_helper(fd, filp, on, &wq->fasync_list);
1447
1448        if (!wq->fasync_list)
1449                sock_reset_flag(sk, SOCK_FASYNC);
1450        else
1451                sock_set_flag(sk, SOCK_FASYNC);
1452
1453        release_sock(sk);
1454        return 0;
1455}
1456
1457/* This function may be called only under rcu_lock */
1458
1459int sock_wake_async(struct socket_wq *wq, int how, int band)
1460{
1461        if (!wq || !wq->fasync_list)
1462                return -1;
1463
1464        switch (how) {
1465        case SOCK_WAKE_WAITD:
1466                if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1467                        break;
1468                goto call_kill;
1469        case SOCK_WAKE_SPACE:
1470                if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1471                        break;
1472                fallthrough;
1473        case SOCK_WAKE_IO:
1474call_kill:
1475                kill_fasync(&wq->fasync_list, SIGIO, band);
1476                break;
1477        case SOCK_WAKE_URG:
1478                kill_fasync(&wq->fasync_list, SIGURG, band);
1479        }
1480
1481        return 0;
1482}
1483EXPORT_SYMBOL(sock_wake_async);
1484
1485/**
1486 *      __sock_create - creates a socket
1487 *      @net: net namespace
1488 *      @family: protocol family (AF_INET, ...)
1489 *      @type: communication type (SOCK_STREAM, ...)
1490 *      @protocol: protocol (0, ...)
1491 *      @res: new socket
1492 *      @kern: boolean for kernel space sockets
1493 *
1494 *      Creates a new socket and assigns it to @res, passing through LSM.
1495 *      Returns 0 or an error. On failure @res is set to %NULL. @kern must
1496 *      be set to true if the socket resides in kernel space.
1497 *      This function internally uses GFP_KERNEL.
1498 */
1499
1500int __sock_create(struct net *net, int family, int type, int protocol,
1501                         struct socket **res, int kern)
1502{
1503        int err;
1504        struct socket *sock;
1505        const struct net_proto_family *pf;
1506
1507        /*
1508         *      Check protocol is in range
1509         */
1510        if (family < 0 || family >= NPROTO)
1511                return -EAFNOSUPPORT;
1512        if (type < 0 || type >= SOCK_MAX)
1513                return -EINVAL;
1514
1515        /* Compatibility.
1516
1517           This uglymoron is moved from INET layer to here to avoid
1518           deadlock in module load.
1519         */
1520        if (family == PF_INET && type == SOCK_PACKET) {
1521                pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1522                             current->comm);
1523                family = PF_PACKET;
1524        }
1525
1526        err = security_socket_create(family, type, protocol, kern);
1527        if (err)
1528                return err;
1529
1530        /*
1531         *      Allocate the socket and allow the family to set things up. if
1532         *      the protocol is 0, the family is instructed to select an appropriate
1533         *      default.
1534         */
1535        sock = sock_alloc();
1536        if (!sock) {
1537                net_warn_ratelimited("socket: no more sockets\n");
1538                return -ENFILE; /* Not exactly a match, but its the
1539                                   closest posix thing */
1540        }
1541
1542        sock->type = type;
1543
1544#ifdef CONFIG_MODULES
1545        /* Attempt to load a protocol module if the find failed.
1546         *
1547         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1548         * requested real, full-featured networking support upon configuration.
1549         * Otherwise module support will break!
1550         */
1551        if (rcu_access_pointer(net_families[family]) == NULL)
1552                request_module("net-pf-%d", family);
1553#endif
1554
1555        rcu_read_lock();
1556        pf = rcu_dereference(net_families[family]);
1557        err = -EAFNOSUPPORT;
1558        if (!pf)
1559                goto out_release;
1560
1561        /*
1562         * We will call the ->create function, that possibly is in a loadable
1563         * module, so we have to bump that loadable module refcnt first.
1564         */
1565        if (!try_module_get(pf->owner))
1566                goto out_release;
1567
1568        /* Now protected by module ref count */
1569        rcu_read_unlock();
1570
1571        err = pf->create(net, sock, protocol, kern);
1572        if (err < 0)
1573                goto out_module_put;
1574
1575        /*
1576         * Now to bump the refcnt of the [loadable] module that owns this
1577         * socket at sock_release time we decrement its refcnt.
1578         */
1579        if (!try_module_get(sock->ops->owner))
1580                goto out_module_busy;
1581
1582        /*
1583         * Now that we're done with the ->create function, the [loadable]
1584         * module can have its refcnt decremented
1585         */
1586        module_put(pf->owner);
1587        err = security_socket_post_create(sock, family, type, protocol, kern);
1588        if (err)
1589                goto out_sock_release;
1590        *res = sock;
1591
1592        return 0;
1593
1594out_module_busy:
1595        err = -EAFNOSUPPORT;
1596out_module_put:
1597        sock->ops = NULL;
1598        module_put(pf->owner);
1599out_sock_release:
1600        sock_release(sock);
1601        return err;
1602
1603out_release:
1604        rcu_read_unlock();
1605        goto out_sock_release;
1606}
1607EXPORT_SYMBOL(__sock_create);
1608
1609/**
1610 *      sock_create - creates a socket
1611 *      @family: protocol family (AF_INET, ...)
1612 *      @type: communication type (SOCK_STREAM, ...)
1613 *      @protocol: protocol (0, ...)
1614 *      @res: new socket
1615 *
1616 *      A wrapper around __sock_create().
1617 *      Returns 0 or an error. This function internally uses GFP_KERNEL.
1618 */
1619
1620int sock_create(int family, int type, int protocol, struct socket **res)
1621{
1622        return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1623}
1624EXPORT_SYMBOL(sock_create);
1625
/**
 *      sock_create_kern - creates a socket (kernel space)
 *      @net: net namespace
 *      @family: protocol family (AF_INET, ...)
 *      @type: communication type (SOCK_STREAM, ...)
 *      @protocol: protocol (0, ...)
 *      @res: new socket
 *
 *      A wrapper around __sock_create() that passes @kern == 1, i.e. the
 *      socket is created as a kernel space socket in namespace @net.
 *      Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
        return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1643
1644static struct socket *__sys_socket_create(int family, int type, int protocol)
1645{
1646        struct socket *sock;
1647        int retval;
1648
1649        /* Check the SOCK_* constants for consistency.  */
1650        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1651        BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1652        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1653        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1654
1655        if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1656                return ERR_PTR(-EINVAL);
1657        type &= SOCK_TYPE_MASK;
1658
1659        retval = sock_create(family, type, protocol, &sock);
1660        if (retval < 0)
1661                return ERR_PTR(retval);
1662
1663        return sock;
1664}
1665
1666struct file *__sys_socket_file(int family, int type, int protocol)
1667{
1668        struct socket *sock;
1669        int flags;
1670
1671        sock = __sys_socket_create(family, type, protocol);
1672        if (IS_ERR(sock))
1673                return ERR_CAST(sock);
1674
1675        flags = type & ~SOCK_TYPE_MASK;
1676        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1677                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1678
1679        return sock_alloc_file(sock, flags, NULL);
1680}
1681
/*      A hook for bpf progs to attach to and update socket protocol.
 *
 *      A static noinline declaration here could cause the compiler to
 *      optimize away the function. A global noinline declaration will
 *      keep the definition, but may optimize away the callsite.
 *      Therefore, __weak is needed to ensure that the call is still
 *      emitted, by telling the compiler that we don't know what the
 *      function might eventually be.
 *
 *      The default implementation below returns @protocol unchanged;
 *      @family and @type are not used by it. __sys_socket() passes the
 *      returned value on as the protocol of the socket it creates.
 */

__bpf_hook_start();

__weak noinline int update_socket_protocol(int family, int type, int protocol)
{
        return protocol;
}

__bpf_hook_end();
1700
1701int __sys_socket(int family, int type, int protocol)
1702{
1703        struct socket *sock;
1704        int flags;
1705
1706        sock = __sys_socket_create(family, type,
1707                                   update_socket_protocol(family, type, protocol));
1708        if (IS_ERR(sock))
1709                return PTR_ERR(sock);
1710
1711        flags = type & ~SOCK_TYPE_MASK;
1712        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1713                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1714
1715        return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1716}
1717
/* socket(2) system call entry point: a thin wrapper around __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
        return __sys_socket(family, type, protocol);
}
1722
1723/*
1724 *      Create a pair of connected sockets.
1725 */
1726
1727int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1728{
1729        struct socket *sock1, *sock2;
1730        int fd1, fd2, err;
1731        struct file *newfile1, *newfile2;
1732        int flags;
1733
1734        flags = type & ~SOCK_TYPE_MASK;
1735        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1736                return -EINVAL;
1737        type &= SOCK_TYPE_MASK;
1738
1739        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1740                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1741
1742        /*
1743         * reserve descriptors and make sure we won't fail
1744         * to return them to userland.
1745         */
1746        fd1 = get_unused_fd_flags(flags);
1747        if (unlikely(fd1 < 0))
1748                return fd1;
1749
1750        fd2 = get_unused_fd_flags(flags);
1751        if (unlikely(fd2 < 0)) {
1752                put_unused_fd(fd1);
1753                return fd2;
1754        }
1755
1756        err = put_user(fd1, &usockvec[0]);
1757        if (err)
1758                goto out;
1759
1760        err = put_user(fd2, &usockvec[1]);
1761        if (err)
1762                goto out;
1763
1764        /*
1765         * Obtain the first socket and check if the underlying protocol
1766         * supports the socketpair call.
1767         */
1768
1769        err = sock_create(family, type, protocol, &sock1);
1770        if (unlikely(err < 0))
1771                goto out;
1772
1773        err = sock_create(family, type, protocol, &sock2);
1774        if (unlikely(err < 0)) {
1775                sock_release(sock1);
1776                goto out;
1777        }
1778
1779        err = security_socket_socketpair(sock1, sock2);
1780        if (unlikely(err)) {
1781                sock_release(sock2);
1782                sock_release(sock1);
1783                goto out;
1784        }
1785
1786        err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
1787        if (unlikely(err < 0)) {
1788                sock_release(sock2);
1789                sock_release(sock1);
1790                goto out;
1791        }
1792
1793        newfile1 = sock_alloc_file(sock1, flags, NULL);
1794        if (IS_ERR(newfile1)) {
1795                err = PTR_ERR(newfile1);
1796                sock_release(sock2);
1797                goto out;
1798        }
1799
1800        newfile2 = sock_alloc_file(sock2, flags, NULL);
1801        if (IS_ERR(newfile2)) {
1802                err = PTR_ERR(newfile2);
1803                fput(newfile1);
1804                goto out;
1805        }
1806
1807        audit_fd_pair(fd1, fd2);
1808
1809        fd_install(fd1, newfile1);
1810        fd_install(fd2, newfile2);
1811        return 0;
1812
1813out:
1814        put_unused_fd(fd2);
1815        put_unused_fd(fd1);
1816        return err;
1817}
1818
1819SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1820                int __user *, usockvec)
1821{
1822        return __sys_socketpair(family, type, protocol, usockvec);
1823}
1824
1825/*
1826 *      Bind a name to a socket. Nothing much to do here since it's
1827 *      the protocol's responsibility to handle the local address.
1828 *
1829 *      We move the socket address to kernel space before we call
1830 *      the protocol layer (having also checked the address is ok).
1831 */
1832
1833int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1834{
1835        struct socket *sock;
1836        struct sockaddr_storage address;
1837        int err, fput_needed;
1838
1839        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1840        if (sock) {
1841                err = move_addr_to_kernel(umyaddr, addrlen, &address);
1842                if (!err) {
1843                        err = security_socket_bind(sock,
1844                                                   (struct sockaddr *)&address,
1845                                                   addrlen);
1846                        if (!err)
1847                                err = READ_ONCE(sock->ops)->bind(sock,
1848                                                      (struct sockaddr *)
1849                                                      &address, addrlen);
1850                }
1851                fput_light(sock->file, fput_needed);
1852        }
1853        return err;
1854}
1855
1856SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1857{
1858        return __sys_bind(fd, umyaddr, addrlen);
1859}
1860
1861/*
1862 *      Perform a listen. Basically, we allow the protocol to do anything
1863 *      necessary for a listen, and if that works, we mark the socket as
1864 *      ready for listening.
1865 */
1866
1867int __sys_listen(int fd, int backlog)
1868{
1869        struct socket *sock;
1870        int err, fput_needed;
1871        int somaxconn;
1872
1873        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1874        if (sock) {
1875                somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
1876                if ((unsigned int)backlog > somaxconn)
1877                        backlog = somaxconn;
1878
1879                err = security_socket_listen(sock, backlog);
1880                if (!err)
1881                        err = READ_ONCE(sock->ops)->listen(sock, backlog);
1882
1883                fput_light(sock->file, fput_needed);
1884        }
1885        return err;
1886}
1887
1888SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1889{
1890        return __sys_listen(fd, backlog);
1891}
1892
1893struct file *do_accept(struct file *file, unsigned file_flags,
1894                       struct sockaddr __user *upeer_sockaddr,
1895                       int __user *upeer_addrlen, int flags)
1896{
1897        struct socket *sock, *newsock;
1898        struct file *newfile;
1899        int err, len;
1900        struct sockaddr_storage address;
1901        const struct proto_ops *ops;
1902
1903        sock = sock_from_file(file);
1904        if (!sock)
1905                return ERR_PTR(-ENOTSOCK);
1906
1907        newsock = sock_alloc();
1908        if (!newsock)
1909                return ERR_PTR(-ENFILE);
1910        ops = READ_ONCE(sock->ops);
1911
1912        newsock->type = sock->type;
1913        newsock->ops = ops;
1914
1915        /*
1916         * We don't need try_module_get here, as the listening socket (sock)
1917         * has the protocol module (sock->ops->owner) held.
1918         */
1919        __module_get(ops->owner);
1920
1921        newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1922        if (IS_ERR(newfile))
1923                return newfile;
1924
1925        err = security_socket_accept(sock, newsock);
1926        if (err)
1927                goto out_fd;
1928
1929        err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1930                                        false);
1931        if (err < 0)
1932                goto out_fd;
1933
1934        if (upeer_sockaddr) {
1935                len = ops->getname(newsock, (struct sockaddr *)&address, 2);
1936                if (len < 0) {
1937                        err = -ECONNABORTED;
1938                        goto out_fd;
1939                }
1940                err = move_addr_to_user(&address,
1941                                        len, upeer_sockaddr, upeer_addrlen);
1942                if (err < 0)
1943                        goto out_fd;
1944        }
1945
1946        /* File flags are not inherited via accept() unlike another OSes. */
1947        return newfile;
1948out_fd:
1949        fput(newfile);
1950        return ERR_PTR(err);
1951}
1952
1953static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1954                              int __user *upeer_addrlen, int flags)
1955{
1956        struct file *newfile;
1957        int newfd;
1958
1959        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1960                return -EINVAL;
1961
1962        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1963                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1964
1965        newfd = get_unused_fd_flags(flags);
1966        if (unlikely(newfd < 0))
1967                return newfd;
1968
1969        newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
1970                            flags);
1971        if (IS_ERR(newfile)) {
1972                put_unused_fd(newfd);
1973                return PTR_ERR(newfile);
1974        }
1975        fd_install(newfd, newfile);
1976        return newfd;
1977}
1978
1979/*
1980 *      For accept, we attempt to create a new socket, set up the link
1981 *      with the client, wake up the client, then return the new
1982 *      connected fd. We collect the address of the connector in kernel
1983 *      space and move it to user at the very end. This is unclean because
1984 *      we open the socket then return an error.
1985 *
1986 *      1003.1g adds the ability to recvmsg() to query connection pending
1987 *      status to recvmsg. We need to add that support in a way thats
1988 *      clean when we restructure accept also.
1989 */
1990
1991int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1992                  int __user *upeer_addrlen, int flags)
1993{
1994        int ret = -EBADF;
1995        struct fd f;
1996
1997        f = fdget(fd);
1998        if (f.file) {
1999                ret = __sys_accept4_file(f.file, upeer_sockaddr,
2000                                         upeer_addrlen, flags);
2001                fdput(f);
2002        }
2003
2004        return ret;
2005}
2006
2007SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
2008                int __user *, upeer_addrlen, int, flags)
2009{
2010        return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2011}
2012
2013SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2014                int __user *, upeer_addrlen)
2015{
2016        return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
2017}
2018
2019/*
2020 *      Attempt to connect to a socket with the server address.  The address
2021 *      is in user space so we verify it is OK and move it to kernel space.
2022 *
2023 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2024 *      break bindings
2025 *
2026 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2027 *      other SEQPACKET protocols that take time to connect() as it doesn't
2028 *      include the -EINPROGRESS status for such sockets.
2029 */
2030
2031int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
2032                       int addrlen, int file_flags)
2033{
2034        struct socket *sock;
2035        int err;
2036
2037        sock = sock_from_file(file);
2038        if (!sock) {
2039                err = -ENOTSOCK;
2040                goto out;
2041        }
2042
2043        err =
2044            security_socket_connect(sock, (struct sockaddr *)address, addrlen);
2045        if (err)
2046                goto out;
2047
2048        err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2049                                addrlen, sock->file->f_flags | file_flags);
2050out:
2051        return err;
2052}
2053
2054int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2055{
2056        int ret = -EBADF;
2057        struct fd f;
2058
2059        f = fdget(fd);
2060        if (f.file) {
2061                struct sockaddr_storage address;
2062
2063                ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2064                if (!ret)
2065                        ret = __sys_connect_file(f.file, &address, addrlen, 0);
2066                fdput(f);
2067        }
2068
2069        return ret;
2070}
2071
2072SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2073                int, addrlen)
2074{
2075        return __sys_connect(fd, uservaddr, addrlen);
2076}
2077
2078/*
2079 *      Get the local address ('name') of a socket object. Move the obtained
2080 *      name to user space.
2081 */
2082
2083int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2084                      int __user *usockaddr_len)
2085{
2086        struct socket *sock;
2087        struct sockaddr_storage address;
2088        int err, fput_needed;
2089
2090        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2091        if (!sock)
2092                goto out;
2093
2094        err = security_socket_getsockname(sock);
2095        if (err)
2096                goto out_put;
2097
2098        err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
2099        if (err < 0)
2100                goto out_put;
2101        /* "err" is actually length in this case */
2102        err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
2103
2104out_put:
2105        fput_light(sock->file, fput_needed);
2106out:
2107        return err;
2108}
2109
2110SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2111                int __user *, usockaddr_len)
2112{
2113        return __sys_getsockname(fd, usockaddr, usockaddr_len);
2114}
2115
2116/*
2117 *      Get the remote address ('name') of a socket object. Move the obtained
2118 *      name to user space.
2119 */
2120
2121int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2122                      int __user *usockaddr_len)
2123{
2124        struct socket *sock;
2125        struct sockaddr_storage address;
2126        int err, fput_needed;
2127
2128        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2129        if (sock != NULL) {
2130                const struct proto_ops *ops = READ_ONCE(sock->ops);
2131
2132                err = security_socket_getpeername(sock);
2133                if (err) {
2134                        fput_light(sock->file, fput_needed);
2135                        return err;
2136                }
2137
2138                err = ops->getname(sock, (struct sockaddr *)&address, 1);
2139                if (err >= 0)
2140                        /* "err" is actually length in this case */
2141                        err = move_addr_to_user(&address, err, usockaddr,
2142                                                usockaddr_len);
2143                fput_light(sock->file, fput_needed);
2144        }
2145        return err;
2146}
2147
2148SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2149                int __user *, usockaddr_len)
2150{
2151        return __sys_getpeername(fd, usockaddr, usockaddr_len);
2152}
2153
2154/*
2155 *      Send a datagram to a given address. We move the address into kernel
2156 *      space and check the user space data area is readable before invoking
2157 *      the protocol.
2158 */
2159int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2160                 struct sockaddr __user *addr,  int addr_len)
2161{
2162        struct socket *sock;
2163        struct sockaddr_storage address;
2164        int err;
2165        struct msghdr msg;
2166        struct iovec iov;
2167        int fput_needed;
2168
2169        err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
2170        if (unlikely(err))
2171                return err;
2172        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2173        if (!sock)
2174                goto out;
2175
2176        msg.msg_name = NULL;
2177        msg.msg_control = NULL;
2178        msg.msg_controllen = 0;
2179        msg.msg_namelen = 0;
2180        msg.msg_ubuf = NULL;
2181        if (addr) {
2182                err = move_addr_to_kernel(addr, addr_len, &address);
2183                if (err < 0)
2184                        goto out_put;
2185                msg.msg_name = (struct sockaddr *)&address;
2186                msg.msg_namelen = addr_len;
2187        }
2188        flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2189        if (sock->file->f_flags & O_NONBLOCK)
2190                flags |= MSG_DONTWAIT;
2191        msg.msg_flags = flags;
2192        err = __sock_sendmsg(sock, &msg);
2193
2194out_put:
2195        fput_light(sock->file, fput_needed);
2196out:
2197        return err;
2198}
2199
2200SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2201                unsigned int, flags, struct sockaddr __user *, addr,
2202                int, addr_len)
2203{
2204        return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2205}
2206
2207/*
2208 *      Send a datagram down a socket.
2209 */
2210
2211SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
2212                unsigned int, flags)
2213{
2214        return __sys_sendto(fd, buff, len, flags, NULL, 0);
2215}
2216
2217/*
2218 *      Receive a frame from the socket and optionally record the address of the
2219 *      sender. We verify the buffers are writable and if needed move the
2220 *      sender address from kernel to user space.
2221 */
2222int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2223                   struct sockaddr __user *addr, int __user *addr_len)
2224{
2225        struct sockaddr_storage address;
2226        struct msghdr msg = {
2227                /* Save some cycles and don't copy the address if not needed */
2228                .msg_name = addr ? (struct sockaddr *)&address : NULL,
2229        };
2230        struct socket *sock;
2231        struct iovec iov;
2232        int err, err2;
2233        int fput_needed;
2234
2235        err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
2236        if (unlikely(err))
2237                return err;
2238        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2239        if (!sock)
2240                goto out;
2241
2242        if (sock->file->f_flags & O_NONBLOCK)
2243                flags |= MSG_DONTWAIT;
2244        err = sock_recvmsg(sock, &msg, flags);
2245
2246        if (err >= 0 && addr != NULL) {
2247                err2 = move_addr_to_user(&address,
2248                                         msg.msg_namelen, addr, addr_len);
2249                if (err2 < 0)
2250                        err = err2;
2251        }
2252
2253        fput_light(sock->file, fput_needed);
2254out:
2255        return err;
2256}
2257
2258SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2259                unsigned int, flags, struct sockaddr __user *, addr,
2260                int __user *, addr_len)
2261{
2262        return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2263}
2264
2265/*
2266 *      Receive a datagram from a socket.
2267 */
2268
2269SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2270                unsigned int, flags)
2271{
2272        return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
2273}
2274
2275static bool sock_use_custom_sol_socket(const struct socket *sock)
2276{
2277        return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
2278}
2279
2280int do_sock_setsockopt(struct socket *sock, bool compat, int level,
2281                       int optname, sockptr_t optval, int optlen)
2282{
2283        const struct proto_ops *ops;
2284        char *kernel_optval = NULL;
2285        int err;
2286
2287        if (optlen < 0)
2288                return -EINVAL;
2289
2290        err = security_socket_setsockopt(sock, level, optname);
2291        if (err)
2292                goto out_put;
2293
2294        if (!compat)
2295                err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2296                                                     optval, &optlen,
2297                                                     &kernel_optval);
2298        if (err < 0)
2299                goto out_put;
2300        if (err > 0) {
2301                err = 0;
2302                goto out_put;
2303        }
2304
2305        if (kernel_optval)
2306                optval = KERNEL_SOCKPTR(kernel_optval);
2307        ops = READ_ONCE(sock->ops);
2308        if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2309                err = sock_setsockopt(sock, level, optname, optval, optlen);
2310        else if (unlikely(!ops->setsockopt))
2311                err = -EOPNOTSUPP;
2312        else
2313                err = ops->setsockopt(sock, level, optname, optval,
2314                                            optlen);
2315        kfree(kernel_optval);
2316out_put:
2317        return err;
2318}
2319EXPORT_SYMBOL(do_sock_setsockopt);
2320
2321/* Set a socket option. Because we don't know the option lengths we have
2322 * to pass the user mode parameter for the protocols to sort out.
2323 */
2324int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2325                     int optlen)
2326{
2327        sockptr_t optval = USER_SOCKPTR(user_optval);
2328        bool compat = in_compat_syscall();
2329        int err, fput_needed;
2330        struct socket *sock;
2331
2332        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2333        if (!sock)
2334                return err;
2335
2336        err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2337
2338        fput_light(sock->file, fput_needed);
2339        return err;
2340}
2341
2342SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2343                char __user *, optval, int, optlen)
2344{
2345        return __sys_setsockopt(fd, level, optname, optval, optlen);
2346}
2347
2348INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2349                                                         int optname));
2350
2351int do_sock_getsockopt(struct socket *sock, bool compat, int level,
2352                       int optname, sockptr_t optval, sockptr_t optlen)
2353{
2354        int max_optlen __maybe_unused;
2355        const struct proto_ops *ops;
2356        int err;
2357
2358        err = security_socket_getsockopt(sock, level, optname);
2359        if (err)
2360                return err;
2361
2362        if (!compat)
2363                max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2364
2365        ops = READ_ONCE(sock->ops);
2366        if (level == SOL_SOCKET) {
2367                err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
2368        } else if (unlikely(!ops->getsockopt)) {
2369                err = -EOPNOTSUPP;
2370        } else {
2371                if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
2372                              "Invalid argument type"))
2373                        return -EOPNOTSUPP;
2374
2375                err = ops->getsockopt(sock, level, optname, optval.user,
2376                                      optlen.user);
2377        }
2378
2379        if (!compat)
2380                err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2381                                                     optval, optlen, max_optlen,
2382                                                     err);
2383
2384        return err;
2385}
2386EXPORT_SYMBOL(do_sock_getsockopt);
2387
2388/*
2389 *      Get a socket option. Because we don't know the option lengths we have
2390 *      to pass a user mode parameter for the protocols to sort out.
2391 */
2392int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2393                int __user *optlen)
2394{
2395        int err, fput_needed;
2396        struct socket *sock;
2397        bool compat;
2398
2399        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2400        if (!sock)
2401                return err;
2402
2403        compat = in_compat_syscall();
2404        err = do_sock_getsockopt(sock, compat, level, optname,
2405                                 USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
2406
2407        fput_light(sock->file, fput_needed);
2408        return err;
2409}
2410
2411SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2412                char __user *, optval, int __user *, optlen)
2413{
2414        return __sys_getsockopt(fd, level, optname, optval, optlen);
2415}
2416
2417/*
2418 *      Shutdown a socket.
2419 */
2420
2421int __sys_shutdown_sock(struct socket *sock, int how)
2422{
2423        int err;
2424
2425        err = security_socket_shutdown(sock, how);
2426        if (!err)
2427                err = READ_ONCE(sock->ops)->shutdown(sock, how);
2428
2429        return err;
2430}
2431
2432int __sys_shutdown(int fd, int how)
2433{
2434        int err, fput_needed;
2435        struct socket *sock;
2436
2437        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2438        if (sock != NULL) {
2439                err = __sys_shutdown_sock(sock, how);
2440                fput_light(sock->file, fput_needed);
2441        }
2442        return err;
2443}
2444
2445SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2446{
2447        return __sys_shutdown(fd, how);
2448}
2449
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expands in the caller's scope and relies on names that must
 * already exist there: a variable called flags, the pointer passed as msg,
 * and a second pointer named <msg>_compat. When MSG_CMSG_COMPAT is set in
 * flags the field of the _compat structure is addressed, otherwise the
 * field of msg itself.
 */
#define COMPAT_MSG(msg, member)						\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2456
2457struct used_address {
2458        struct sockaddr_storage name;
2459        unsigned int name_len;
2460};
2461
2462int __copy_msghdr(struct msghdr *kmsg,
2463                  struct user_msghdr *msg,
2464                  struct sockaddr __user **save_addr)
2465{
2466        ssize_t err;
2467
2468        kmsg->msg_control_is_user = true;
2469        kmsg->msg_get_inq = 0;
2470        kmsg->msg_control_user = msg->msg_control;
2471        kmsg->msg_controllen = msg->msg_controllen;
2472        kmsg->msg_flags = msg->msg_flags;
2473
2474        kmsg->msg_namelen = msg->msg_namelen;
2475        if (!msg->msg_name)
2476                kmsg->msg_namelen = 0;
2477
2478        if (kmsg->msg_namelen < 0)
2479                return -EINVAL;
2480
2481        if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
2482                kmsg->msg_namelen = sizeof(struct sockaddr_storage);
2483
2484        if (save_addr)
2485                *save_addr = msg->msg_name;
2486
2487        if (msg->msg_name && kmsg->msg_namelen) {
2488                if (!save_addr) {
2489                        err = move_addr_to_kernel(msg->msg_name,
2490                                                  kmsg->msg_namelen,
2491                                                  kmsg->msg_name);
2492                        if (err < 0)
2493                                return err;
2494                }
2495        } else {
2496                kmsg->msg_name = NULL;
2497                kmsg->msg_namelen = 0;
2498        }
2499
2500        if (msg->msg_iovlen > UIO_MAXIOV)
2501                return -EMSGSIZE;
2502
2503        kmsg->msg_iocb = NULL;
2504        kmsg->msg_ubuf = NULL;
2505        return 0;
2506}
2507
2508static int copy_msghdr_from_user(struct msghdr *kmsg,
2509                                 struct user_msghdr __user *umsg,
2510                                 struct sockaddr __user **save_addr,
2511                                 struct iovec **iov)
2512{
2513        struct user_msghdr msg;
2514        ssize_t err;
2515
2516        if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2517                return -EFAULT;
2518
2519        err = __copy_msghdr(kmsg, &msg, save_addr);
2520        if (err)
2521                return err;
2522
2523        err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
2524                            msg.msg_iov, msg.msg_iovlen,
2525                            UIO_FASTIOV, iov, &kmsg->msg_iter);
2526        return err < 0 ? err : 0;
2527}
2528
2529static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2530                           unsigned int flags, struct used_address *used_address,
2531                           unsigned int allowed_msghdr_flags)
2532{
2533        unsigned char ctl[sizeof(struct cmsghdr) + 20]
2534                                __aligned(sizeof(__kernel_size_t));
2535        /* 20 is size of ipv6_pktinfo */
2536        unsigned char *ctl_buf = ctl;
2537        int ctl_len;
2538        ssize_t err;
2539
2540        err = -ENOBUFS;
2541
2542        if (msg_sys->msg_controllen > INT_MAX)
2543                goto out;
2544        flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
2545        ctl_len = msg_sys->msg_controllen;
2546        if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
2547                err =
2548                    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
2549                                                     sizeof(ctl));
2550                if (err)
2551                        goto out;
2552                ctl_buf = msg_sys->msg_control;
2553                ctl_len = msg_sys->msg_controllen;
2554        } else if (ctl_len) {
2555                BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2556                             CMSG_ALIGN(sizeof(struct cmsghdr)));
2557                if (ctl_len > sizeof(ctl)) {
2558                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
2559                        if (ctl_buf == NULL)
2560                                goto out;
2561                }
2562                err = -EFAULT;
2563                if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
2564                        goto out_freectl;
2565                msg_sys->msg_control = ctl_buf;
2566                msg_sys->msg_control_is_user = false;
2567        }
2568        flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
2569        msg_sys->msg_flags = flags;
2570
2571        if (sock->file->f_flags & O_NONBLOCK)
2572                msg_sys->msg_flags |= MSG_DONTWAIT;
2573        /*
2574         * If this is sendmmsg() and current destination address is same as
2575         * previously succeeded address, omit asking LSM's decision.
2576         * used_address->name_len is initialized to UINT_MAX so that the first
2577         * destination address never matches.
2578         */
2579        if (used_address && msg_sys->msg_name &&
2580            used_address->name_len == msg_sys->msg_namelen &&
2581            !memcmp(&used_address->name, msg_sys->msg_name,
2582                    used_address->name_len)) {
2583                err = sock_sendmsg_nosec(sock, msg_sys);
2584                goto out_freectl;
2585        }
2586        err = __sock_sendmsg(sock, msg_sys);
2587        /*
2588         * If this is sendmmsg() and sending to current destination address was
2589         * successful, remember it.
2590         */
2591        if (used_address && err >= 0) {
2592                used_address->name_len = msg_sys->msg_namelen;
2593                if (msg_sys->msg_name)
2594                        memcpy(&used_address->name, msg_sys->msg_name,
2595                               used_address->name_len);
2596        }
2597
2598out_freectl:
2599        if (ctl_buf != ctl)
2600                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2601out:
2602        return err;
2603}
2604
2605int sendmsg_copy_msghdr(struct msghdr *msg,
2606                        struct user_msghdr __user *umsg, unsigned flags,
2607                        struct iovec **iov)
2608{
2609        int err;
2610
2611        if (flags & MSG_CMSG_COMPAT) {
2612                struct compat_msghdr __user *msg_compat;
2613
2614                msg_compat = (struct compat_msghdr __user *) umsg;
2615                err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2616        } else {
2617                err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2618        }
2619        if (err < 0)
2620                return err;
2621
2622        return 0;
2623}
2624
2625static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2626                         struct msghdr *msg_sys, unsigned int flags,
2627                         struct used_address *used_address,
2628                         unsigned int allowed_msghdr_flags)
2629{
2630        struct sockaddr_storage address;
2631        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2632        ssize_t err;
2633
2634        msg_sys->msg_name = &address;
2635
2636        err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2637        if (err < 0)
2638                return err;
2639
2640        err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2641                                allowed_msghdr_flags);
2642        kfree(iov);
2643        return err;
2644}
2645
2646/*
2647 *      BSD sendmsg interface
2648 */
2649long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
2650                        unsigned int flags)
2651{
2652        return ____sys_sendmsg(sock, msg, flags, NULL, 0);
2653}
2654
2655long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2656                   bool forbid_cmsg_compat)
2657{
2658        int fput_needed, err;
2659        struct msghdr msg_sys;
2660        struct socket *sock;
2661
2662        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2663                return -EINVAL;
2664
2665        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2666        if (!sock)
2667                goto out;
2668
2669        err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2670
2671        fput_light(sock->file, fput_needed);
2672out:
2673        return err;
2674}
2675
2676SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
2677{
2678        return __sys_sendmsg(fd, msg, flags, true);
2679}
2680
2681/*
2682 *      Linux sendmmsg interface
2683 */
2684
2685int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2686                   unsigned int flags, bool forbid_cmsg_compat)
2687{
2688        int fput_needed, err, datagrams;
2689        struct socket *sock;
2690        struct mmsghdr __user *entry;
2691        struct compat_mmsghdr __user *compat_entry;
2692        struct msghdr msg_sys;
2693        struct used_address used_address;
2694        unsigned int oflags = flags;
2695
2696        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2697                return -EINVAL;
2698
2699        if (vlen > UIO_MAXIOV)
2700                vlen = UIO_MAXIOV;
2701
2702        datagrams = 0;
2703
2704        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2705        if (!sock)
2706                return err;
2707
2708        used_address.name_len = UINT_MAX;
2709        entry = mmsg;
2710        compat_entry = (struct compat_mmsghdr __user *)mmsg;
2711        err = 0;
2712        flags |= MSG_BATCH;
2713
2714        while (datagrams < vlen) {
2715                if (datagrams == vlen - 1)
2716                        flags = oflags;
2717
2718                if (MSG_CMSG_COMPAT & flags) {
2719                        err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2720                                             &msg_sys, flags, &used_address, MSG_EOR);
2721                        if (err < 0)
2722                                break;
2723                        err = __put_user(err, &compat_entry->msg_len);
2724                        ++compat_entry;
2725                } else {
2726                        err = ___sys_sendmsg(sock,
2727                                             (struct user_msghdr __user *)entry,
2728                                             &msg_sys, flags, &used_address, MSG_EOR);
2729                        if (err < 0)
2730                                break;
2731                        err = put_user(err, &entry->msg_len);
2732                        ++entry;
2733                }
2734
2735                if (err)
2736                        break;
2737                ++datagrams;
2738                if (msg_data_left(&msg_sys))
2739                        break;
2740                cond_resched();
2741        }
2742
2743        fput_light(sock->file, fput_needed);
2744
2745        /* We only return an error if no datagrams were able to be sent */
2746        if (datagrams != 0)
2747                return datagrams;
2748
2749        return err;
2750}
2751
2752SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2753                unsigned int, vlen, unsigned int, flags)
2754{
2755        return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
2756}
2757
2758int recvmsg_copy_msghdr(struct msghdr *msg,
2759                        struct user_msghdr __user *umsg, unsigned flags,
2760                        struct sockaddr __user **uaddr,
2761                        struct iovec **iov)
2762{
2763        ssize_t err;
2764
2765        if (MSG_CMSG_COMPAT & flags) {
2766                struct compat_msghdr __user *msg_compat;
2767
2768                msg_compat = (struct compat_msghdr __user *) umsg;
2769                err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2770        } else {
2771                err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2772        }
2773        if (err < 0)
2774                return err;
2775
2776        return 0;
2777}
2778
2779static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2780                           struct user_msghdr __user *msg,
2781                           struct sockaddr __user *uaddr,
2782                           unsigned int flags, int nosec)
2783{
2784        struct compat_msghdr __user *msg_compat =
2785                                        (struct compat_msghdr __user *) msg;
2786        int __user *uaddr_len = COMPAT_NAMELEN(msg);
2787        struct sockaddr_storage addr;
2788        unsigned long cmsg_ptr;
2789        int len;
2790        ssize_t err;
2791
2792        msg_sys->msg_name = &addr;
2793        cmsg_ptr = (unsigned long)msg_sys->msg_control;
2794        msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2795
2796        /* We assume all kernel code knows the size of sockaddr_storage */
2797        msg_sys->msg_namelen = 0;
2798
2799        if (sock->file->f_flags & O_NONBLOCK)
2800                flags |= MSG_DONTWAIT;
2801
2802        if (unlikely(nosec))
2803                err = sock_recvmsg_nosec(sock, msg_sys, flags);
2804        else
2805                err = sock_recvmsg(sock, msg_sys, flags);
2806
2807        if (err < 0)
2808                goto out;
2809        len = err;
2810
2811        if (uaddr != NULL) {
2812                err = move_addr_to_user(&addr,
2813                                        msg_sys->msg_namelen, uaddr,
2814                                        uaddr_len);
2815                if (err < 0)
2816                        goto out;
2817        }
2818        err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2819                         COMPAT_FLAGS(msg));
2820        if (err)
2821                goto out;
2822        if (MSG_CMSG_COMPAT & flags)
2823                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2824                                 &msg_compat->msg_controllen);
2825        else
2826                err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2827                                 &msg->msg_controllen);
2828        if (err)
2829                goto out;
2830        err = len;
2831out:
2832        return err;
2833}
2834
2835static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2836                         struct msghdr *msg_sys, unsigned int flags, int nosec)
2837{
2838        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2839        /* user mode address pointers */
2840        struct sockaddr __user *uaddr;
2841        ssize_t err;
2842
2843        err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2844        if (err < 0)
2845                return err;
2846
2847        err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
2848        kfree(iov);
2849        return err;
2850}
2851
2852/*
2853 *      BSD recvmsg interface
2854 */
2855
2856long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2857                        struct user_msghdr __user *umsg,
2858                        struct sockaddr __user *uaddr, unsigned int flags)
2859{
2860        return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
2861}
2862
2863long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2864                   bool forbid_cmsg_compat)
2865{
2866        int fput_needed, err;
2867        struct msghdr msg_sys;
2868        struct socket *sock;
2869
2870        if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2871                return -EINVAL;
2872
2873        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2874        if (!sock)
2875                goto out;
2876
2877        err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2878
2879        fput_light(sock->file, fput_needed);
2880out:
2881        return err;
2882}
2883
2884SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2885                unsigned int, flags)
2886{
2887        return __sys_recvmsg(fd, msg, flags, true);
2888}
2889
2890/*
2891 *     Linux recvmmsg interface
2892 */
2893
2894static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2895                          unsigned int vlen, unsigned int flags,
2896                          struct timespec64 *timeout)
2897{
2898        int fput_needed, err, datagrams;
2899        struct socket *sock;
2900        struct mmsghdr __user *entry;
2901        struct compat_mmsghdr __user *compat_entry;
2902        struct msghdr msg_sys;
2903        struct timespec64 end_time;
2904        struct timespec64 timeout64;
2905
2906        if (timeout &&
2907            poll_select_set_timeout(&end_time, timeout->tv_sec,
2908                                    timeout->tv_nsec))
2909                return -EINVAL;
2910
2911        datagrams = 0;
2912
2913        sock = sockfd_lookup_light(fd, &err, &fput_needed);
2914        if (!sock)
2915                return err;
2916
2917        if (likely(!(flags & MSG_ERRQUEUE))) {
2918                err = sock_error(sock->sk);
2919                if (err) {
2920                        datagrams = err;
2921                        goto out_put;
2922                }
2923        }
2924
2925        entry = mmsg;
2926        compat_entry = (struct compat_mmsghdr __user *)mmsg;
2927
2928        while (datagrams < vlen) {
2929                /*
2930                 * No need to ask LSM for more than the first datagram.
2931                 */
2932                if (MSG_CMSG_COMPAT & flags) {
2933                        err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
2934                                             &msg_sys, flags & ~MSG_WAITFORONE,
2935                                             datagrams);
2936                        if (err < 0)
2937                                break;
2938                        err = __put_user(err, &compat_entry->msg_len);
2939                        ++compat_entry;
2940                } else {
2941                        err = ___sys_recvmsg(sock,
2942                                             (struct user_msghdr __user *)entry,
2943                                             &msg_sys, flags & ~MSG_WAITFORONE,
2944                                             datagrams);
2945                        if (err < 0)
2946                                break;
2947                        err = put_user(err, &entry->msg_len);
2948                        ++entry;
2949                }
2950
2951                if (err)
2952                        break;
2953                ++datagrams;
2954
2955                /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2956                if (flags & MSG_WAITFORONE)
2957                        flags |= MSG_DONTWAIT;
2958
2959                if (timeout) {
2960                        ktime_get_ts64(&timeout64);
2961                        *timeout = timespec64_sub(end_time, timeout64);
2962                        if (timeout->tv_sec < 0) {
2963                                timeout->tv_sec = timeout->tv_nsec = 0;
2964                                break;
2965                        }
2966
2967                        /* Timeout, return less than vlen datagrams */
2968                        if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2969                                break;
2970                }
2971
2972                /* Out of band data, return right away */
2973                if (msg_sys.msg_flags & MSG_OOB)
2974                        break;
2975                cond_resched();
2976        }
2977
2978        if (err == 0)
2979                goto out_put;
2980
2981        if (datagrams == 0) {
2982                datagrams = err;
2983                goto out_put;
2984        }
2985
2986        /*
2987         * We may return less entries than requested (vlen) if the
2988         * sock is non block and there aren't enough datagrams...
2989         */
2990        if (err != -EAGAIN) {
2991                /*
2992                 * ... or  if recvmsg returns an error after we
2993                 * received some datagrams, where we record the
2994                 * error to return on the next call or if the
2995                 * app asks about it using getsockopt(SO_ERROR).
2996                 */
2997                WRITE_ONCE(sock->sk->sk_err, -err);
2998        }
2999out_put:
3000        fput_light(sock->file, fput_needed);
3001
3002        return datagrams;
3003}
3004
3005int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
3006                   unsigned int vlen, unsigned int flags,
3007                   struct __kernel_timespec __user *timeout,
3008                   struct old_timespec32 __user *timeout32)
3009{
3010        int datagrams;
3011        struct timespec64 timeout_sys;
3012
3013        if (timeout && get_timespec64(&timeout_sys, timeout))
3014                return -EFAULT;
3015
3016        if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
3017                return -EFAULT;
3018
3019        if (!timeout && !timeout32)
3020                return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
3021
3022        datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
3023
3024        if (datagrams <= 0)
3025                return datagrams;
3026
3027        if (timeout && put_timespec64(&timeout_sys, timeout))
3028                datagrams = -EFAULT;
3029
3030        if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
3031                datagrams = -EFAULT;
3032
3033        return datagrams;
3034}
3035
3036SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3037                unsigned int, vlen, unsigned int, flags,
3038                struct __kernel_timespec __user *, timeout)
3039{
3040        if (flags & MSG_CMSG_COMPAT)
3041                return -EINVAL;
3042
3043        return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3044}
3045
3046#ifdef CONFIG_COMPAT_32BIT_TIME
3047SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
3048                unsigned int, vlen, unsigned int, flags,
3049                struct old_timespec32 __user *, timeout)
3050{
3051        if (flags & MSG_CMSG_COMPAT)
3052                return -EINVAL;
3053
3054        return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
3055}
3056#endif
3057
3058#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall: number of bytes to copy from
 * user space for each call number.  Slot 0 is never looked up because
 * call numbers below 1 are rejected.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
        /*  0 */ AL(0),
        /*  1 */ AL(3), AL(3), AL(3), AL(2), AL(3),
        /*  6 */ AL(3), AL(3), AL(4), AL(4), AL(4),
        /* 11 */ AL(6), AL(6), AL(2), AL(5), AL(5),
        /* 16 */ AL(3), AL(3), AL(4), AL(5), AL(4),
};

#undef AL
3069
3070/*
3071 *      System call vectors.
3072 *
3073 *      Argument checking cleaned up. Saved 20% in size.
3074 *  This function doesn't need to set the kernel lock because
3075 *  it is set by the callees.
3076 */
3077
3078SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
3079{
3080        unsigned long a[AUDITSC_ARGS];
3081        unsigned long a0, a1;
3082        int err;
3083        unsigned int len;
3084
3085        if (call < 1 || call > SYS_SENDMMSG)
3086                return -EINVAL;
3087        call = array_index_nospec(call, SYS_SENDMMSG + 1);
3088
3089        len = nargs[call];
3090        if (len > sizeof(a))
3091                return -EINVAL;
3092
3093        /* copy_from_user should be SMP safe. */
3094        if (copy_from_user(a, args, len))
3095                return -EFAULT;
3096
3097        err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3098        if (err)
3099                return err;
3100
3101        a0 = a[0];
3102        a1 = a[1];
3103
3104        switch (call) {
3105        case SYS_SOCKET:
3106                err = __sys_socket(a0, a1, a[2]);
3107                break;
3108        case SYS_BIND:
3109                err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
3110                break;
3111        case SYS_CONNECT:
3112                err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
3113                break;
3114        case SYS_LISTEN:
3115                err = __sys_listen(a0, a1);
3116                break;
3117        case SYS_ACCEPT:
3118                err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3119                                    (int __user *)a[2], 0);
3120                break;
3121        case SYS_GETSOCKNAME:
3122                err =
3123                    __sys_getsockname(a0, (struct sockaddr __user *)a1,
3124                                      (int __user *)a[2]);
3125                break;
3126        case SYS_GETPEERNAME:
3127                err =
3128                    __sys_getpeername(a0, (struct sockaddr __user *)a1,
3129                                      (int __user *)a[2]);
3130                break;
3131        case SYS_SOCKETPAIR:
3132                err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
3133                break;
3134        case SYS_SEND:
3135                err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3136                                   NULL, 0);
3137                break;
3138        case SYS_SENDTO:
3139                err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3140                                   (struct sockaddr __user *)a[4], a[5]);
3141                break;
3142        case SYS_RECV:
3143                err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3144                                     NULL, NULL);
3145                break;
3146        case SYS_RECVFROM:
3147                err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3148                                     (struct sockaddr __user *)a[4],
3149                                     (int __user *)a[5]);
3150                break;
3151        case SYS_SHUTDOWN:
3152                err = __sys_shutdown(a0, a1);
3153                break;
3154        case SYS_SETSOCKOPT:
3155                err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3156                                       a[4]);
3157                break;
3158        case SYS_GETSOCKOPT:
3159                err =
3160                    __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3161                                     (int __user *)a[4]);
3162                break;
3163        case SYS_SENDMSG:
3164                err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3165                                    a[2], true);
3166                break;
3167        case SYS_SENDMMSG:
3168                err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3169                                     a[3], true);
3170                break;
3171        case SYS_RECVMSG:
3172                err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3173                                    a[2], true);
3174                break;
3175        case SYS_RECVMMSG:
3176                if (IS_ENABLED(CONFIG_64BIT))
3177                        err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3178                                             a[2], a[3],
3179                                             (struct __kernel_timespec __user *)a[4],
3180                                             NULL);
3181                else
3182                        err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3183                                             a[2], a[3], NULL,
3184                                             (struct old_timespec32 __user *)a[4]);
3185                break;
3186        case SYS_ACCEPT4:
3187                err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3188                                    (int __user *)a[2], a[3]);
3189                break;
3190        default:
3191                err = -EINVAL;
3192                break;
3193        }
3194        return err;
3195}
3196
3197#endif                          /* __ARCH_WANT_SYS_SOCKETCALL */
3198
3199/**
3200 *      sock_register - add a socket protocol handler
3201 *      @ops: description of protocol
3202 *
3203 *      This function is called by a protocol handler that wants to
3204 *      advertise its address family, and have it linked into the
3205 *      socket interface. The value ops->family corresponds to the
3206 *      socket system call protocol family.
3207 */
3208int sock_register(const struct net_proto_family *ops)
3209{
3210        int err;
3211
3212        if (ops->family >= NPROTO) {
3213                pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
3214                return -ENOBUFS;
3215        }
3216
3217        spin_lock(&net_family_lock);
3218        if (rcu_dereference_protected(net_families[ops->family],
3219                                      lockdep_is_held(&net_family_lock)))
3220                err = -EEXIST;
3221        else {
3222                rcu_assign_pointer(net_families[ops->family], ops);
3223                err = 0;
3224        }
3225        spin_unlock(&net_family_lock);
3226
3227        pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
3228        return err;
3229}
3230EXPORT_SYMBOL(sock_register);
3231
3232/**
3233 *      sock_unregister - remove a protocol handler
3234 *      @family: protocol family to remove
3235 *
3236 *      This function is called by a protocol handler that wants to
3237 *      remove its address family, and have it unlinked from the
3238 *      new socket creation.
3239 *
3240 *      If protocol handler is a module, then it can use module reference
3241 *      counts to protect against new references. If protocol handler is not
3242 *      a module then it needs to provide its own protection in
3243 *      the ops->create routine.
3244 */
3245void sock_unregister(int family)
3246{
3247        BUG_ON(family < 0 || family >= NPROTO);
3248
3249        spin_lock(&net_family_lock);
3250        RCU_INIT_POINTER(net_families[family], NULL);
3251        spin_unlock(&net_family_lock);
3252
3253        synchronize_rcu();
3254
3255        pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
3256}
3257EXPORT_SYMBOL(sock_unregister);
3258
3259bool sock_is_registered(int family)
3260{
3261        return family < NPROTO && rcu_access_pointer(net_families[family]);
3262}
3263
3264static int __init sock_init(void)
3265{
3266        int err;
3267        /*
3268         *      Initialize the network sysctl infrastructure.
3269         */
3270        err = net_sysctl_init();
3271        if (err)
3272                goto out;
3273
3274        /*
3275         *      Initialize skbuff SLAB cache
3276         */
3277        skb_init();
3278
3279        /*
3280         *      Initialize the protocols module.
3281         */
3282
3283        init_inodecache();
3284
3285        err = register_filesystem(&sock_fs_type);
3286        if (err)
3287                goto out;
3288        sock_mnt = kern_mount(&sock_fs_type);
3289        if (IS_ERR(sock_mnt)) {
3290                err = PTR_ERR(sock_mnt);
3291                goto out_mount;
3292        }
3293
3294        /* The real protocol initialization is performed in later initcalls.
3295         */
3296
3297#ifdef CONFIG_NETFILTER
3298        err = netfilter_init();
3299        if (err)
3300                goto out;
3301#endif
3302
3303        ptp_classifier_init();
3304
3305out:
3306        return err;
3307
3308out_mount:
3309        unregister_filesystem(&sock_fs_type);
3310        goto out;
3311}
3312
3313core_initcall(sock_init);       /* early initcall */
3314
3315#ifdef CONFIG_PROC_FS
3316void socket_seq_show(struct seq_file *seq)
3317{
3318        seq_printf(seq, "sockets: used %d\n",
3319                   sock_inuse_get(seq->private));
3320}
3321#endif                          /* CONFIG_PROC_FS */
3322
3323/* Handle the fact that while struct ifreq has the same *layout* on
3324 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3325 * which are handled elsewhere, it still has different *size* due to
3326 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3327 * resulting in struct ifreq being 32 and 40 bytes respectively).
3328 * As a result, if the struct happens to be at the end of a page and
3329 * the next page isn't readable/writable, we get a fault. To prevent
3330 * that, copy back and forth to the full size.
3331 */
3332int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3333{
3334        if (in_compat_syscall()) {
3335                struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3336
3337                memset(ifr, 0, sizeof(*ifr));
3338                if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3339                        return -EFAULT;
3340
3341                if (ifrdata)
3342                        *ifrdata = compat_ptr(ifr32->ifr_data);
3343
3344                return 0;
3345        }
3346
3347        if (copy_from_user(ifr, arg, sizeof(*ifr)))
3348                return -EFAULT;
3349
3350        if (ifrdata)
3351                *ifrdata = ifr->ifr_data;
3352
3353        return 0;
3354}
3355EXPORT_SYMBOL(get_user_ifreq);
3356
3357int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3358{
3359        size_t size = sizeof(*ifr);
3360
3361        if (in_compat_syscall())
3362                size = sizeof(struct compat_ifreq);
3363
3364        if (copy_to_user(arg, ifr, size))
3365                return -EFAULT;
3366
3367        return 0;
3368}
3369EXPORT_SYMBOL(put_user_ifreq);
3370
3371#ifdef CONFIG_COMPAT
3372static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3373{
3374        compat_uptr_t uptr32;
3375        struct ifreq ifr;
3376        void __user *saved;
3377        int err;
3378
3379        if (get_user_ifreq(&ifr, NULL, uifr32))
3380                return -EFAULT;
3381
3382        if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3383                return -EFAULT;
3384
3385        saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3386        ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
3387
3388        err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
3389        if (!err) {
3390                ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3391                if (put_user_ifreq(&ifr, uifr32))
3392                        err = -EFAULT;
3393        }
3394        return err;
3395}
3396
3397/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3398static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
3399                                 struct compat_ifreq __user *u_ifreq32)
3400{
3401        struct ifreq ifreq;
3402        void __user *data;
3403
3404        if (!is_socket_ioctl_cmd(cmd))
3405                return -ENOTTY;
3406        if (get_user_ifreq(&ifreq, &data, u_ifreq32))
3407                return -EFAULT;
3408        ifreq.ifr_data = data;
3409
3410        return dev_ioctl(net, cmd, &ifreq, data, NULL);
3411}
3412
3413static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3414                         unsigned int cmd, unsigned long arg)
3415{
3416        void __user *argp = compat_ptr(arg);
3417        struct sock *sk = sock->sk;
3418        struct net *net = sock_net(sk);
3419        const struct proto_ops *ops;
3420
3421        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3422                return sock_ioctl(file, cmd, (unsigned long)argp);
3423
3424        switch (cmd) {
3425        case SIOCWANDEV:
3426                return compat_siocwandev(net, argp);
3427        case SIOCGSTAMP_OLD:
3428        case SIOCGSTAMPNS_OLD:
3429                ops = READ_ONCE(sock->ops);
3430                if (!ops->gettstamp)
3431                        return -ENOIOCTLCMD;
3432                return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3433                                      !COMPAT_USE_64BIT_TIME);
3434
3435        case SIOCETHTOOL:
3436        case SIOCBONDSLAVEINFOQUERY:
3437        case SIOCBONDINFOQUERY:
3438        case SIOCSHWTSTAMP:
3439        case SIOCGHWTSTAMP:
3440                return compat_ifr_data_ioctl(net, cmd, argp);
3441
3442        case FIOSETOWN:
3443        case SIOCSPGRP:
3444        case FIOGETOWN:
3445        case SIOCGPGRP:
3446        case SIOCBRADDBR:
3447        case SIOCBRDELBR:
3448        case SIOCGIFVLAN:
3449        case SIOCSIFVLAN:
3450        case SIOCGSKNS:
3451        case SIOCGSTAMP_NEW:
3452        case SIOCGSTAMPNS_NEW:
3453        case SIOCGIFCONF:
3454        case SIOCSIFBR:
3455        case SIOCGIFBR:
3456                return sock_ioctl(file, cmd, arg);
3457
3458        case SIOCGIFFLAGS:
3459        case SIOCSIFFLAGS:
3460        case SIOCGIFMAP:
3461        case SIOCSIFMAP:
3462        case SIOCGIFMETRIC:
3463        case SIOCSIFMETRIC:
3464        case SIOCGIFMTU:
3465        case SIOCSIFMTU:
3466        case SIOCGIFMEM:
3467        case SIOCSIFMEM:
3468        case SIOCGIFHWADDR:
3469        case SIOCSIFHWADDR:
3470        case SIOCADDMULTI:
3471        case SIOCDELMULTI:
3472        case SIOCGIFINDEX:
3473        case SIOCGIFADDR:
3474        case SIOCSIFADDR:
3475        case SIOCSIFHWBROADCAST:
3476        case SIOCDIFADDR:
3477        case SIOCGIFBRDADDR:
3478        case SIOCSIFBRDADDR:
3479        case SIOCGIFDSTADDR:
3480        case SIOCSIFDSTADDR:
3481        case SIOCGIFNETMASK:
3482        case SIOCSIFNETMASK:
3483        case SIOCSIFPFLAGS:
3484        case SIOCGIFPFLAGS:
3485        case SIOCGIFTXQLEN:
3486        case SIOCSIFTXQLEN:
3487        case SIOCBRADDIF:
3488        case SIOCBRDELIF:
3489        case SIOCGIFNAME:
3490        case SIOCSIFNAME:
3491        case SIOCGMIIPHY:
3492        case SIOCGMIIREG:
3493        case SIOCSMIIREG:
3494        case SIOCBONDENSLAVE:
3495        case SIOCBONDRELEASE:
3496        case SIOCBONDSETHWADDR:
3497        case SIOCBONDCHANGEACTIVE:
3498        case SIOCSARP:
3499        case SIOCGARP:
3500        case SIOCDARP:
3501        case SIOCOUTQ:
3502        case SIOCOUTQNSD:
3503        case SIOCATMARK:
3504                return sock_do_ioctl(net, sock, cmd, arg);
3505        }
3506
3507        return -ENOIOCTLCMD;
3508}
3509
3510static long compat_sock_ioctl(struct file *file, unsigned int cmd,
3511                              unsigned long arg)
3512{
3513        struct socket *sock = file->private_data;
3514        const struct proto_ops *ops = READ_ONCE(sock->ops);
3515        int ret = -ENOIOCTLCMD;
3516        struct sock *sk;
3517        struct net *net;
3518
3519        sk = sock->sk;
3520        net = sock_net(sk);
3521
3522        if (ops->compat_ioctl)
3523                ret = ops->compat_ioctl(sock, cmd, arg);
3524
3525        if (ret == -ENOIOCTLCMD &&
3526            (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3527                ret = compat_wext_handle_ioctl(net, cmd, arg);
3528
3529        if (ret == -ENOIOCTLCMD)
3530                ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3531
3532        return ret;
3533}
3534#endif
3535
3536/**
3537 *      kernel_bind - bind an address to a socket (kernel space)
3538 *      @sock: socket
3539 *      @addr: address
3540 *      @addrlen: length of address
3541 *
3542 *      Returns 0 or an error.
3543 */
3544
3545int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3546{
3547        struct sockaddr_storage address;
3548
3549        memcpy(&address, addr, addrlen);
3550
3551        return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3552                                          addrlen);
3553}
3554EXPORT_SYMBOL(kernel_bind);
3555
3556/**
3557 *      kernel_listen - move socket to listening state (kernel space)
3558 *      @sock: socket
3559 *      @backlog: pending connections queue size
3560 *
3561 *      Returns 0 or an error.
3562 */
3563
3564int kernel_listen(struct socket *sock, int backlog)
3565{
3566        return READ_ONCE(sock->ops)->listen(sock, backlog);
3567}
3568EXPORT_SYMBOL(kernel_listen);
3569
3570/**
3571 *      kernel_accept - accept a connection (kernel space)
3572 *      @sock: listening socket
3573 *      @newsock: new connected socket
3574 *      @flags: flags
3575 *
3576 *      @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3577 *      If it fails, @newsock is guaranteed to be %NULL.
3578 *      Returns 0 or an error.
3579 */
3580
3581int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3582{
3583        struct sock *sk = sock->sk;
3584        const struct proto_ops *ops = READ_ONCE(sock->ops);
3585        int err;
3586
3587        err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3588                               newsock);
3589        if (err < 0)
3590                goto done;
3591
3592        err = ops->accept(sock, *newsock, flags, true);
3593        if (err < 0) {
3594                sock_release(*newsock);
3595                *newsock = NULL;
3596                goto done;
3597        }
3598
3599        (*newsock)->ops = ops;
3600        __module_get(ops->owner);
3601
3602done:
3603        return err;
3604}
3605EXPORT_SYMBOL(kernel_accept);
3606
3607/**
3608 *      kernel_connect - connect a socket (kernel space)
3609 *      @sock: socket
3610 *      @addr: address
3611 *      @addrlen: address length
3612 *      @flags: flags (O_NONBLOCK, ...)
3613 *
3614 *      For datagram sockets, @addr is the address to which datagrams are sent
3615 *      by default, and the only address from which datagrams are received.
3616 *      For stream sockets, attempts to connect to @addr.
3617 *      Returns 0 or an error code.
3618 */
3619
3620int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3621                   int flags)
3622{
3623        struct sockaddr_storage address;
3624
3625        memcpy(&address, addr, addrlen);
3626
3627        return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3628                                             addrlen, flags);
3629}
3630EXPORT_SYMBOL(kernel_connect);
3631
3632/**
3633 *      kernel_getsockname - get the address which the socket is bound (kernel space)
3634 *      @sock: socket
3635 *      @addr: address holder
3636 *
3637 *      Fills the @addr pointer with the address which the socket is bound.
3638 *      Returns the length of the address in bytes or an error code.
3639 */
3640
3641int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
3642{
3643        return READ_ONCE(sock->ops)->getname(sock, addr, 0);
3644}
3645EXPORT_SYMBOL(kernel_getsockname);
3646
3647/**
3648 *      kernel_getpeername - get the address which the socket is connected (kernel space)
3649 *      @sock: socket
3650 *      @addr: address holder
3651 *
3652 *      Fills the @addr pointer with the address which the socket is connected.
3653 *      Returns the length of the address in bytes or an error code.
3654 */
3655
3656int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
3657{
3658        return READ_ONCE(sock->ops)->getname(sock, addr, 1);
3659}
3660EXPORT_SYMBOL(kernel_getpeername);
3661
3662/**
3663 *      kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
3664 *      @sock: socket
3665 *      @how: connection part
3666 *
3667 *      Returns 0 or an error.
3668 */
3669
3670int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3671{
3672        return READ_ONCE(sock->ops)->shutdown(sock, how);
3673}
3674EXPORT_SYMBOL(kernel_sock_shutdown);
3675
3676/**
3677 *      kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3678 *      @sk: socket
3679 *
3680 *      This routine returns the IP overhead imposed by a socket i.e.
3681 *      the length of the underlying IP header, depending on whether
3682 *      this is an IPv4 or IPv6 socket and the length from IP options turned
3683 *      on at the socket. Assumes that the caller has a lock on the socket.
3684 */
3685
3686u32 kernel_sock_ip_overhead(struct sock *sk)
3687{
3688        struct inet_sock *inet;
3689        struct ip_options_rcu *opt;
3690        u32 overhead = 0;
3691#if IS_ENABLED(CONFIG_IPV6)
3692        struct ipv6_pinfo *np;
3693        struct ipv6_txoptions *optv6 = NULL;
3694#endif /* IS_ENABLED(CONFIG_IPV6) */
3695
3696        if (!sk)
3697                return overhead;
3698
3699        switch (sk->sk_family) {
3700        case AF_INET:
3701                inet = inet_sk(sk);
3702                overhead += sizeof(struct iphdr);
3703                opt = rcu_dereference_protected(inet->inet_opt,
3704                                                sock_owned_by_user(sk));
3705                if (opt)
3706                        overhead += opt->opt.optlen;
3707                return overhead;
3708#if IS_ENABLED(CONFIG_IPV6)
3709        case AF_INET6:
3710                np = inet6_sk(sk);
3711                overhead += sizeof(struct ipv6hdr);
3712                if (np)
3713                        optv6 = rcu_dereference_protected(np->opt,
3714                                                          sock_owned_by_user(sk));
3715                if (optv6)
3716                        overhead += (optv6->opt_flen + optv6->opt_nflen);
3717                return overhead;
3718#endif /* IS_ENABLED(CONFIG_IPV6) */
3719        default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3720                return overhead;
3721        }
3722}
3723EXPORT_SYMBOL(kernel_sock_ip_overhead);
3724