linux/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  46 *                                      protocol-independent
  47 *
  48 *
  49 *              This program is free software; you can redistribute it and/or
  50 *              modify it under the terms of the GNU General Public License
  51 *              as published by the Free Software Foundation; either version
  52 *              2 of the License, or (at your option) any later version.
  53 *
  54 *
  55 *      This module is effectively the top level interface to the BSD socket
  56 *      paradigm.
  57 *
  58 *      Based upon Swansea University Computer Society NET3.039
  59 */
  60
  61#include <linux/mm.h>
  62#include <linux/socket.h>
  63#include <linux/file.h>
  64#include <linux/net.h>
  65#include <linux/interrupt.h>
  66#include <linux/thread_info.h>
  67#include <linux/rcupdate.h>
  68#include <linux/netdevice.h>
  69#include <linux/proc_fs.h>
  70#include <linux/seq_file.h>
  71#include <linux/mutex.h>
  72#include <linux/wanrouter.h>
  73#include <linux/if_bridge.h>
  74#include <linux/if_frad.h>
  75#include <linux/if_vlan.h>
  76#include <linux/init.h>
  77#include <linux/poll.h>
  78#include <linux/cache.h>
  79#include <linux/module.h>
  80#include <linux/highmem.h>
  81#include <linux/mount.h>
  82#include <linux/security.h>
  83#include <linux/syscalls.h>
  84#include <linux/compat.h>
  85#include <linux/kmod.h>
  86#include <linux/audit.h>
  87#include <linux/wireless.h>
  88#include <linux/nsproxy.h>
  89
  90#include <asm/uaccess.h>
  91#include <asm/unistd.h>
  92
  93#include <net/compat.h>
  94#include <net/wext.h>
  95
  96#include <net/sock.h>
  97#include <linux/netfilter.h>
  98
  99static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 100static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 101                         unsigned long nr_segs, loff_t pos);
 102static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 103                          unsigned long nr_segs, loff_t pos);
 104static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 105
 106static int sock_close(struct inode *inode, struct file *file);
 107static unsigned int sock_poll(struct file *file,
 108                              struct poll_table_struct *wait);
 109static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 110#ifdef CONFIG_COMPAT
 111static long compat_sock_ioctl(struct file *file,
 112                              unsigned int cmd, unsigned long arg);
 113#endif
 114static int sock_fasync(int fd, struct file *filp, int on);
 115static ssize_t sock_sendpage(struct file *file, struct page *page,
 116                             int offset, size_t size, loff_t *ppos, int more);
 117static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 118                                struct pipe_inode_info *pipe, size_t len,
 119                                unsigned int flags);
 120
 121/*
 122 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 123 *      in the operation structures but are done directly via the socketcall() multiplexor.
 124 */
 125
 126static const struct file_operations socket_file_ops = {
 127        .owner =        THIS_MODULE,
 128        .llseek =       no_llseek,
 129        .aio_read =     sock_aio_read,
 130        .aio_write =    sock_aio_write,
 131        .poll =         sock_poll,
 132        .unlocked_ioctl = sock_ioctl,
 133#ifdef CONFIG_COMPAT
 134        .compat_ioctl = compat_sock_ioctl,
 135#endif
 136        .mmap =         sock_mmap,
 137        .open =         sock_no_open,   /* special open code to disallow open via /proc */
 138        .release =      sock_close,
 139        .fasync =       sock_fasync,
 140        .sendpage =     sock_sendpage,
 141        .splice_write = generic_splice_sendpage,
 142        .splice_read =  sock_splice_read,
 143};
 144
 145/*
 146 *      The protocol list. Each protocol is registered in here.
 147 */
 148
 149static DEFINE_SPINLOCK(net_family_lock);
 150static const struct net_proto_family *net_families[NPROTO] __read_mostly;
 151
 152/*
 153 *      Statistics counters of the socket lists
 154 */
 155
 156static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 157
 158/*
 159 * Support routines.
 160 * Move socket addresses back and forth across the kernel/user
 161 * divide and look after the messy bits.
 162 */
 163
 164#define MAX_SOCK_ADDR   128             /* 108 for Unix domain -
 165                                           16 for IP, 16 for IPX,
 166                                           24 for IPv6,
 167                                           about 80 for AX.25
 168                                           must be at least one bigger than
 169                                           the AF_UNIX size (see net/unix/af_unix.c
 170                                           :unix_mkname()).
 171                                         */
 172
 173/**
 174 *      move_addr_to_kernel     -       copy a socket address into kernel space
 175 *      @uaddr: Address in user space
 176 *      @kaddr: Address in kernel space
 177 *      @ulen: Length in user space
 178 *
 179 *      The address is copied into kernel space. If the provided address is
 180 *      too long an error code of -EINVAL is returned. If the copy gives
 181 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 182 */
 183
 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
 185{
 186        if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 187                return -EINVAL;
 188        if (ulen == 0)
 189                return 0;
 190        if (copy_from_user(kaddr, uaddr, ulen))
 191                return -EFAULT;
 192        return audit_sockaddr(ulen, kaddr);
 193}
 194
 195/**
 196 *      move_addr_to_user       -       copy an address to user space
 197 *      @kaddr: kernel space address
 198 *      @klen: length of address in kernel
 199 *      @uaddr: user space address
 200 *      @ulen: pointer to user length field
 201 *
 202 *      The value pointed to by ulen on entry is the buffer length available.
 203 *      This is overwritten with the buffer space used. -EINVAL is returned
 204 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 205 *      is returned if either the buffer or the length field are not
 206 *      accessible.
 207 *      After copying the data up to the limit the user specifies, the true
 208 *      length of the data is written over the length limit the user
 209 *      specified. Zero is returned for a success.
 210 */
 211
 212int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
 213                      int __user *ulen)
 214{
 215        int err;
 216        int len;
 217
 218        err = get_user(len, ulen);
 219        if (err)
 220                return err;
 221        if (len > klen)
 222                len = klen;
 223        if (len < 0 || len > sizeof(struct sockaddr_storage))
 224                return -EINVAL;
 225        if (len) {
 226                if (audit_sockaddr(klen, kaddr))
 227                        return -ENOMEM;
 228                if (copy_to_user(uaddr, kaddr, len))
 229                        return -EFAULT;
 230        }
 231        /*
 232         *      "fromlen shall refer to the value before truncation.."
 233         *                      1003.1g
 234         */
 235        return __put_user(klen, ulen);
 236}
 237
 238#define SOCKFS_MAGIC 0x534F434B
 239
 240static struct kmem_cache *sock_inode_cachep __read_mostly;
 241
 242static struct inode *sock_alloc_inode(struct super_block *sb)
 243{
 244        struct socket_alloc *ei;
 245
 246        ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 247        if (!ei)
 248                return NULL;
 249        init_waitqueue_head(&ei->socket.wait);
 250
 251        ei->socket.fasync_list = NULL;
 252        ei->socket.state = SS_UNCONNECTED;
 253        ei->socket.flags = 0;
 254        ei->socket.ops = NULL;
 255        ei->socket.sk = NULL;
 256        ei->socket.file = NULL;
 257
 258        return &ei->vfs_inode;
 259}
 260
 261static void sock_destroy_inode(struct inode *inode)
 262{
 263        kmem_cache_free(sock_inode_cachep,
 264                        container_of(inode, struct socket_alloc, vfs_inode));
 265}
 266
 267static void init_once(void *foo)
 268{
 269        struct socket_alloc *ei = (struct socket_alloc *)foo;
 270
 271        inode_init_once(&ei->vfs_inode);
 272}
 273
 274static int init_inodecache(void)
 275{
 276        sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 277                                              sizeof(struct socket_alloc),
 278                                              0,
 279                                              (SLAB_HWCACHE_ALIGN |
 280                                               SLAB_RECLAIM_ACCOUNT |
 281                                               SLAB_MEM_SPREAD),
 282                                              init_once);
 283        if (sock_inode_cachep == NULL)
 284                return -ENOMEM;
 285        return 0;
 286}
 287
 288static struct super_operations sockfs_ops = {
 289        .alloc_inode =  sock_alloc_inode,
 290        .destroy_inode =sock_destroy_inode,
 291        .statfs =       simple_statfs,
 292};
 293
 294static int sockfs_get_sb(struct file_system_type *fs_type,
 295                         int flags, const char *dev_name, void *data,
 296                         struct vfsmount *mnt)
 297{
 298        return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
 299                             mnt);
 300}
 301
 302static struct vfsmount *sock_mnt __read_mostly;
 303
 304static struct file_system_type sock_fs_type = {
 305        .name =         "sockfs",
 306        .get_sb =       sockfs_get_sb,
 307        .kill_sb =      kill_anon_super,
 308};
 309
 310static int sockfs_delete_dentry(struct dentry *dentry)
 311{
 312        /*
 313         * At creation time, we pretended this dentry was hashed
 314         * (by clearing DCACHE_UNHASHED bit in d_flags)
 315         * At delete time, we restore the truth : not hashed.
 316         * (so that dput() can proceed correctly)
 317         */
 318        dentry->d_flags |= DCACHE_UNHASHED;
 319        return 0;
 320}
 321
 322/*
 323 * sockfs_dname() is called from d_path().
 324 */
 325static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 326{
 327        return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
 328                                dentry->d_inode->i_ino);
 329}
 330
 331static struct dentry_operations sockfs_dentry_operations = {
 332        .d_delete = sockfs_delete_dentry,
 333        .d_dname  = sockfs_dname,
 334};
 335
 336/*
 337 *      Obtains the first available file descriptor and sets it up for use.
 338 *
 339 *      These functions create file structures and maps them to fd space
 340 *      of the current process. On success it returns file descriptor
 341 *      and file struct implicitly stored in sock->file.
 342 *      Note that another thread may close file descriptor before we return
 343 *      from this function. We use the fact that now we do not refer
 344 *      to socket after mapping. If one day we will need it, this
 345 *      function will increment ref. count on file by 1.
 346 *
 347 *      In any case returned fd MAY BE not valid!
 348 *      This race condition is unavoidable
 349 *      with shared fd spaces, we cannot solve it inside kernel,
 350 *      but we take care of internal coherence yet.
 351 */
 352
 353static int sock_alloc_fd(struct file **filep, int flags)
 354{
 355        int fd;
 356
 357        fd = get_unused_fd_flags(flags);
 358        if (likely(fd >= 0)) {
 359                struct file *file = get_empty_filp();
 360
 361                *filep = file;
 362                if (unlikely(!file)) {
 363                        put_unused_fd(fd);
 364                        return -ENFILE;
 365                }
 366        } else
 367                *filep = NULL;
 368        return fd;
 369}
 370
 371static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
 372{
 373        struct dentry *dentry;
 374        struct qstr name = { .name = "" };
 375
 376        dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
 377        if (unlikely(!dentry))
 378                return -ENOMEM;
 379
 380        dentry->d_op = &sockfs_dentry_operations;
 381        /*
 382         * We dont want to push this dentry into global dentry hash table.
 383         * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 384         * This permits a working /proc/$pid/fd/XXX on sockets
 385         */
 386        dentry->d_flags &= ~DCACHE_UNHASHED;
 387        d_instantiate(dentry, SOCK_INODE(sock));
 388
 389        sock->file = file;
 390        init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
 391                  &socket_file_ops);
 392        SOCK_INODE(sock)->i_fop = &socket_file_ops;
 393        file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 394        file->f_pos = 0;
 395        file->private_data = sock;
 396
 397        return 0;
 398}
 399
 400int sock_map_fd(struct socket *sock, int flags)
 401{
 402        struct file *newfile;
 403        int fd = sock_alloc_fd(&newfile, flags);
 404
 405        if (likely(fd >= 0)) {
 406                int err = sock_attach_fd(sock, newfile, flags);
 407
 408                if (unlikely(err < 0)) {
 409                        put_filp(newfile);
 410                        put_unused_fd(fd);
 411                        return err;
 412                }
 413                fd_install(fd, newfile);
 414        }
 415        return fd;
 416}
 417
 418static struct socket *sock_from_file(struct file *file, int *err)
 419{
 420        if (file->f_op == &socket_file_ops)
 421                return file->private_data;      /* set in sock_map_fd */
 422
 423        *err = -ENOTSOCK;
 424        return NULL;
 425}
 426
 427/**
 428 *      sockfd_lookup   -       Go from a file number to its socket slot
 429 *      @fd: file handle
 430 *      @err: pointer to an error code return
 431 *
 432 *      The file handle passed in is locked and the socket it is bound
 433 *      too is returned. If an error occurs the err pointer is overwritten
 434 *      with a negative errno code and NULL is returned. The function checks
 435 *      for both invalid handles and passing a handle which is not a socket.
 436 *
 437 *      On a success the socket object pointer is returned.
 438 */
 439
 440struct socket *sockfd_lookup(int fd, int *err)
 441{
 442        struct file *file;
 443        struct socket *sock;
 444
 445        file = fget(fd);
 446        if (!file) {
 447                *err = -EBADF;
 448                return NULL;
 449        }
 450
 451        sock = sock_from_file(file, err);
 452        if (!sock)
 453                fput(file);
 454        return sock;
 455}
 456
 457static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 458{
 459        struct file *file;
 460        struct socket *sock;
 461
 462        *err = -EBADF;
 463        file = fget_light(fd, fput_needed);
 464        if (file) {
 465                sock = sock_from_file(file, err);
 466                if (sock)
 467                        return sock;
 468                fput_light(file, *fput_needed);
 469        }
 470        return NULL;
 471}
 472
 473/**
 474 *      sock_alloc      -       allocate a socket
 475 *
 476 *      Allocate a new inode and socket object. The two are bound together
 477 *      and initialised. The socket is then returned. If we are out of inodes
 478 *      NULL is returned.
 479 */
 480
 481static struct socket *sock_alloc(void)
 482{
 483        struct inode *inode;
 484        struct socket *sock;
 485
 486        inode = new_inode(sock_mnt->mnt_sb);
 487        if (!inode)
 488                return NULL;
 489
 490        sock = SOCKET_I(inode);
 491
 492        inode->i_mode = S_IFSOCK | S_IRWXUGO;
 493        inode->i_uid = current_fsuid();
 494        inode->i_gid = current_fsgid();
 495
 496        get_cpu_var(sockets_in_use)++;
 497        put_cpu_var(sockets_in_use);
 498        return sock;
 499}
 500
 501/*
 502 *      In theory you can't get an open on this inode, but /proc provides
 503 *      a back door. Remember to keep it shut otherwise you'll let the
 504 *      creepy crawlies in.
 505 */
 506
 507static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 508{
 509        return -ENXIO;
 510}
 511
 512const struct file_operations bad_sock_fops = {
 513        .owner = THIS_MODULE,
 514        .open = sock_no_open,
 515};
 516
 517/**
 518 *      sock_release    -       close a socket
 519 *      @sock: socket to close
 520 *
 521 *      The socket is released from the protocol stack if it has a release
 522 *      callback, and the inode is then released if the socket is bound to
 523 *      an inode not a file.
 524 */
 525
 526void sock_release(struct socket *sock)
 527{
 528        if (sock->ops) {
 529                struct module *owner = sock->ops->owner;
 530
 531                sock->ops->release(sock);
 532                sock->ops = NULL;
 533                module_put(owner);
 534        }
 535
 536        if (sock->fasync_list)
 537                printk(KERN_ERR "sock_release: fasync list not empty!\n");
 538
 539        get_cpu_var(sockets_in_use)--;
 540        put_cpu_var(sockets_in_use);
 541        if (!sock->file) {
 542                iput(SOCK_INODE(sock));
 543                return;
 544        }
 545        sock->file = NULL;
 546}
 547
 548static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 549                                 struct msghdr *msg, size_t size)
 550{
 551        struct sock_iocb *si = kiocb_to_siocb(iocb);
 552        int err;
 553
 554        si->sock = sock;
 555        si->scm = NULL;
 556        si->msg = msg;
 557        si->size = size;
 558
 559        err = security_socket_sendmsg(sock, msg, size);
 560        if (err)
 561                return err;
 562
 563        return sock->ops->sendmsg(iocb, sock, msg, size);
 564}
 565
 566int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 567{
 568        struct kiocb iocb;
 569        struct sock_iocb siocb;
 570        int ret;
 571
 572        init_sync_kiocb(&iocb, NULL);
 573        iocb.private = &siocb;
 574        ret = __sock_sendmsg(&iocb, sock, msg, size);
 575        if (-EIOCBQUEUED == ret)
 576                ret = wait_on_sync_kiocb(&iocb);
 577        return ret;
 578}
 579
 580int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 581                   struct kvec *vec, size_t num, size_t size)
 582{
 583        mm_segment_t oldfs = get_fs();
 584        int result;
 585
 586        set_fs(KERNEL_DS);
 587        /*
 588         * the following is safe, since for compiler definitions of kvec and
 589         * iovec are identical, yielding the same in-core layout and alignment
 590         */
 591        msg->msg_iov = (struct iovec *)vec;
 592        msg->msg_iovlen = num;
 593        result = sock_sendmsg(sock, msg, size);
 594        set_fs(oldfs);
 595        return result;
 596}
 597
 598/*
 599 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 600 */
 601void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 602        struct sk_buff *skb)
 603{
 604        ktime_t kt = skb->tstamp;
 605
 606        if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 607                struct timeval tv;
 608                /* Race occurred between timestamp enabling and packet
 609                   receiving.  Fill in the current time for now. */
 610                if (kt.tv64 == 0)
 611                        kt = ktime_get_real();
 612                skb->tstamp = kt;
 613                tv = ktime_to_timeval(kt);
 614                put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
 615        } else {
 616                struct timespec ts;
 617                /* Race occurred between timestamp enabling and packet
 618                   receiving.  Fill in the current time for now. */
 619                if (kt.tv64 == 0)
 620                        kt = ktime_get_real();
 621                skb->tstamp = kt;
 622                ts = ktime_to_timespec(kt);
 623                put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
 624        }
 625}
 626
 627EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 628
 629static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 630                                 struct msghdr *msg, size_t size, int flags)
 631{
 632        int err;
 633        struct sock_iocb *si = kiocb_to_siocb(iocb);
 634
 635        si->sock = sock;
 636        si->scm = NULL;
 637        si->msg = msg;
 638        si->size = size;
 639        si->flags = flags;
 640
 641        err = security_socket_recvmsg(sock, msg, size, flags);
 642        if (err)
 643                return err;
 644
 645        return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 646}
 647
 648int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 649                 size_t size, int flags)
 650{
 651        struct kiocb iocb;
 652        struct sock_iocb siocb;
 653        int ret;
 654
 655        init_sync_kiocb(&iocb, NULL);
 656        iocb.private = &siocb;
 657        ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 658        if (-EIOCBQUEUED == ret)
 659                ret = wait_on_sync_kiocb(&iocb);
 660        return ret;
 661}
 662
 663int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 664                   struct kvec *vec, size_t num, size_t size, int flags)
 665{
 666        mm_segment_t oldfs = get_fs();
 667        int result;
 668
 669        set_fs(KERNEL_DS);
 670        /*
 671         * the following is safe, since for compiler definitions of kvec and
 672         * iovec are identical, yielding the same in-core layout and alignment
 673         */
 674        msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 675        result = sock_recvmsg(sock, msg, size, flags);
 676        set_fs(oldfs);
 677        return result;
 678}
 679
 680static void sock_aio_dtor(struct kiocb *iocb)
 681{
 682        kfree(iocb->private);
 683}
 684
 685static ssize_t sock_sendpage(struct file *file, struct page *page,
 686                             int offset, size_t size, loff_t *ppos, int more)
 687{
 688        struct socket *sock;
 689        int flags;
 690
 691        sock = file->private_data;
 692
 693        flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 694        if (more)
 695                flags |= MSG_MORE;
 696
 697        return sock->ops->sendpage(sock, page, offset, size, flags);
 698}
 699
 700static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 701                                struct pipe_inode_info *pipe, size_t len,
 702                                unsigned int flags)
 703{
 704        struct socket *sock = file->private_data;
 705
 706        if (unlikely(!sock->ops->splice_read))
 707                return -EINVAL;
 708
 709        return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 710}
 711
 712static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 713                                         struct sock_iocb *siocb)
 714{
 715        if (!is_sync_kiocb(iocb)) {
 716                siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
 717                if (!siocb)
 718                        return NULL;
 719                iocb->ki_dtor = sock_aio_dtor;
 720        }
 721
 722        siocb->kiocb = iocb;
 723        iocb->private = siocb;
 724        return siocb;
 725}
 726
 727static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
 728                struct file *file, const struct iovec *iov,
 729                unsigned long nr_segs)
 730{
 731        struct socket *sock = file->private_data;
 732        size_t size = 0;
 733        int i;
 734
 735        for (i = 0; i < nr_segs; i++)
 736                size += iov[i].iov_len;
 737
 738        msg->msg_name = NULL;
 739        msg->msg_namelen = 0;
 740        msg->msg_control = NULL;
 741        msg->msg_controllen = 0;
 742        msg->msg_iov = (struct iovec *)iov;
 743        msg->msg_iovlen = nr_segs;
 744        msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 745
 746        return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
 747}
 748
 749static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 750                                unsigned long nr_segs, loff_t pos)
 751{
 752        struct sock_iocb siocb, *x;
 753
 754        if (pos != 0)
 755                return -ESPIPE;
 756
 757        if (iocb->ki_left == 0) /* Match SYS5 behaviour */
 758                return 0;
 759
 760
 761        x = alloc_sock_iocb(iocb, &siocb);
 762        if (!x)
 763                return -ENOMEM;
 764        return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 765}
 766
 767static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
 768                        struct file *file, const struct iovec *iov,
 769                        unsigned long nr_segs)
 770{
 771        struct socket *sock = file->private_data;
 772        size_t size = 0;
 773        int i;
 774
 775        for (i = 0; i < nr_segs; i++)
 776                size += iov[i].iov_len;
 777
 778        msg->msg_name = NULL;
 779        msg->msg_namelen = 0;
 780        msg->msg_control = NULL;
 781        msg->msg_controllen = 0;
 782        msg->msg_iov = (struct iovec *)iov;
 783        msg->msg_iovlen = nr_segs;
 784        msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 785        if (sock->type == SOCK_SEQPACKET)
 786                msg->msg_flags |= MSG_EOR;
 787
 788        return __sock_sendmsg(iocb, sock, msg, size);
 789}
 790
 791static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 792                          unsigned long nr_segs, loff_t pos)
 793{
 794        struct sock_iocb siocb, *x;
 795
 796        if (pos != 0)
 797                return -ESPIPE;
 798
 799        x = alloc_sock_iocb(iocb, &siocb);
 800        if (!x)
 801                return -ENOMEM;
 802
 803        return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 804}
 805
 806/*
 807 * Atomic setting of ioctl hooks to avoid race
 808 * with module unload.
 809 */
 810
 811static DEFINE_MUTEX(br_ioctl_mutex);
 812static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
 813
 814void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 815{
 816        mutex_lock(&br_ioctl_mutex);
 817        br_ioctl_hook = hook;
 818        mutex_unlock(&br_ioctl_mutex);
 819}
 820
 821EXPORT_SYMBOL(brioctl_set);
 822
 823static DEFINE_MUTEX(vlan_ioctl_mutex);
 824static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
 825
 826void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
 827{
 828        mutex_lock(&vlan_ioctl_mutex);
 829        vlan_ioctl_hook = hook;
 830        mutex_unlock(&vlan_ioctl_mutex);
 831}
 832
 833EXPORT_SYMBOL(vlan_ioctl_set);
 834
 835static DEFINE_MUTEX(dlci_ioctl_mutex);
 836static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 837
 838void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 839{
 840        mutex_lock(&dlci_ioctl_mutex);
 841        dlci_ioctl_hook = hook;
 842        mutex_unlock(&dlci_ioctl_mutex);
 843}
 844
 845EXPORT_SYMBOL(dlci_ioctl_set);
 846
 847/*
 848 *      With an ioctl, arg may well be a user mode pointer, but we don't know
 849 *      what to do with it - that's up to the protocol still.
 850 */
 851
 852static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 853{
 854        struct socket *sock;
 855        struct sock *sk;
 856        void __user *argp = (void __user *)arg;
 857        int pid, err;
 858        struct net *net;
 859
 860        sock = file->private_data;
 861        sk = sock->sk;
 862        net = sock_net(sk);
 863        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
 864                err = dev_ioctl(net, cmd, argp);
 865        } else
 866#ifdef CONFIG_WIRELESS_EXT
 867        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 868                err = dev_ioctl(net, cmd, argp);
 869        } else
 870#endif                          /* CONFIG_WIRELESS_EXT */
 871                switch (cmd) {
 872                case FIOSETOWN:
 873                case SIOCSPGRP:
 874                        err = -EFAULT;
 875                        if (get_user(pid, (int __user *)argp))
 876                                break;
 877                        err = f_setown(sock->file, pid, 1);
 878                        break;
 879                case FIOGETOWN:
 880                case SIOCGPGRP:
 881                        err = put_user(f_getown(sock->file),
 882                                       (int __user *)argp);
 883                        break;
 884                case SIOCGIFBR:
 885                case SIOCSIFBR:
 886                case SIOCBRADDBR:
 887                case SIOCBRDELBR:
 888                        err = -ENOPKG;
 889                        if (!br_ioctl_hook)
 890                                request_module("bridge");
 891
 892                        mutex_lock(&br_ioctl_mutex);
 893                        if (br_ioctl_hook)
 894                                err = br_ioctl_hook(net, cmd, argp);
 895                        mutex_unlock(&br_ioctl_mutex);
 896                        break;
 897                case SIOCGIFVLAN:
 898                case SIOCSIFVLAN:
 899                        err = -ENOPKG;
 900                        if (!vlan_ioctl_hook)
 901                                request_module("8021q");
 902
 903                        mutex_lock(&vlan_ioctl_mutex);
 904                        if (vlan_ioctl_hook)
 905                                err = vlan_ioctl_hook(net, argp);
 906                        mutex_unlock(&vlan_ioctl_mutex);
 907                        break;
 908                case SIOCADDDLCI:
 909                case SIOCDELDLCI:
 910                        err = -ENOPKG;
 911                        if (!dlci_ioctl_hook)
 912                                request_module("dlci");
 913
 914                        mutex_lock(&dlci_ioctl_mutex);
 915                        if (dlci_ioctl_hook)
 916                                err = dlci_ioctl_hook(cmd, argp);
 917                        mutex_unlock(&dlci_ioctl_mutex);
 918                        break;
 919                default:
 920                        err = sock->ops->ioctl(sock, cmd, arg);
 921
 922                        /*
 923                         * If this ioctl is unknown try to hand it down
 924                         * to the NIC driver.
 925                         */
 926                        if (err == -ENOIOCTLCMD)
 927                                err = dev_ioctl(net, cmd, argp);
 928                        break;
 929                }
 930        return err;
 931}
 932
 933int sock_create_lite(int family, int type, int protocol, struct socket **res)
 934{
 935        int err;
 936        struct socket *sock = NULL;
 937
 938        err = security_socket_create(family, type, protocol, 1);
 939        if (err)
 940                goto out;
 941
 942        sock = sock_alloc();
 943        if (!sock) {
 944                err = -ENOMEM;
 945                goto out;
 946        }
 947
 948        sock->type = type;
 949        err = security_socket_post_create(sock, family, type, protocol, 1);
 950        if (err)
 951                goto out_release;
 952
 953out:
 954        *res = sock;
 955        return err;
 956out_release:
 957        sock_release(sock);
 958        sock = NULL;
 959        goto out;
 960}
 961
 962/* No kernel lock held - perfect */
 963static unsigned int sock_poll(struct file *file, poll_table *wait)
 964{
 965        struct socket *sock;
 966
 967        /*
 968         *      We can't return errors to poll, so it's either yes or no.
 969         */
 970        sock = file->private_data;
 971        return sock->ops->poll(file, sock, wait);
 972}
 973
 974static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 975{
 976        struct socket *sock = file->private_data;
 977
 978        return sock->ops->mmap(file, sock, vma);
 979}
 980
 981static int sock_close(struct inode *inode, struct file *filp)
 982{
 983        /*
 984         *      It was possible the inode is NULL we were
 985         *      closing an unfinished socket.
 986         */
 987
 988        if (!inode) {
 989                printk(KERN_DEBUG "sock_close: NULL inode\n");
 990                return 0;
 991        }
 992        sock_release(SOCKET_I(inode));
 993        return 0;
 994}
 995
 996/*
 997 *      Update the socket async list
 998 *
 999 *      Fasync_list locking strategy.
1000 *
1001 *      1. fasync_list is modified only under process context socket lock
1002 *         i.e. under semaphore.
1003 *      2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1004 *         or under socket lock.
1005 *      3. fasync_list can be used from softirq context, so
1006 *         modifications under the socket lock must also be wrapped in
1007 *         write_lock_bh(&sk->sk_callback_lock).
1008 *                                                      --ANK (990710)
1009 */
1010
1011static int sock_fasync(int fd, struct file *filp, int on)
1012{
1013        struct fasync_struct *fa, *fna = NULL, **prev;
1014        struct socket *sock;
1015        struct sock *sk;
1016
1017        if (on) {
1018                fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1019                if (fna == NULL)
1020                        return -ENOMEM;
1021        }
1022
1023        sock = filp->private_data;
1024
1025        sk = sock->sk;
1026        if (sk == NULL) {
1027                kfree(fna);
1028                return -EINVAL;
1029        }
1030
1031        lock_sock(sk);
1032
1033        prev = &(sock->fasync_list);
1034
1035        for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1036                if (fa->fa_file == filp)
1037                        break;
1038
1039        if (on) {
1040                if (fa != NULL) {
1041                        write_lock_bh(&sk->sk_callback_lock);
1042                        fa->fa_fd = fd;
1043                        write_unlock_bh(&sk->sk_callback_lock);
1044
1045                        kfree(fna);
1046                        goto out;
1047                }
1048                fna->fa_file = filp;
1049                fna->fa_fd = fd;
1050                fna->magic = FASYNC_MAGIC;
1051                fna->fa_next = sock->fasync_list;
1052                write_lock_bh(&sk->sk_callback_lock);
1053                sock->fasync_list = fna;
1054                write_unlock_bh(&sk->sk_callback_lock);
1055        } else {
1056                if (fa != NULL) {
1057                        write_lock_bh(&sk->sk_callback_lock);
1058                        *prev = fa->fa_next;
1059                        write_unlock_bh(&sk->sk_callback_lock);
1060                        kfree(fa);
1061                }
1062        }
1063
1064out:
1065        release_sock(sock->sk);
1066        return 0;
1067}
1068
1069/* This function may be called only under socket lock or callback_lock */
1070
1071int sock_wake_async(struct socket *sock, int how, int band)
1072{
1073        if (!sock || !sock->fasync_list)
1074                return -1;
1075        switch (how) {
1076        case SOCK_WAKE_WAITD:
1077                if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1078                        break;
1079                goto call_kill;
1080        case SOCK_WAKE_SPACE:
1081                if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1082                        break;
1083                /* fall through */
1084        case SOCK_WAKE_IO:
1085call_kill:
1086                __kill_fasync(sock->fasync_list, SIGIO, band);
1087                break;
1088        case SOCK_WAKE_URG:
1089                __kill_fasync(sock->fasync_list, SIGURG, band);
1090        }
1091        return 0;
1092}
1093
/*
 *      __sock_create - allocate a socket and let its protocol family set it up
 *
 *      @net:      namespace passed through to the family's ->create()
 *      @family:   protocol family; must be in [0, NPROTO)
 *      @type:     socket type; must be in [0, SOCK_MAX)
 *      @protocol: passed unchanged to the security hooks and to ->create()
 *      @res:      receives the new socket; written only on success
 *      @kern:     forwarded to the security hooks
 *
 *      Returns 0 on success or a negative errno: -EAFNOSUPPORT for an
 *      out-of-range or unregistered family (or when a module reference
 *      cannot be taken), -EINVAL for an out-of-range type, -ENFILE when
 *      sock_alloc() fails, otherwise whatever the security hooks or the
 *      family's ->create() returned.
 */
1094static int __sock_create(struct net *net, int family, int type, int protocol,
1095                         struct socket **res, int kern)
1096{
1097        int err;
1098        struct socket *sock;
1099        const struct net_proto_family *pf;
1100
1101        /*
1102         *      Check that family and type are in range
1103         */
1104        if (family < 0 || family >= NPROTO)
1105                return -EAFNOSUPPORT;
1106        if (type < 0 || type >= SOCK_MAX)
1107                return -EINVAL;
1108
1109        /* Compatibility.
1110
1111           This uglymoron is moved from INET layer to here to avoid
1112           deadlock in module load.
1113         */
1114        if (family == PF_INET && type == SOCK_PACKET) {
                /* Log the obsolete combination once, then treat it as PF_PACKET. */
1115                static int warned;
1116                if (!warned) {
1117                        warned = 1;
1118                        printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119                               current->comm);
1120                }
1121                family = PF_PACKET;
1122        }
1123
1124        err = security_socket_create(family, type, protocol, kern);
1125        if (err)
1126                return err;
1127
1128        /*
1129         *      Allocate the socket and allow the family to set things up. if
1130         *      the protocol is 0, the family is instructed to select an appropriate
1131         *      default.
1132         */
1133        sock = sock_alloc();
1134        if (!sock) {
1135                if (net_ratelimit())
1136                        printk(KERN_WARNING "socket: no more sockets\n");
1137                return -ENFILE; /* Not exactly a match, but its the
1138                                   closest posix thing */
1139        }
1140
1141        sock->type = type;
1142
1143#ifdef CONFIG_MODULES
1144        /* Attempt to load a protocol module if the find failed.
1145         *
1146         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1147         * requested real, full-featured networking support upon configuration.
1148         * Otherwise module support will break!
1149         */
1150        if (net_families[family] == NULL)
1151                request_module("net-pf-%d", family);
1152#endif
1153
        /* The family table entry is looked up under the RCU read lock. */
1154        rcu_read_lock();
1155        pf = rcu_dereference(net_families[family]);
1156        err = -EAFNOSUPPORT;
1157        if (!pf)
1158                goto out_release;
1159
1160        /*
1161         * We will call the ->create function, that possibly is in a loadable
1162         * module, so we have to bump that loadable module refcnt first.
1163         */
1164        if (!try_module_get(pf->owner))
1165                goto out_release;
1166
1167        /* Now protected by module ref count */
1168        rcu_read_unlock();
1169
1170        err = pf->create(net, sock, protocol);
1171        if (err < 0)
1172                goto out_module_put;
1173
1174        /*
1175         * Now to bump the refcnt of the [loadable] module that owns this
1176         * socket; at sock_release time we decrement its refcnt.
1177         */
1178        if (!try_module_get(sock->ops->owner))
1179                goto out_module_busy;
1180
1181        /*
1182         * Now that we're done with the ->create function, the [loadable]
1183         * module can have its refcnt decremented
1184         */
1185        module_put(pf->owner);
1186        err = security_socket_post_create(sock, family, type, protocol, kern);
1187        if (err)
1188                goto out_sock_release;
1189        *res = sock;
1190
1191        return 0;
1192
1193out_module_busy:
1194        err = -EAFNOSUPPORT;
1195out_module_put:
        /*
         * NOTE(review): ops is cleared before sock_release(); presumably so
         * that the release path does not go through the protocol's ops, whose
         * module reference was never taken - confirm against sock_release().
         */
1196        sock->ops = NULL;
1197        module_put(pf->owner);
1198out_sock_release:
1199        sock_release(sock);
1200        return err;
1201
1202out_release:
        /* Failed while still inside the RCU read-side section. */
1203        rcu_read_unlock();
1204        goto out_sock_release;
1205}
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1209        return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1210}
1211
1212int sock_create_kern(int family, int type, int protocol, struct socket **res)
1213{
1214        return __sock_create(&init_net, family, type, protocol, res, 1);
1215}
1216
1217SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1218{
1219        int retval;
1220        struct socket *sock;
1221        int flags;
1222
1223        /* Check the SOCK_* constants for consistency.  */
1224        BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1225        BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1226        BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1227        BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1228
1229        flags = type & ~SOCK_TYPE_MASK;
1230        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1231                return -EINVAL;
1232        type &= SOCK_TYPE_MASK;
1233
1234        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1235                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1236
1237        retval = sock_create(family, type, protocol, &sock);
1238        if (retval < 0)
1239                goto out;
1240
1241        retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1242        if (retval < 0)
1243                goto out_release;
1244
1245out:
1246        /* It may be already another descriptor 8) Not kernel problem. */
1247        return retval;
1248
1249out_release:
1250        sock_release(sock);
1251        return retval;
1252}
1253
1254/*
1255 *      Create a pair of connected sockets.
 *
 *      The bits of type outside SOCK_TYPE_MASK are flags; only SOCK_CLOEXEC
 *      and SOCK_NONBLOCK are accepted, anything else gives -EINVAL.
 *      On success the two descriptors are stored in usockvec[0] and
 *      usockvec[1] and 0 is returned; otherwise a negative errno.
1256 */
1257
1258SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1259                int __user *, usockvec)
1260{
1261        struct socket *sock1, *sock2;
1262        int fd1, fd2, err;
1263        struct file *newfile1, *newfile2;
1264        int flags;
1265
1266        flags = type & ~SOCK_TYPE_MASK;
1267        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1268                return -EINVAL;
1269        type &= SOCK_TYPE_MASK;
1270
        /* Translate SOCK_NONBLOCK where its value differs from O_NONBLOCK. */
1271        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1272                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1273
1274        /*
1275         * Obtain the first socket and check if the underlying protocol
1276         * supports the socketpair call.
1277         */
1278
1279        err = sock_create(family, type, protocol, &sock1);
1280        if (err < 0)
1281                goto out;
1282
1283        err = sock_create(family, type, protocol, &sock2);
1284        if (err < 0)
1285                goto out_release_1;
1286
1287        err = sock1->ops->socketpair(sock1, sock2);
1288        if (err < 0)
1289                goto out_release_both;
1290
        /* Reserve a descriptor and a file for each end before attaching. */
1291        fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
1292        if (unlikely(fd1 < 0)) {
1293                err = fd1;
1294                goto out_release_both;
1295        }
1296
1297        fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
1298        if (unlikely(fd2 < 0)) {
1299                err = fd2;
1300                put_filp(newfile1);
1301                put_unused_fd(fd1);
1302                goto out_release_both;
1303        }
1304
1305        err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
1306        if (unlikely(err < 0)) {
1307                goto out_fd2;
1308        }
1309
1310        err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
1311        if (unlikely(err < 0)) {
                /*
                 * NOTE(review): sock1 is not released explicitly on this
                 * path; presumably fput() of the file it was attached to
                 * releases it - confirm against sock_attach_fd().
                 */
1312                fput(newfile1);
1313                goto out_fd1;
1314        }
1315
1316        audit_fd_pair(fd1, fd2);
1317        fd_install(fd1, newfile1);
1318        fd_install(fd2, newfile2);
1319        /* fd1 and fd2 may be already another descriptors.
1320         * Not kernel problem.
1321         */
1322
1323        err = put_user(fd1, &usockvec[0]);
1324        if (!err)
1325                err = put_user(fd2, &usockvec[1]);
1326        if (!err)
1327                return 0;
1328
        /* The descriptors could not be reported to the caller: close both. */
1329        sys_close(fd2);
1330        sys_close(fd1);
1331        return err;
1332
1333out_release_both:
1334        sock_release(sock2);
1335out_release_1:
1336        sock_release(sock1);
1337out:
1338        return err;
1339
        /*
         * out_fd2: attaching sock1 failed - drop both files, both sockets
         * and both descriptors.  out_fd1: attaching sock2 failed - the
         * caller has already dropped newfile1, so only the second file and
         * socket plus the two descriptors remain.
         */
1340out_fd2:
1341        put_filp(newfile1);
1342        sock_release(sock1);
1343out_fd1:
1344        put_filp(newfile2);
1345        sock_release(sock2);
1346        put_unused_fd(fd1);
1347        put_unused_fd(fd2);
1348        goto out;
1349}
1350
1351/*
1352 *      Bind a name to a socket. Nothing much to do here since it's
1353 *      the protocol's responsibility to handle the local address.
1354 *
1355 *      We move the socket address to kernel space before we call
1356 *      the protocol layer (having also checked the address is ok).
1357 */
1358
1359SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1360{
1361        struct socket *sock;
1362        struct sockaddr_storage address;
1363        int err, fput_needed;
1364
1365        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1366        if (sock) {
1367                err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
1368                if (err >= 0) {
1369                        err = security_socket_bind(sock,
1370                                                   (struct sockaddr *)&address,
1371                                                   addrlen);
1372                        if (!err)
1373                                err = sock->ops->bind(sock,
1374                                                      (struct sockaddr *)
1375                                                      &address, addrlen);
1376                }
1377                fput_light(sock->file, fput_needed);
1378        }
1379        return err;
1380}
1381
1382/*
1383 *      Perform a listen. Basically, we allow the protocol to do anything
1384 *      necessary for a listen, and if that works, we mark the socket as
1385 *      ready for listening.
1386 */
1387
1388SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1389{
1390        struct socket *sock;
1391        int err, fput_needed;
1392        int somaxconn;
1393
1394        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1395        if (sock) {
1396                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1397                if ((unsigned)backlog > somaxconn)
1398                        backlog = somaxconn;
1399
1400                err = security_socket_listen(sock, backlog);
1401                if (!err)
1402                        err = sock->ops->listen(sock, backlog);
1403
1404                fput_light(sock->file, fput_needed);
1405        }
1406        return err;
1407}
1408
1409/*
1410 *      For accept, we attempt to create a new socket, set up the link
1411 *      with the client, wake up the client, then return the new
1412 *      connected fd. We collect the address of the connector in kernel
1413 *      space and move it to user at the very end. This is unclean because
1414 *      we open the socket then return an error.
1415 *
1416 *      1003.1g adds the ability for recvmsg() to query connection pending
1417 *      status. We need to add that support in a way that's clean when
1418 *      we restructure accept as well.
1419 */
1420
/*
 *      accept4 - accept a connection on a listening socket
 *
 *      @fd:             the listening socket
 *      @upeer_sockaddr: if non-NULL, user buffer that receives the peer address
 *      @upeer_addrlen:  user pointer handed to move_addr_to_user() with it
 *      @flags:          SOCK_CLOEXEC and/or SOCK_NONBLOCK; anything else
 *                       is rejected with -EINVAL
 *
 *      Returns the new descriptor on success or a negative errno.
 */
1421SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1422                int __user *, upeer_addrlen, int, flags)
1423{
1424        struct socket *sock, *newsock;
1425        struct file *newfile;
1426        int err, len, newfd, fput_needed;
1427        struct sockaddr_storage address;
1428
1429        if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1430                return -EINVAL;
1431
        /* Translate SOCK_NONBLOCK where its value differs from O_NONBLOCK. */
1432        if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1433                flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1434
1435        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1436        if (!sock)
1437                goto out;
1438
1439        err = -ENFILE;
1440        if (!(newsock = sock_alloc()))
1441                goto out_put;
1442
        /* The new socket takes its type and ops from the listening one. */
1443        newsock->type = sock->type;
1444        newsock->ops = sock->ops;
1445
1446        /*
1447         * We don't need try_module_get here, as the listening socket (sock)
1448         * has the protocol module (sock->ops->owner) held.
1449         */
1450        __module_get(newsock->ops->owner);
1451
1452        newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
1453        if (unlikely(newfd < 0)) {
1454                err = newfd;
1455                sock_release(newsock);
1456                goto out_put;
1457        }
1458
1459        err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
1460        if (err < 0)
1461                goto out_fd_simple;
1462
1463        err = security_socket_accept(sock, newsock);
1464        if (err)
1465                goto out_fd;
1466
1467        err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1468        if (err < 0)
1469                goto out_fd;
1470
1471        if (upeer_sockaddr) {
                /* Any getname() failure is reported as -ECONNABORTED. */
1472                if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
1473                                          &len, 2) < 0) {
1474                        err = -ECONNABORTED;
1475                        goto out_fd;
1476                }
1477                err = move_addr_to_user((struct sockaddr *)&address,
1478                                        len, upeer_sockaddr, upeer_addrlen);
1479                if (err < 0)
1480                        goto out_fd;
1481        }
1482
1483        /* File flags are not inherited via accept() unlike another OSes. */
1484
1485        fd_install(newfd, newfile);
1486        err = newfd;
1487
1488        security_socket_post_accept(sock, newsock);
1489
1490out_put:
1491        fput_light(sock->file, fput_needed);
1492out:
1493        return err;
        /* Attaching failed: socket, file and descriptor are each dropped here. */
1494out_fd_simple:
1495        sock_release(newsock);
1496        put_filp(newfile);
1497        put_unused_fd(newfd);
1498        goto out_put;
        /*
         * NOTE(review): newsock is not released explicitly on this path;
         * presumably fput() of the file it was attached to releases it -
         * confirm against sock_attach_fd().
         */
1499out_fd:
1500        fput(newfile);
1501        put_unused_fd(newfd);
1502        goto out_put;
1503}
1504
1505SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1506                int __user *, upeer_addrlen)
1507{
1508        return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1509}
1510
1511/*
1512 *      Attempt to connect to a socket with the server address.  The address
1513 *      is in user space so we verify it is OK and move it to kernel space.
1514 *
1515 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1516 *      break bindings
1517 *
1518 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1519 *      other SEQPACKET protocols that take time to connect() as it doesn't
1520 *      include the -EINPROGRESS status for such sockets.
1521 */
1522
1523SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1524                int, addrlen)
1525{
1526        struct socket *sock;
1527        struct sockaddr_storage address;
1528        int err, fput_needed;
1529
1530        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1531        if (!sock)
1532                goto out;
1533        err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1534        if (err < 0)
1535                goto out_put;
1536
1537        err =
1538            security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1539        if (err)
1540                goto out_put;
1541
1542        err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1543                                 sock->file->f_flags);
1544out_put:
1545        fput_light(sock->file, fput_needed);
1546out:
1547        return err;
1548}
1549
1550/*
1551 *      Get the local address ('name') of a socket object. Move the obtained
1552 *      name to user space.
1553 */
1554
1555SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1556                int __user *, usockaddr_len)
1557{
1558        struct socket *sock;
1559        struct sockaddr_storage address;
1560        int len, err, fput_needed;
1561
1562        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1563        if (!sock)
1564                goto out;
1565
1566        err = security_socket_getsockname(sock);
1567        if (err)
1568                goto out_put;
1569
1570        err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1571        if (err)
1572                goto out_put;
1573        err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1574
1575out_put:
1576        fput_light(sock->file, fput_needed);
1577out:
1578        return err;
1579}
1580
1581/*
1582 *      Get the remote address ('name') of a socket object. Move the obtained
1583 *      name to user space.
1584 */
1585
1586SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1587                int __user *, usockaddr_len)
1588{
1589        struct socket *sock;
1590        struct sockaddr_storage address;
1591        int len, err, fput_needed;
1592
1593        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1594        if (sock != NULL) {
1595                err = security_socket_getpeername(sock);
1596                if (err) {
1597                        fput_light(sock->file, fput_needed);
1598                        return err;
1599                }
1600
1601                err =
1602                    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
1603                                       1);
1604                if (!err)
1605                        err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
1606                                                usockaddr_len);
1607                fput_light(sock->file, fput_needed);
1608        }
1609        return err;
1610}
1611
1612/*
1613 *      Send a datagram to a given address. We move the address into kernel
1614 *      space and check the user space data area is readable before invoking
1615 *      the protocol.
1616 */
1617
1618SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1619                unsigned, flags, struct sockaddr __user *, addr,
1620                int, addr_len)
1621{
1622        struct socket *sock;
1623        struct sockaddr_storage address;
1624        int err;
1625        struct msghdr msg;
1626        struct iovec iov;
1627        int fput_needed;
1628
1629        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1630        if (!sock)
1631                goto out;
1632
1633        iov.iov_base = buff;
1634        iov.iov_len = len;
1635        msg.msg_name = NULL;
1636        msg.msg_iov = &iov;
1637        msg.msg_iovlen = 1;
1638        msg.msg_control = NULL;
1639        msg.msg_controllen = 0;
1640        msg.msg_namelen = 0;
1641        if (addr) {
1642                err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1643                if (err < 0)
1644                        goto out_put;
1645                msg.msg_name = (struct sockaddr *)&address;
1646                msg.msg_namelen = addr_len;
1647        }
1648        if (sock->file->f_flags & O_NONBLOCK)
1649                flags |= MSG_DONTWAIT;
1650        msg.msg_flags = flags;
1651        err = sock_sendmsg(sock, &msg, len);
1652
1653out_put:
1654        fput_light(sock->file, fput_needed);
1655out:
1656        return err;
1657}
1658
1659/*
1660 *      Send a datagram down a socket.
1661 */
1662
1663SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1664                unsigned, flags)
1665{
1666        return sys_sendto(fd, buff, len, flags, NULL, 0);
1667}
1668
1669/*
1670 *      Receive a frame from the socket and optionally record the address of the
1671 *      sender. We verify the buffers are writable and if needed move the
1672 *      sender address from kernel to user space.
1673 */
1674
1675SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1676                unsigned, flags, struct sockaddr __user *, addr,
1677                int __user *, addr_len)
1678{
1679        struct socket *sock;
1680        struct iovec iov;
1681        struct msghdr msg;
1682        struct sockaddr_storage address;
1683        int err, err2;
1684        int fput_needed;
1685
1686        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687        if (!sock)
1688                goto out;
1689
1690        msg.msg_control = NULL;
1691        msg.msg_controllen = 0;
1692        msg.msg_iovlen = 1;
1693        msg.msg_iov = &iov;
1694        iov.iov_len = size;
1695        iov.iov_base = ubuf;
1696        msg.msg_name = (struct sockaddr *)&address;
1697        msg.msg_namelen = sizeof(address);
1698        if (sock->file->f_flags & O_NONBLOCK)
1699                flags |= MSG_DONTWAIT;
1700        err = sock_recvmsg(sock, &msg, size, flags);
1701
1702        if (err >= 0 && addr != NULL) {
1703                err2 = move_addr_to_user((struct sockaddr *)&address,
1704                                         msg.msg_namelen, addr, addr_len);
1705                if (err2 < 0)
1706                        err = err2;
1707        }
1708
1709        fput_light(sock->file, fput_needed);
1710out:
1711        return err;
1712}
1713
1714/*
1715 *      Receive a datagram from a socket.
1716 */
1717
1718asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1719                         unsigned flags)
1720{
1721        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1722}
1723
1724/*
1725 *      Set a socket option. Because we don't know the option lengths we have
1726 *      to pass the user mode parameter for the protocols to sort out.
1727 */
1728
1729SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1730                char __user *, optval, int, optlen)
1731{
1732        int err, fput_needed;
1733        struct socket *sock;
1734
1735        if (optlen < 0)
1736                return -EINVAL;
1737
1738        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1739        if (sock != NULL) {
1740                err = security_socket_setsockopt(sock, level, optname);
1741                if (err)
1742                        goto out_put;
1743
1744                if (level == SOL_SOCKET)
1745                        err =
1746                            sock_setsockopt(sock, level, optname, optval,
1747                                            optlen);
1748                else
1749                        err =
1750                            sock->ops->setsockopt(sock, level, optname, optval,
1751                                                  optlen);
1752out_put:
1753                fput_light(sock->file, fput_needed);
1754        }
1755        return err;
1756}
1757
1758/*
1759 *      Get a socket option. Because we don't know the option lengths we have
1760 *      to pass a user mode parameter for the protocols to sort out.
1761 */
1762
1763SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1764                char __user *, optval, int __user *, optlen)
1765{
1766        int err, fput_needed;
1767        struct socket *sock;
1768
1769        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1770        if (sock != NULL) {
1771                err = security_socket_getsockopt(sock, level, optname);
1772                if (err)
1773                        goto out_put;
1774
1775                if (level == SOL_SOCKET)
1776                        err =
1777                            sock_getsockopt(sock, level, optname, optval,
1778                                            optlen);
1779                else
1780                        err =
1781                            sock->ops->getsockopt(sock, level, optname, optval,
1782                                                  optlen);
1783out_put:
1784                fput_light(sock->file, fput_needed);
1785        }
1786        return err;
1787}
1788
1789/*
1790 *      Shutdown a socket.
1791 */
1792
1793SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1794{
1795        int err, fput_needed;
1796        struct socket *sock;
1797
1798        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1799        if (sock != NULL) {
1800                err = security_socket_shutdown(sock, how);
1801                if (!err)
1802                        err = sock->ops->shutdown(sock, how);
1803                fput_light(sock->file, fput_needed);
1804        }
1805        return err;
1806}
1807
/*
 * Helper macros yielding the address of a msghdr field that has the same
 * type (int / unsigned) in the 32-bit and 64-bit layouts on our platforms.
 *
 * COMPAT_MSG(msg, member) expands to &msg_compat->member when
 * MSG_CMSG_COMPAT is set in "flags" and to &msg->member otherwise, so a
 * variable called "flags" and a pointer called <msg>_compat must both be
 * in scope wherever these are used.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1814
1815/*
1816 *      BSD sendmsg interface
1817 */
1818
1819SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1820{
1821        struct compat_msghdr __user *msg_compat =
1822            (struct compat_msghdr __user *)msg;
1823        struct socket *sock;
1824        struct sockaddr_storage address;
1825        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1826        unsigned char ctl[sizeof(struct cmsghdr) + 20]
1827            __attribute__ ((aligned(sizeof(__kernel_size_t))));
1828        /* 20 is size of ipv6_pktinfo */
1829        unsigned char *ctl_buf = ctl;
1830        struct msghdr msg_sys;
1831        int err, ctl_len, iov_size, total_len;
1832        int fput_needed;
1833
1834        err = -EFAULT;
1835        if (MSG_CMSG_COMPAT & flags) {
1836                if (get_compat_msghdr(&msg_sys, msg_compat))
1837                        return -EFAULT;
1838        }
1839        else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1840                return -EFAULT;
1841
1842        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1843        if (!sock)
1844                goto out;
1845
1846        /* do not move before msg_sys is valid */
1847        err = -EMSGSIZE;
1848        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1849                goto out_put;
1850
1851        /* Check whether to allocate the iovec area */
1852        err = -ENOMEM;
1853        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1854        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1855                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1856                if (!iov)
1857                        goto out_put;
1858        }
1859
1860        /* This will also move the address data into kernel space */
1861        if (MSG_CMSG_COMPAT & flags) {
1862                err = verify_compat_iovec(&msg_sys, iov,
1863                                          (struct sockaddr *)&address,
1864                                          VERIFY_READ);
1865        } else
1866                err = verify_iovec(&msg_sys, iov,
1867                                   (struct sockaddr *)&address,
1868                                   VERIFY_READ);
1869        if (err < 0)
1870                goto out_freeiov;
1871        total_len = err;
1872
1873        err = -ENOBUFS;
1874
1875        if (msg_sys.msg_controllen > INT_MAX)
1876                goto out_freeiov;
1877        ctl_len = msg_sys.msg_controllen;
1878        if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1879                err =
1880                    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1881                                                     sizeof(ctl));
1882                if (err)
1883                        goto out_freeiov;
1884                ctl_buf = msg_sys.msg_control;
1885                ctl_len = msg_sys.msg_controllen;
1886        } else if (ctl_len) {
1887                if (ctl_len > sizeof(ctl)) {
1888                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1889                        if (ctl_buf == NULL)
1890                                goto out_freeiov;
1891                }
1892                err = -EFAULT;
1893                /*
1894                 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1895                 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1896                 * checking falls down on this.
1897                 */
1898                if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1899                                   ctl_len))
1900                        goto out_freectl;
1901                msg_sys.msg_control = ctl_buf;
1902        }
1903        msg_sys.msg_flags = flags;
1904
1905        if (sock->file->f_flags & O_NONBLOCK)
1906                msg_sys.msg_flags |= MSG_DONTWAIT;
1907        err = sock_sendmsg(sock, &msg_sys, total_len);
1908
1909out_freectl:
1910        if (ctl_buf != ctl)
1911                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1912out_freeiov:
1913        if (iov != iovstack)
1914                sock_kfree_s(sock->sk, iov, iov_size);
1915out_put:
1916        fput_light(sock->file, fput_needed);
1917out:
1918        return err;
1919}
1920
1921/*
1922 *      BSD recvmsg interface
1923 */
1924
1925SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
1926                unsigned int, flags)
1927{
1928        struct compat_msghdr __user *msg_compat =
1929            (struct compat_msghdr __user *)msg;
1930        struct socket *sock;
1931        struct iovec iovstack[UIO_FASTIOV];
1932        struct iovec *iov = iovstack;
1933        struct msghdr msg_sys;
1934        unsigned long cmsg_ptr;
1935        int err, iov_size, total_len, len;
1936        int fput_needed;
1937
1938        /* kernel mode address */
1939        struct sockaddr_storage addr;
1940
1941        /* user mode address pointers */
1942        struct sockaddr __user *uaddr;
1943        int __user *uaddr_len;
1944
1945        if (MSG_CMSG_COMPAT & flags) {
1946                if (get_compat_msghdr(&msg_sys, msg_compat))
1947                        return -EFAULT;
1948        }
1949        else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1950                return -EFAULT;
1951
1952        sock = sockfd_lookup_light(fd, &err, &fput_needed);
1953        if (!sock)
1954                goto out;
1955
1956        err = -EMSGSIZE;
1957        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1958                goto out_put;
1959
1960        /* Check whether to allocate the iovec area */
1961        err = -ENOMEM;
1962        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1963        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1964                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1965                if (!iov)
1966                        goto out_put;
1967        }
1968
1969        /*
1970         *      Save the user-mode address (verify_iovec will change the
1971         *      kernel msghdr to use the kernel address space)
1972         */
1973
1974        uaddr = (__force void __user *)msg_sys.msg_name;
1975        uaddr_len = COMPAT_NAMELEN(msg);
1976        if (MSG_CMSG_COMPAT & flags) {
1977                err = verify_compat_iovec(&msg_sys, iov,
1978                                          (struct sockaddr *)&addr,
1979                                          VERIFY_WRITE);
1980        } else
1981                err = verify_iovec(&msg_sys, iov,
1982                                   (struct sockaddr *)&addr,
1983                                   VERIFY_WRITE);
1984        if (err < 0)
1985                goto out_freeiov;
1986        total_len = err;
1987
1988        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1989        msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
1990
1991        if (sock->file->f_flags & O_NONBLOCK)
1992                flags |= MSG_DONTWAIT;
1993        err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1994        if (err < 0)
1995                goto out_freeiov;
1996        len = err;
1997
1998        if (uaddr != NULL) {
1999                err = move_addr_to_user((struct sockaddr *)&addr,
2000                                        msg_sys.msg_namelen, uaddr,
2001                                        uaddr_len);
2002                if (err < 0)
2003                        goto out_freeiov;
2004        }
2005        err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
2006                         COMPAT_FLAGS(msg));
2007        if (err)
2008                goto out_freeiov;
2009        if (MSG_CMSG_COMPAT & flags)
2010                err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
2011                                 &msg_compat->msg_controllen);
2012        else
2013                err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
2014                                 &msg->msg_controllen);
2015        if (err)
2016                goto out_freeiov;
2017        err = len;
2018
2019out_freeiov:
2020        if (iov != iovstack)
2021                sock_kfree_s(sock->sk, iov, iov_size);
2022out_put:
2023        fput_light(sock->file, fput_needed);
2024out:
2025        return err;
2026}
2027
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of arguments to copy in for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[19] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexes the individual socket system calls: the argument
 *	vector is copied in from user space according to nargs[] and the
 *	matching sys_*() function is invoked with it.
 *
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[6];
	unsigned int len;
	int err;

	if (call < 1 || call > SYS_ACCEPT4)
		return -EINVAL;

	len = nargs[call];

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	audit_socketcall(len / sizeof(unsigned long), a);

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		/* plain accept is accept4 with no flags */
		err = sys_accept4(a[0], (struct sockaddr __user *)a[1],
				  (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_ACCEPT4:
		err = sys_accept4(a[0], (struct sockaddr __user *)a[1],
				  (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
2141
2142/**
2143 *      sock_register - add a socket protocol handler
2144 *      @ops: description of protocol
2145 *
2146 *      This function is called by a protocol handler that wants to
2147 *      advertise its address family, and have it linked into the
2148 *      socket interface. The value ops->family coresponds to the
2149 *      socket system call protocol family.
2150 */
2151int sock_register(const struct net_proto_family *ops)
2152{
2153        int err;
2154
2155        if (ops->family >= NPROTO) {
2156                printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2157                       NPROTO);
2158                return -ENOBUFS;
2159        }
2160
2161        spin_lock(&net_family_lock);
2162        if (net_families[ops->family])
2163                err = -EEXIST;
2164        else {
2165                net_families[ops->family] = ops;
2166                err = 0;
2167        }
2168        spin_unlock(&net_family_lock);
2169
2170        printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2171        return err;
2172}
2173
2174/**
2175 *      sock_unregister - remove a protocol handler
2176 *      @family: protocol family to remove
2177 *
2178 *      This function is called by a protocol handler that wants to
2179 *      remove its address family, and have it unlinked from the
2180 *      new socket creation.
2181 *
2182 *      If protocol handler is a module, then it can use module reference
2183 *      counts to protect against new references. If protocol handler is not
2184 *      a module then it needs to provide its own protection in
2185 *      the ops->create routine.
2186 */
2187void sock_unregister(int family)
2188{
2189        BUG_ON(family < 0 || family >= NPROTO);
2190
2191        spin_lock(&net_family_lock);
2192        net_families[family] = NULL;
2193        spin_unlock(&net_family_lock);
2194
2195        synchronize_rcu();
2196
2197        printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2198}
2199
2200static int __init sock_init(void)
2201{
2202        /*
2203         *      Initialize sock SLAB cache.
2204         */
2205
2206        sk_init();
2207
2208        /*
2209         *      Initialize skbuff SLAB cache
2210         */
2211        skb_init();
2212
2213        /*
2214         *      Initialize the protocols module.
2215         */
2216
2217        init_inodecache();
2218        register_filesystem(&sock_fs_type);
2219        sock_mnt = kern_mount(&sock_fs_type);
2220
2221        /* The real protocol initialization is performed in later initcalls.
2222         */
2223
2224#ifdef CONFIG_NETFILTER
2225        netfilter_init();
2226#endif
2227
2228        return 0;
2229}
2230
2231core_initcall(sock_init);       /* early initcall */
2232
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use to @seq, summed over the
 * sockets_in_use counter of every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative, by the way; report zero then. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
2249
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for sockets. The protocol's own compat_ioctl
 * handler, if it has one, gets first refusal; a command it leaves as
 * -ENOIOCTLCMD that lies in SIOCIWFIRST..SIOCIWLAST is then passed to
 * compat_wext_handle_ioctl().
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	int ret;

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);
	else
		ret = -ENOIOCTLCMD;

	if (ret != -ENOIOCTLCMD)
		return ret;

	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
		ret = compat_wext_handle_ioctl(net, cmd, arg);

	return ret;
}
#endif
2272
2273int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2274{
2275        return sock->ops->bind(sock, addr, addrlen);
2276}
2277
2278int kernel_listen(struct socket *sock, int backlog)
2279{
2280        return sock->ops->listen(sock, backlog);
2281}
2282
2283int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2284{
2285        struct sock *sk = sock->sk;
2286        int err;
2287
2288        err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2289                               newsock);
2290        if (err < 0)
2291                goto done;
2292
2293        err = sock->ops->accept(sock, *newsock, flags);
2294        if (err < 0) {
2295                sock_release(*newsock);
2296                *newsock = NULL;
2297                goto done;
2298        }
2299
2300        (*newsock)->ops = sock->ops;
2301        __module_get((*newsock)->ops->owner);
2302
2303done:
2304        return err;
2305}
2306
2307int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2308                   int flags)
2309{
2310        return sock->ops->connect(sock, addr, addrlen, flags);
2311}
2312
2313int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2314                         int *addrlen)
2315{
2316        return sock->ops->getname(sock, addr, addrlen, 0);
2317}
2318
2319int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2320                         int *addrlen)
2321{
2322        return sock->ops->getname(sock, addr, addrlen, 1);
2323}
2324
2325int kernel_getsockopt(struct socket *sock, int level, int optname,
2326                        char *optval, int *optlen)
2327{
2328        mm_segment_t oldfs = get_fs();
2329        int err;
2330
2331        set_fs(KERNEL_DS);
2332        if (level == SOL_SOCKET)
2333                err = sock_getsockopt(sock, level, optname, optval, optlen);
2334        else
2335                err = sock->ops->getsockopt(sock, level, optname, optval,
2336                                            optlen);
2337        set_fs(oldfs);
2338        return err;
2339}
2340
2341int kernel_setsockopt(struct socket *sock, int level, int optname,
2342                        char *optval, int optlen)
2343{
2344        mm_segment_t oldfs = get_fs();
2345        int err;
2346
2347        set_fs(KERNEL_DS);
2348        if (level == SOL_SOCKET)
2349                err = sock_setsockopt(sock, level, optname, optval, optlen);
2350        else
2351                err = sock->ops->setsockopt(sock, level, optname, optval,
2352                                            optlen);
2353        set_fs(oldfs);
2354        return err;
2355}
2356
2357int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2358                    size_t size, int flags)
2359{
2360        if (sock->ops->sendpage)
2361                return sock->ops->sendpage(sock, page, offset, size, flags);
2362
2363        return sock_no_sendpage(sock, page, offset, size, flags);
2364}
2365
2366int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2367{
2368        mm_segment_t oldfs = get_fs();
2369        int err;
2370
2371        set_fs(KERNEL_DS);
2372        err = sock->ops->ioctl(sock, cmd, arg);
2373        set_fs(oldfs);
2374
2375        return err;
2376}
2377
2378int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2379{
2380        return sock->ops->shutdown(sock, how);
2381}
2382
/* Core socket interface */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);
2407