linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *              Tigran Aivazian :       Made listen(2) backlog sanity checks 
  46 *                                      protocol-independent
  47 *
  48 *
  49 *              This program is free software; you can redistribute it and/or
  50 *              modify it under the terms of the GNU General Public License
  51 *              as published by the Free Software Foundation; either version
  52 *              2 of the License, or (at your option) any later version.
  53 *
  54 *
  55 *      This module is effectively the top level interface to the BSD socket
  56 *      paradigm. 
  57 *
  58 */
  59
  60#include <linux/config.h>
  61#include <linux/mm.h>
  62#include <linux/smp_lock.h>
  63#include <linux/socket.h>
  64#include <linux/file.h>
  65#include <linux/net.h>
  66#include <linux/interrupt.h>
  67#include <linux/netdevice.h>
  68#include <linux/proc_fs.h>
  69#include <linux/wanrouter.h>
  70#include <linux/netlink.h>
  71#include <linux/rtnetlink.h>
  72#include <linux/init.h>
  73#include <linux/poll.h>
  74#include <linux/cache.h>
  75#include <linux/module.h>
  76#include <linux/highmem.h>
  77
  78#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  79#include <linux/kmod.h>
  80#endif
  81
  82#include <asm/uaccess.h>
  83
  84#include <net/sock.h>
  85#include <net/scm.h>
  86#include <linux/netfilter.h>
  87
/*
 *      Forward declarations of the handlers that are wired into
 *      socket_file_ops further down in this file.
 */
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_read(struct file *file, char *buf,
                         size_t size, loff_t *ppos);
static ssize_t sock_write(struct file *file, const char *buf,
                          size_t size, loff_t *ppos);
static int sock_mmap(struct file *file, struct vm_area_struct * vma);

static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
                              struct poll_table_struct *wait);
static int sock_ioctl(struct inode *inode, struct file *file,
                      unsigned int cmd, unsigned long arg);
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_readv(struct file *file, const struct iovec *vector,
                          unsigned long count, loff_t *ppos);
static ssize_t sock_writev(struct file *file, const struct iovec *vector,
                          unsigned long count, loff_t *ppos);
static ssize_t sock_sendpage(struct file *file, struct page *page,
                             int offset, size_t size, loff_t *ppos, int more);
 107
 108
 109/*
 110 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 111 *      in the operation structures but are done directly via the socketcall() multiplexor.
 112 */
 113
 114static struct file_operations socket_file_ops = {
 115        llseek:         no_llseek,
 116        read:           sock_read,
 117        write:          sock_write,
 118        poll:           sock_poll,
 119        ioctl:          sock_ioctl,
 120        mmap:           sock_mmap,
 121        open:           sock_no_open,   /* special open code to disallow open via /proc */
 122        release:        sock_close,
 123        fasync:         sock_fasync,
 124        readv:          sock_readv,
 125        writev:         sock_writev,
 126        sendpage:       sock_sendpage
 127};
 128
/*
 *      The protocol list. Each protocol family is registered in here.
 *      The table has NPROTO slots and is indexed by family number;
 *      sock_create() treats a NULL slot as "family not available".
 */

static struct net_proto_family *net_families[NPROTO];
 134
 135#ifdef CONFIG_SMP
static atomic_t net_family_lockct = ATOMIC_INIT(0);
static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;

/* The strategy is: modifications of the net_families vector are short,
   do not sleep and are very rare, whereas read access should be free of
   any exclusive locks.  Readers therefore only bump net_family_lockct;
   a writer takes net_family_lock and waits for that count to reach zero.
 */
 143
 144static void net_family_write_lock(void)
 145{
 146        spin_lock(&net_family_lock);
 147        while (atomic_read(&net_family_lockct) != 0) {
 148                spin_unlock(&net_family_lock);
 149
 150                yield();
 151
 152                spin_lock(&net_family_lock);
 153        }
 154}
 155
/* Release the writer side taken by net_family_write_lock(). */
static __inline__ void net_family_write_unlock(void)
{
        spin_unlock(&net_family_lock);
}
 160
/*
 *      Enter a read-side section on the net_families vector.
 *
 *      The reader count is raised first, so a writer arriving later sees
 *      it in net_family_write_lock(); then we wait on net_family_lock.
 *      NOTE(review): spin_unlock_wait() is defined elsewhere - presumably
 *      it spins until the lock is not held, i.e. until a writer that is
 *      already inside has finished; confirm.
 */
static __inline__ void net_family_read_lock(void)
{
        atomic_inc(&net_family_lockct);
        spin_unlock_wait(&net_family_lock);
}
 166
/* Leave a read-side section: drop the count raised by net_family_read_lock(). */
static __inline__ void net_family_read_unlock(void)
{
        atomic_dec(&net_family_lockct);
}
 171
 172#else
/* Without CONFIG_SMP the net_families locking helpers expand to nothing. */
#define net_family_write_lock() do { } while(0)
#define net_family_write_unlock() do { } while(0)
#define net_family_read_lock() do { } while(0)
#define net_family_read_unlock() do { } while(0)
 177#endif
 178
 179
/*
 *      Statistics counters of the socket lists
 *
 *      One counter per CPU, each padded out to SMP_CACHE_BYTES.
 *      sock_alloc() increments and sock_release() decrements the entry of
 *      the CPU it is running on (smp_processor_id()).
 *      NOTE(review): allocation and release may run on different CPUs, so
 *      presumably only the sum over all entries is meaningful - confirm
 *      against whatever reads this array.
 */

static union {
        int     counter;
        char    __pad[SMP_CACHE_BYTES];
} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
 188
 189/*
 190 *      Support routines. Move socket addresses back and forth across the kernel/user
 191 *      divide and look after the messy bits.
 192 */
 193
#define MAX_SOCK_ADDR   128             /* Largest socket address accepted.
                                           Known sizes: 108 for Unix domain,
                                           16 for IP, 16 for IPX,
                                           24 for IPv6,
                                           about 80 for AX.25.
                                           Must be at least one bigger than
                                           the AF_UNIX size (see
                                           net/unix/af_unix.c:unix_mkname()).
                                         */
 202                                         
 203/**
 204 *      move_addr_to_kernel     -       copy a socket address into kernel space
 205 *      @uaddr: Address in user space
 206 *      @kaddr: Address in kernel space
 207 *      @ulen: Length in user space
 208 *
 209 *      The address is copied into kernel space. If the provided address is
 210 *      too long an error code of -EINVAL is returned. If the copy gives
 211 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 212 */
 213
 214int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 215{
 216        if(ulen<0||ulen>MAX_SOCK_ADDR)
 217                return -EINVAL;
 218        if(ulen==0)
 219                return 0;
 220        if(copy_from_user(kaddr,uaddr,ulen))
 221                return -EFAULT;
 222        return 0;
 223}
 224
 225/**
 226 *      move_addr_to_user       -       copy an address to user space
 227 *      @kaddr: kernel space address
 228 *      @klen: length of address in kernel
 229 *      @uaddr: user space address
 230 *      @ulen: pointer to user length field
 231 *
 232 *      The value pointed to by ulen on entry is the buffer length available.
 233 *      This is overwritten with the buffer space used. -EINVAL is returned
 234 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 235 *      is returned if either the buffer or the length field are not
 236 *      accessible.
 237 *      After copying the data up to the limit the user specifies, the true
 238 *      length of the data is written over the length limit the user
 239 *      specified. Zero is returned for a success.
 240 */
 241 
 242int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 243{
 244        int err;
 245        int len;
 246
 247        if((err=get_user(len, ulen)))
 248                return err;
 249        if(len>klen)
 250                len=klen;
 251        if(len<0 || len> MAX_SOCK_ADDR)
 252                return -EINVAL;
 253        if(len)
 254        {
 255                if(copy_to_user(uaddr,kaddr,len))
 256                        return -EFAULT;
 257        }
 258        /*
 259         *      "fromlen shall refer to the value before truncation.."
 260         *                      1003.1g
 261         */
 262        return __put_user(klen, ulen);
 263}
 264
 265#define SOCKFS_MAGIC 0x534F434B
 266static int sockfs_statfs(struct super_block *sb, struct statfs *buf)
 267{
 268        buf->f_type = SOCKFS_MAGIC;
 269        buf->f_bsize = 1024;
 270        buf->f_namelen = 255;
 271        return 0;
 272}
 273
 274static struct super_operations sockfs_ops = {
 275        statfs:         sockfs_statfs,
 276};
 277
 278static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent)
 279{
 280        struct inode *root = new_inode(sb);
 281        if (!root)
 282                return NULL;
 283        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 284        root->i_uid = root->i_gid = 0;
 285        root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 286        sb->s_blocksize = 1024;
 287        sb->s_blocksize_bits = 10;
 288        sb->s_magic = SOCKFS_MAGIC;
 289        sb->s_op        = &sockfs_ops;
 290        sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 });
 291        if (!sb->s_root) {
 292                iput(root);
 293                return NULL;
 294        }
 295        sb->s_root->d_sb = sb;
 296        sb->s_root->d_parent = sb->s_root;
 297        d_instantiate(sb->s_root, root);
 298        return sb;
 299}
 300
/*
 * Mount of the socket filesystem. sock_alloc() takes new inodes from its
 * superblock and sock_map_fd() hangs socket dentries under its root.
 * NOTE(review): the assignment to sock_mnt is not in this part of the
 * file - presumably done at init time; confirm.
 */
static struct vfsmount *sock_mnt;
/* Filesystem type "sockfs", built by sockfs_read_super, flagged FS_NOMOUNT. */
static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super, FS_NOMOUNT);
/*
 * d_delete hook for socket dentries: unconditionally returns 1.
 * NOTE(review): presumably this tells the dcache not to keep the dentry
 * around once it is unused - confirm against the d_delete contract.
 */
static int sockfs_delete_dentry(struct dentry *dentry)
{
        return 1;
}
 307static struct dentry_operations sockfs_dentry_operations = {
 308        d_delete:       sockfs_delete_dentry,
 309};
 310
 311/*
 312 *      Obtains the first available file descriptor and sets it up for use.
 313 *
 314 *      This function creates file structure and maps it to fd space
 315 *      of current process. On success it returns file descriptor
 316 *      and file struct implicitly stored in sock->file.
 317 *      Note that another thread may close file descriptor before we return
 318 *      from this function. We use the fact that now we do not refer
 319 *      to socket after mapping. If one day we will need it, this
 320 *      function will increment ref. count on file by 1.
 321 *
 322 *      In any case returned fd MAY BE not valid!
 323 *      This race condition is unavoidable
 324 *      with shared fd spaces, we cannot solve it inside kernel,
 325 *      but we take care of internal coherence yet.
 326 */
 327
 328int sock_map_fd(struct socket *sock)
 329{
 330        int fd;
 331        struct qstr this;
 332        char name[32];
 333
 334        /*
 335         *      Find a file descriptor suitable for return to the user. 
 336         */
 337
 338        fd = get_unused_fd();
 339        if (fd >= 0) {
 340                struct file *file = get_empty_filp();
 341
 342                if (!file) {
 343                        put_unused_fd(fd);
 344                        fd = -ENFILE;
 345                        goto out;
 346                }
 347
 348                sprintf(name, "[%lu]", sock->inode->i_ino);
 349                this.name = name;
 350                this.len = strlen(name);
 351                this.hash = sock->inode->i_ino;
 352
 353                file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
 354                if (!file->f_dentry) {
 355                        put_filp(file);
 356                        put_unused_fd(fd);
 357                        fd = -ENOMEM;
 358                        goto out;
 359                }
 360                file->f_dentry->d_op = &sockfs_dentry_operations;
 361                d_add(file->f_dentry, sock->inode);
 362                file->f_vfsmnt = mntget(sock_mnt);
 363
 364                sock->file = file;
 365                file->f_op = sock->inode->i_fop = &socket_file_ops;
 366                file->f_mode = 3;
 367                file->f_flags = O_RDWR;
 368                file->f_pos = 0;
 369                fd_install(fd, file);
 370        }
 371
 372out:
 373        return fd;
 374}
 375
/*
 * Return the socket embedded in @inode (the u.socket_i member).
 * No check is made here that the inode really is a socket.
 */
extern __inline__ struct socket *socki_lookup(struct inode *inode)
{
        return &inode->u.socket_i;
}
 380
 381/**
 382 *      sockfd_lookup   -       Go from a file number to its socket slot
 383 *      @fd: file handle
 384 *      @err: pointer to an error code return
 385 *
 386 *      The file handle passed in is locked and the socket it is bound
 387 *      too is returned. If an error occurs the err pointer is overwritten
 388 *      with a negative errno code and NULL is returned. The function checks
 389 *      for both invalid handles and passing a handle which is not a socket.
 390 *
 391 *      On a success the socket object pointer is returned.
 392 */
 393
 394struct socket *sockfd_lookup(int fd, int *err)
 395{
 396        struct file *file;
 397        struct inode *inode;
 398        struct socket *sock;
 399
 400        if (!(file = fget(fd)))
 401        {
 402                *err = -EBADF;
 403                return NULL;
 404        }
 405
 406        inode = file->f_dentry->d_inode;
 407        if (!inode->i_sock || !(sock = socki_lookup(inode)))
 408        {
 409                *err = -ENOTSOCK;
 410                fput(file);
 411                return NULL;
 412        }
 413
 414        if (sock->file != file) {
 415                printk(KERN_ERR "socki_lookup: socket file changed!\n");
 416                sock->file = file;
 417        }
 418        return sock;
 419}
 420
/* Drop a reference on the file attached to @sock (fput on sock->file). */
extern __inline__ void sockfd_put(struct socket *sock)
{
        fput(sock->file);
}
 425
 426/**
 427 *      sock_alloc      -       allocate a socket
 428 *      
 429 *      Allocate a new inode and socket object. The two are bound together
 430 *      and initialised. The socket is then returned. If we are out of inodes
 431 *      NULL is returned.
 432 */
 433
 434struct socket *sock_alloc(void)
 435{
 436        struct inode * inode;
 437        struct socket * sock;
 438
 439        inode = new_inode(sock_mnt->mnt_sb);
 440        if (!inode)
 441                return NULL;
 442
 443        inode->i_dev = NODEV;
 444        sock = socki_lookup(inode);
 445
 446        inode->i_mode = S_IFSOCK|S_IRWXUGO;
 447        inode->i_sock = 1;
 448        inode->i_uid = current->fsuid;
 449        inode->i_gid = current->fsgid;
 450
 451        sock->inode = inode;
 452        init_waitqueue_head(&sock->wait);
 453        sock->fasync_list = NULL;
 454        sock->state = SS_UNCONNECTED;
 455        sock->flags = 0;
 456        sock->ops = NULL;
 457        sock->sk = NULL;
 458        sock->file = NULL;
 459
 460        sockets_in_use[smp_processor_id()].counter++;
 461        return sock;
 462}
 463
/*
 *      In theory you can't get an open on this inode, but /proc provides
 *      a back door. Remember to keep it shut otherwise you'll let the
 *      creepy crawlies in.
 *
 *      Installed as the open method in socket_file_ops; every attempt is
 *      refused with -ENXIO and neither argument is looked at.
 */
  
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
        return -ENXIO;
}
 474
 475/**
 476 *      sock_release    -       close a socket
 477 *      @sock: socket to close
 478 *
 479 *      The socket is released from the protocol stack if it has a release
 480 *      callback, and the inode is then released if the socket is bound to
 481 *      an inode not a file. 
 482 */
 483 
 484void sock_release(struct socket *sock)
 485{
 486        if (sock->ops) 
 487                sock->ops->release(sock);
 488
 489        if (sock->fasync_list)
 490                printk(KERN_ERR "sock_release: fasync list not empty!\n");
 491
 492        sockets_in_use[smp_processor_id()].counter--;
 493        if (!sock->file) {
 494                iput(sock->inode);
 495                return;
 496        }
 497        sock->file=NULL;
 498}
 499
 500int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 501{
 502        int err;
 503        struct scm_cookie scm;
 504
 505        err = scm_send(sock, msg, &scm);
 506        if (err >= 0) {
 507                err = sock->ops->sendmsg(sock, msg, size, &scm);
 508                scm_destroy(&scm);
 509        }
 510        return err;
 511}
 512
 513int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 514{
 515        struct scm_cookie scm;
 516
 517        memset(&scm, 0, sizeof(scm));
 518
 519        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 520        if (size >= 0)
 521                scm_recv(sock, msg, &scm, flags);
 522
 523        return size;
 524}
 525
 526
 527/*
 528 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 529 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 530 */
 531
 532static ssize_t sock_read(struct file *file, char *ubuf,
 533                         size_t size, loff_t *ppos)
 534{
 535        struct socket *sock;
 536        struct iovec iov;
 537        struct msghdr msg;
 538        int flags;
 539
 540        if (ppos != &file->f_pos)
 541                return -ESPIPE;
 542        if (size==0)            /* Match SYS5 behaviour */
 543                return 0;
 544
 545        sock = socki_lookup(file->f_dentry->d_inode); 
 546
 547        msg.msg_name=NULL;
 548        msg.msg_namelen=0;
 549        msg.msg_iov=&iov;
 550        msg.msg_iovlen=1;
 551        msg.msg_control=NULL;
 552        msg.msg_controllen=0;
 553        iov.iov_base=ubuf;
 554        iov.iov_len=size;
 555        flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 556
 557        return sock_recvmsg(sock, &msg, size, flags);
 558}
 559
 560
 561/*
 562 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
 563 *      is readable by the user process.
 564 */
 565
 566static ssize_t sock_write(struct file *file, const char *ubuf,
 567                          size_t size, loff_t *ppos)
 568{
 569        struct socket *sock;
 570        struct msghdr msg;
 571        struct iovec iov;
 572        
 573        if (ppos != &file->f_pos)
 574                return -ESPIPE;
 575        if(size==0)             /* Match SYS5 behaviour */
 576                return 0;
 577
 578        sock = socki_lookup(file->f_dentry->d_inode); 
 579
 580        msg.msg_name=NULL;
 581        msg.msg_namelen=0;
 582        msg.msg_iov=&iov;
 583        msg.msg_iovlen=1;
 584        msg.msg_control=NULL;
 585        msg.msg_controllen=0;
 586        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 587        if (sock->type == SOCK_SEQPACKET)
 588                msg.msg_flags |= MSG_EOR;
 589        iov.iov_base=(void *)ubuf;
 590        iov.iov_len=size;
 591        
 592        return sock_sendmsg(sock, &msg, size);
 593}
 594
 595ssize_t sock_sendpage(struct file *file, struct page *page,
 596                      int offset, size_t size, loff_t *ppos, int more)
 597{
 598        struct socket *sock;
 599        int flags;
 600
 601        if (ppos != &file->f_pos)
 602                return -ESPIPE;
 603
 604        sock = socki_lookup(file->f_dentry->d_inode);
 605
 606        flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 607        if (more)
 608                flags |= MSG_MORE;
 609
 610        return sock->ops->sendpage(sock, page, offset, size, flags);
 611}
 612
 613int sock_readv_writev(int type, struct inode * inode, struct file * file,
 614                      const struct iovec * iov, long count, long size)
 615{
 616        struct msghdr msg;
 617        struct socket *sock;
 618
 619        sock = socki_lookup(inode);
 620
 621        msg.msg_name = NULL;
 622        msg.msg_namelen = 0;
 623        msg.msg_control = NULL;
 624        msg.msg_controllen = 0;
 625        msg.msg_iov = (struct iovec *) iov;
 626        msg.msg_iovlen = count;
 627        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 628
 629        /* read() does a VERIFY_WRITE */
 630        if (type == VERIFY_WRITE)
 631                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 632
 633        if (sock->type == SOCK_SEQPACKET)
 634                msg.msg_flags |= MSG_EOR;
 635
 636        return sock_sendmsg(sock, &msg, size);
 637}
 638
 639static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 640                          unsigned long count, loff_t *ppos)
 641{
 642        size_t tot_len = 0;
 643        int i;
 644        for (i = 0 ; i < count ; i++)
 645                tot_len += vector[i].iov_len;
 646        return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
 647                                 file, vector, count, tot_len);
 648}
 649        
 650static ssize_t sock_writev(struct file *file, const struct iovec *vector,
 651                           unsigned long count, loff_t *ppos)
 652{
 653        size_t tot_len = 0;
 654        int i;
 655        for (i = 0 ; i < count ; i++)
 656                tot_len += vector[i].iov_len;
 657        return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
 658                                 file, vector, count, tot_len);
 659}
 660
 661/*
 662 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 663 *      with it - that's up to the protocol still.
 664 */
 665
 666int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 667           unsigned long arg)
 668{
 669        struct socket *sock;
 670        int err;
 671
 672        unlock_kernel();
 673        sock = socki_lookup(inode);
 674        err = sock->ops->ioctl(sock, cmd, arg);
 675        lock_kernel();
 676
 677        return err;
 678}
 679
 680
 681/* No kernel lock held - perfect */
 682static unsigned int sock_poll(struct file *file, poll_table * wait)
 683{
 684        struct socket *sock;
 685
 686        /*
 687         *      We can't return errors to poll, so it's either yes or no. 
 688         */
 689        sock = socki_lookup(file->f_dentry->d_inode);
 690        return sock->ops->poll(file, sock, wait);
 691}
 692
 693static int sock_mmap(struct file * file, struct vm_area_struct * vma)
 694{
 695        struct socket *sock = socki_lookup(file->f_dentry->d_inode);
 696
 697        return sock->ops->mmap(file, sock, vma);
 698}
 699
 700int sock_close(struct inode *inode, struct file *filp)
 701{
 702        /*
 703         *      It was possible the inode is NULL we were 
 704         *      closing an unfinished socket. 
 705         */
 706
 707        if (!inode)
 708        {
 709                printk(KERN_DEBUG "sock_close: NULL inode\n");
 710                return 0;
 711        }
 712        sock_fasync(-1, filp, 0);
 713        sock_release(socki_lookup(inode));
 714        return 0;
 715}
 716
/*
 *      Update the socket async list
 *
 *      @fd:   descriptor number recorded in the list entry
 *      @filp: file whose entry is added, updated or removed
 *      @on:   non-zero to add (or update) the entry for @filp,
 *             zero to remove it
 *
 *      Returns -ENOMEM if a new entry cannot be allocated, -EINVAL if the
 *      socket has no sk attached, and 0 otherwise (including the case
 *      where there was nothing to remove).
 *
 *      Fasync_list locking strategy.
 *
 *      1. fasync_list is modified only under process context socket lock
 *         i.e. under semaphore.
 *      2. fasync_list is used under read_lock(&sk->callback_lock)
 *         or under socket lock.
 *      3. fasync_list can be used from softirq context, so that
 *         modification under socket lock have to be enhanced with
 *         write_lock_bh(&sk->callback_lock).
 *                                                      --ANK (990710)
 */

static int sock_fasync(int fd, struct file *filp, int on)
{
        struct fasync_struct *fa, *fna=NULL, **prev;
        struct socket *sock;
        struct sock *sk;

        /* Allocate the new entry up front, before any lock is taken. */
        if (on)
        {
                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
                if(fna==NULL)
                        return -ENOMEM;
        }

        sock = socki_lookup(filp->f_dentry->d_inode);

        /* Without an sk there is nothing to lock against: give up. */
        if ((sk=sock->sk) == NULL) {
                if (fna)
                        kfree(fna);
                return -EINVAL;
        }

        lock_sock(sk);

        prev=&(sock->fasync_list);

        /*
         * Look for an existing entry for this file. prev is left pointing
         * at the link that refers to it, so that it can be unlinked below.
         */
        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
                if (fa->fa_file==filp)
                        break;

        if(on)
        {
                if(fa!=NULL)
                {
                        /* Already on the list: just refresh the fd. */
                        write_lock_bh(&sk->callback_lock);
                        fa->fa_fd=fd;
                        write_unlock_bh(&sk->callback_lock);

                        /* The preallocated entry is not needed after all. */
                        kfree(fna);
                        goto out;
                }
                /* Fill in the new entry, then publish it at the list head. */
                fna->fa_file=filp;
                fna->fa_fd=fd;
                fna->magic=FASYNC_MAGIC;
                fna->fa_next=sock->fasync_list;
                write_lock_bh(&sk->callback_lock);
                sock->fasync_list=fna;
                write_unlock_bh(&sk->callback_lock);
        }
        else
        {
                if (fa!=NULL)
                {
                        /* Unlink under the callback lock, free outside it. */
                        write_lock_bh(&sk->callback_lock);
                        *prev=fa->fa_next;
                        write_unlock_bh(&sk->callback_lock);
                        kfree(fa);
                }
        }

out:
        release_sock(sock->sk);
        return 0;
}
 795
 796/* This function may be called only under socket lock or callback_lock */
 797
 798int sock_wake_async(struct socket *sock, int how, int band)
 799{
 800        if (!sock || !sock->fasync_list)
 801                return -1;
 802        switch (how)
 803        {
 804        case 1:
 805                
 806                if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 807                        break;
 808                goto call_kill;
 809        case 2:
 810                if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
 811                        break;
 812                /* fall through */
 813        case 0:
 814        call_kill:
 815                __kill_fasync(sock->fasync_list, SIGIO, band);
 816                break;
 817        case 3:
 818                __kill_fasync(sock->fasync_list, SIGURG, band);
 819        }
 820        return 0;
 821}
 822
 823
 824int sock_create(int family, int type, int protocol, struct socket **res)
 825{
 826        int i;
 827        struct socket *sock;
 828
 829        /*
 830         *      Check protocol is in range
 831         */
 832        if (family < 0 || family >= NPROTO)
 833                return -EAFNOSUPPORT;
 834        if (type < 0 || type >= SOCK_MAX)
 835                return -EINVAL;
 836
 837        /* Compatibility.
 838
 839           This uglymoron is moved from INET layer to here to avoid
 840           deadlock in module load.
 841         */
 842        if (family == PF_INET && type == SOCK_PACKET) {
 843                static int warned; 
 844                if (!warned) {
 845                        warned = 1;
 846                        printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
 847                }
 848                family = PF_PACKET;
 849        }
 850                
 851#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 852        /* Attempt to load a protocol module if the find failed. 
 853         * 
 854         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 855         * requested real, full-featured networking support upon configuration.
 856         * Otherwise module support will break!
 857         */
 858        if (net_families[family]==NULL)
 859        {
 860                char module_name[30];
 861                sprintf(module_name,"net-pf-%d",family);
 862                request_module(module_name);
 863        }
 864#endif
 865
 866        net_family_read_lock();
 867        if (net_families[family] == NULL) {
 868                i = -EAFNOSUPPORT;
 869                goto out;
 870        }
 871
 872/*
 873 *      Allocate the socket and allow the family to set things up. if
 874 *      the protocol is 0, the family is instructed to select an appropriate
 875 *      default.
 876 */
 877
 878        if (!(sock = sock_alloc())) 
 879        {
 880                printk(KERN_WARNING "socket: no more sockets\n");
 881                i = -ENFILE;            /* Not exactly a match, but its the
 882                                           closest posix thing */
 883                goto out;
 884        }
 885
 886        sock->type  = type;
 887
 888        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 889        {
 890                sock_release(sock);
 891                goto out;
 892        }
 893
 894        *res = sock;
 895
 896out:
 897        net_family_read_unlock();
 898        return i;
 899}
 900
 901asmlinkage long sys_socket(int family, int type, int protocol)
 902{
 903        int retval;
 904        struct socket *sock;
 905
 906        retval = sock_create(family, type, protocol, &sock);
 907        if (retval < 0)
 908                goto out;
 909
 910        retval = sock_map_fd(sock);
 911        if (retval < 0)
 912                goto out_release;
 913
 914out:
 915        /* It may be already another descriptor 8) Not kernel problem. */
 916        return retval;
 917
 918out_release:
 919        sock_release(sock);
 920        return retval;
 921}
 922
 923/*
 924 *      Create a pair of connected sockets.
 925 */
 926
 927asmlinkage long sys_socketpair(int family, int type, int protocol, int usockvec[2])
 928{
 929        struct socket *sock1, *sock2;
 930        int fd1, fd2, err;
 931
 932        /*
 933         * Obtain the first socket and check if the underlying protocol
 934         * supports the socketpair call.
 935         */
 936
 937        err = sock_create(family, type, protocol, &sock1);
 938        if (err < 0)
 939                goto out;
 940
 941        err = sock_create(family, type, protocol, &sock2);
 942        if (err < 0)
 943                goto out_release_1;
 944
 945        err = sock1->ops->socketpair(sock1, sock2);
 946        if (err < 0) 
 947                goto out_release_both;
 948
 949        fd1 = fd2 = -1;
 950
 951        err = sock_map_fd(sock1);
 952        if (err < 0)
 953                goto out_release_both;
 954        fd1 = err;
 955
 956        err = sock_map_fd(sock2);
 957        if (err < 0)
 958                goto out_close_1;
 959        fd2 = err;
 960
 961        /* fd1 and fd2 may be already another descriptors.
 962         * Not kernel problem.
 963         */
 964
 965        err = put_user(fd1, &usockvec[0]); 
 966        if (!err)
 967                err = put_user(fd2, &usockvec[1]);
 968        if (!err)
 969                return 0;
 970
 971        sys_close(fd2);
 972        sys_close(fd1);
 973        return err;
 974
 975out_close_1:
 976        sock_release(sock2);
 977        sys_close(fd1);
 978        return err;
 979
 980out_release_both:
 981        sock_release(sock2);
 982out_release_1:
 983        sock_release(sock1);
 984out:
 985        return err;
 986}
 987
 988
 989/*
 990 *      Bind a name to a socket. Nothing much to do here since it's
 991 *      the protocol's responsibility to handle the local address.
 992 *
 993 *      We move the socket address to kernel space before we call
 994 *      the protocol layer (having also checked the address is ok).
 995 */
 996
 997asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
 998{
 999        struct socket *sock;
1000        char address[MAX_SOCK_ADDR];
1001        int err;
1002
1003        if((sock = sockfd_lookup(fd,&err))!=NULL)
1004        {
1005                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
1006                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
1007                sockfd_put(sock);
1008        }                       
1009        return err;
1010}
1011
1012
1013/*
1014 *      Perform a listen. Basically, we allow the protocol to do anything
1015 *      necessary for a listen, and if that works, we mark the socket as
1016 *      ready for listening.
1017 */
1018
1019int sysctl_somaxconn = SOMAXCONN;
1020
1021asmlinkage long sys_listen(int fd, int backlog)
1022{
1023        struct socket *sock;
1024        int err;
1025        
1026        if ((sock = sockfd_lookup(fd, &err)) != NULL) {
1027                if ((unsigned) backlog > sysctl_somaxconn)
1028                        backlog = sysctl_somaxconn;
1029                err=sock->ops->listen(sock, backlog);
1030                sockfd_put(sock);
1031        }
1032        return err;
1033}
1034
1035
1036/*
1037 *      For accept, we attempt to create a new socket, set up the link
1038 *      with the client, wake up the client, then return the new
1039 *      connected fd. We collect the address of the connector in kernel
1040 *      space and move it to user at the very end. This is unclean because
1041 *      we open the socket then return an error.
1042 *
1043 *      1003.1g adds the ability to query connection-pending status via
1044 *      recvmsg(). We need to add that support in a way that's clean when
1045 *      we restructure accept also.
1046 */
1047
1048asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
1049{
1050        struct socket *sock, *newsock;
1051        int err, len;
1052        char address[MAX_SOCK_ADDR];
1053
1054        sock = sockfd_lookup(fd, &err);
1055        if (!sock)
1056                goto out;
1057
1058        err = -ENFILE;
1059        if (!(newsock = sock_alloc())) 
1060                goto out_put;
1061
1062        newsock->type = sock->type;
1063        newsock->ops = sock->ops;
1064
1065        err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1066        if (err < 0)
1067                goto out_release;
1068
1069        if (upeer_sockaddr) {
1070                if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
1071                        err = -ECONNABORTED;
1072                        goto out_release;
1073                }
1074                err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
1075                if (err < 0)
1076                        goto out_release;
1077        }
1078
1079        /* File flags are not inherited via accept() unlike another OSes. */
1080
1081        if ((err = sock_map_fd(newsock)) < 0)
1082                goto out_release;
1083
1084out_put:
1085        sockfd_put(sock);
1086out:
1087        return err;
1088
1089out_release:
1090        sock_release(newsock);
1091        goto out_put;
1092}
1093
1094
1095/*
1096 *      Attempt to connect to a socket with the server address.  The address
1097 *      is in user space so we verify it is OK and move it to kernel space.
1098 *
1099 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1100 *      break bindings
1101 *
1102 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1103 *      other SEQPACKET protocols that take time to connect() as it doesn't
1104 *      include the -EINPROGRESS status for such sockets.
1105 */
1106
1107asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
1108{
1109        struct socket *sock;
1110        char address[MAX_SOCK_ADDR];
1111        int err;
1112
1113        sock = sockfd_lookup(fd, &err);
1114        if (!sock)
1115                goto out;
1116        err = move_addr_to_kernel(uservaddr, addrlen, address);
1117        if (err < 0)
1118                goto out_put;
1119        err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
1120                                 sock->file->f_flags);
1121out_put:
1122        sockfd_put(sock);
1123out:
1124        return err;
1125}
1126
1127/*
1128 *      Get the local address ('name') of a socket object. Move the obtained
1129 *      name to user space.
1130 */
1131
1132asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1133{
1134        struct socket *sock;
1135        char address[MAX_SOCK_ADDR];
1136        int len, err;
1137        
1138        sock = sockfd_lookup(fd, &err);
1139        if (!sock)
1140                goto out;
1141        err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1142        if (err)
1143                goto out_put;
1144        err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1145
1146out_put:
1147        sockfd_put(sock);
1148out:
1149        return err;
1150}
1151
1152/*
1153 *      Get the remote address ('name') of a socket object. Move the obtained
1154 *      name to user space.
1155 */
1156
1157asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1158{
1159        struct socket *sock;
1160        char address[MAX_SOCK_ADDR];
1161        int len, err;
1162
1163        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1164        {
1165                err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
1166                if (!err)
1167                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
1168                sockfd_put(sock);
1169        }
1170        return err;
1171}
1172
1173/*
1174 *      Send a datagram to a given address. We move the address into kernel
1175 *      space and check the user space data area is readable before invoking
1176 *      the protocol.
1177 */
1178
1179asmlinkage long sys_sendto(int fd, void * buff, size_t len, unsigned flags,
1180                           struct sockaddr *addr, int addr_len)
1181{
1182        struct socket *sock;
1183        char address[MAX_SOCK_ADDR];
1184        int err;
1185        struct msghdr msg;
1186        struct iovec iov;
1187        
1188        sock = sockfd_lookup(fd, &err);
1189        if (!sock)
1190                goto out;
1191        iov.iov_base=buff;
1192        iov.iov_len=len;
1193        msg.msg_name=NULL;
1194        msg.msg_iov=&iov;
1195        msg.msg_iovlen=1;
1196        msg.msg_control=NULL;
1197        msg.msg_controllen=0;
1198        msg.msg_namelen=0;
1199        if(addr)
1200        {
1201                err = move_addr_to_kernel(addr, addr_len, address);
1202                if (err < 0)
1203                        goto out_put;
1204                msg.msg_name=address;
1205                msg.msg_namelen=addr_len;
1206        }
1207        if (sock->file->f_flags & O_NONBLOCK)
1208                flags |= MSG_DONTWAIT;
1209        msg.msg_flags = flags;
1210        err = sock_sendmsg(sock, &msg, len);
1211
1212out_put:                
1213        sockfd_put(sock);
1214out:
1215        return err;
1216}
1217
1218/*
1219 *      Send a datagram down a socket. 
1220 */
1221
1222asmlinkage long sys_send(int fd, void * buff, size_t len, unsigned flags)
1223{
1224        return sys_sendto(fd, buff, len, flags, NULL, 0);
1225}
1226
1227/*
1228 *      Receive a frame from the socket and optionally record the address of the 
1229 *      sender. We verify the buffers are writable and if needed move the
1230 *      sender address from kernel to user space.
1231 */
1232
1233asmlinkage long sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1234                             struct sockaddr *addr, int *addr_len)
1235{
1236        struct socket *sock;
1237        struct iovec iov;
1238        struct msghdr msg;
1239        char address[MAX_SOCK_ADDR];
1240        int err,err2;
1241
1242        sock = sockfd_lookup(fd, &err);
1243        if (!sock)
1244                goto out;
1245
1246        msg.msg_control=NULL;
1247        msg.msg_controllen=0;
1248        msg.msg_iovlen=1;
1249        msg.msg_iov=&iov;
1250        iov.iov_len=size;
1251        iov.iov_base=ubuf;
1252        msg.msg_name=address;
1253        msg.msg_namelen=MAX_SOCK_ADDR;
1254        if (sock->file->f_flags & O_NONBLOCK)
1255                flags |= MSG_DONTWAIT;
1256        err=sock_recvmsg(sock, &msg, size, flags);
1257
1258        if(err >= 0 && addr != NULL)
1259        {
1260                err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1261                if(err2<0)
1262                        err=err2;
1263        }
1264        sockfd_put(sock);                       
1265out:
1266        return err;
1267}
1268
1269/*
1270 *      Receive a datagram from a socket. 
1271 */
1272
1273asmlinkage long sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1274{
1275        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1276}
1277
1278/*
1279 *      Set a socket option. Because we don't know the option lengths we have
1280 *      to pass the user mode parameter for the protocols to sort out.
1281 */
1282
1283asmlinkage long sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1284{
1285        int err;
1286        struct socket *sock;
1287
1288        if (optlen < 0)
1289                return -EINVAL;
1290                        
1291        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1292        {
1293                if (level == SOL_SOCKET)
1294                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1295                else
1296                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1297                sockfd_put(sock);
1298        }
1299        return err;
1300}
1301
1302/*
1303 *      Get a socket option. Because we don't know the option lengths we have
1304 *      to pass a user mode parameter for the protocols to sort out.
1305 */
1306
1307asmlinkage long sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1308{
1309        int err;
1310        struct socket *sock;
1311
1312        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1313        {
1314                if (level == SOL_SOCKET)
1315                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1316                else
1317                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1318                sockfd_put(sock);
1319        }
1320        return err;
1321}
1322
1323
1324/*
1325 *      Shutdown a socket.
1326 */
1327
1328asmlinkage long sys_shutdown(int fd, int how)
1329{
1330        int err;
1331        struct socket *sock;
1332
1333        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1334        {
1335                err=sock->ops->shutdown(sock, how);
1336                sockfd_put(sock);
1337        }
1338        return err;
1339}
1340
1341/*
1342 *      BSD sendmsg interface
1343 */
1344
1345asmlinkage long sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1346{
1347        struct socket *sock;
1348        char address[MAX_SOCK_ADDR];
1349        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1350        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1351        unsigned char *ctl_buf = ctl;
1352        struct msghdr msg_sys;
1353        int err, ctl_len, iov_size, total_len;
1354        
1355        err = -EFAULT;
1356        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1357                goto out; 
1358
1359        sock = sockfd_lookup(fd, &err);
1360        if (!sock) 
1361                goto out;
1362
1363        /* do not move before msg_sys is valid */
1364        err = -EMSGSIZE;
1365        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1366                goto out_put;
1367
1368        /* Check whether to allocate the iovec area*/
1369        err = -ENOMEM;
1370        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1371        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1372                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1373                if (!iov)
1374                        goto out_put;
1375        }
1376
1377        /* This will also move the address data into kernel space */
1378        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1379        if (err < 0) 
1380                goto out_freeiov;
1381        total_len = err;
1382
1383        err = -ENOBUFS;
1384
1385        if (msg_sys.msg_controllen > INT_MAX)
1386                goto out_freeiov;
1387        ctl_len = msg_sys.msg_controllen; 
1388        if (ctl_len) 
1389        {
1390                if (ctl_len > sizeof(ctl))
1391                {
1392                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1393                        if (ctl_buf == NULL) 
1394                                goto out_freeiov;
1395                }
1396                err = -EFAULT;
1397                if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1398                        goto out_freectl;
1399                msg_sys.msg_control = ctl_buf;
1400        }
1401        msg_sys.msg_flags = flags;
1402
1403        if (sock->file->f_flags & O_NONBLOCK)
1404                msg_sys.msg_flags |= MSG_DONTWAIT;
1405        err = sock_sendmsg(sock, &msg_sys, total_len);
1406
1407out_freectl:
1408        if (ctl_buf != ctl)    
1409                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1410out_freeiov:
1411        if (iov != iovstack)
1412                sock_kfree_s(sock->sk, iov, iov_size);
1413out_put:
1414        sockfd_put(sock);
1415out:       
1416        return err;
1417}
1418
1419/*
1420 *      BSD recvmsg interface
1421 */
1422
1423asmlinkage long sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1424{
1425        struct socket *sock;
1426        struct iovec iovstack[UIO_FASTIOV];
1427        struct iovec *iov=iovstack;
1428        struct msghdr msg_sys;
1429        unsigned long cmsg_ptr;
1430        int err, iov_size, total_len, len;
1431
1432        /* kernel mode address */
1433        char addr[MAX_SOCK_ADDR];
1434
1435        /* user mode address pointers */
1436        struct sockaddr *uaddr;
1437        int *uaddr_len;
1438        
1439        err=-EFAULT;
1440        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1441                goto out;
1442
1443        sock = sockfd_lookup(fd, &err);
1444        if (!sock)
1445                goto out;
1446
1447        err = -EMSGSIZE;
1448        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1449                goto out_put;
1450        
1451        /* Check whether to allocate the iovec area*/
1452        err = -ENOMEM;
1453        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1454        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1455                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1456                if (!iov)
1457                        goto out_put;
1458        }
1459
1460        /*
1461         *      Save the user-mode address (verify_iovec will change the
1462         *      kernel msghdr to use the kernel address space)
1463         */
1464         
1465        uaddr = msg_sys.msg_name;
1466        uaddr_len = &msg->msg_namelen;
1467        err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1468        if (err < 0)
1469                goto out_freeiov;
1470        total_len=err;
1471
1472        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1473        msg_sys.msg_flags = 0;
1474        
1475        if (sock->file->f_flags & O_NONBLOCK)
1476                flags |= MSG_DONTWAIT;
1477        err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1478        if (err < 0)
1479                goto out_freeiov;
1480        len = err;
1481
1482        if (uaddr != NULL) {
1483                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1484                if (err < 0)
1485                        goto out_freeiov;
1486        }
1487        err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1488        if (err)
1489                goto out_freeiov;
1490        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1491                                                         &msg->msg_controllen);
1492        if (err)
1493                goto out_freeiov;
1494        err = len;
1495
1496out_freeiov:
1497        if (iov != iovstack)
1498                sock_kfree_s(sock->sk, iov, iov_size);
1499out_put:
1500        sockfd_put(sock);
1501out:
1502        return err;
1503}
1504
1505
1506/*
1507 *      Perform a file control on a socket file descriptor.
1508 *
1509 *      Doesn't acquire a fd lock, because no network fcntl
1510 *      function sleeps currently.
1511 */
1512
1513int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1514{
1515        struct socket *sock;
1516
1517        sock = socki_lookup (filp->f_dentry->d_inode);
1518        if (sock && sock->ops)
1519                return sock_no_fcntl(sock, cmd, arg);
1520        return(-EINVAL);
1521}
1522
1523/* Argument list sizes for sys_socketcall */
/*
 * Size in bytes of the argument block for each socketcall number:
 * sys_socketcall() copies nargs[call] bytes from user space.  Each
 * entry is a count of unsigned longs; index 0 is unused.
 */
#define NARG_BYTES(count) ((count) * sizeof(unsigned long))
static unsigned char nargs[18] = {
        NARG_BYTES(0),  /*  0 */
        NARG_BYTES(3),  /*  1 */
        NARG_BYTES(3),  /*  2 */
        NARG_BYTES(3),  /*  3 */
        NARG_BYTES(2),  /*  4 */
        NARG_BYTES(3),  /*  5 */
        NARG_BYTES(3),  /*  6 */
        NARG_BYTES(3),  /*  7 */
        NARG_BYTES(4),  /*  8 */
        NARG_BYTES(4),  /*  9 */
        NARG_BYTES(4),  /* 10 */
        NARG_BYTES(6),  /* 11 */
        NARG_BYTES(6),  /* 12 */
        NARG_BYTES(2),  /* 13 */
        NARG_BYTES(5),  /* 14 */
        NARG_BYTES(5),  /* 15 */
        NARG_BYTES(3),  /* 16 */
        NARG_BYTES(3),  /* 17 */
};
#undef NARG_BYTES
1529
1530/*
1531 *      System call vectors. 
1532 *
1533 *      Argument checking cleaned up. Saved 20% in size.
1534 *  This function doesn't need to set the kernel lock because
1535 *  it is set by the callees. 
1536 */
1537
1538asmlinkage long sys_socketcall(int call, unsigned long *args)
1539{
1540        unsigned long a[6];
1541        unsigned long a0,a1;
1542        int err;
1543
1544        if(call<1||call>SYS_RECVMSG)
1545                return -EINVAL;
1546
1547        /* copy_from_user should be SMP safe. */
1548        if (copy_from_user(a, args, nargs[call]))
1549                return -EFAULT;
1550                
1551        a0=a[0];
1552        a1=a[1];
1553        
1554        switch(call) 
1555        {
1556                case SYS_SOCKET:
1557                        err = sys_socket(a0,a1,a[2]);
1558                        break;
1559                case SYS_BIND:
1560                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1561                        break;
1562                case SYS_CONNECT:
1563                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1564                        break;
1565                case SYS_LISTEN:
1566                        err = sys_listen(a0,a1);
1567                        break;
1568                case SYS_ACCEPT:
1569                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1570                        break;
1571                case SYS_GETSOCKNAME:
1572                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1573                        break;
1574                case SYS_GETPEERNAME:
1575                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1576                        break;
1577                case SYS_SOCKETPAIR:
1578                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1579                        break;
1580                case SYS_SEND:
1581                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1582                        break;
1583                case SYS_SENDTO:
1584                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1585                                         (struct sockaddr *)a[4], a[5]);
1586                        break;
1587                case SYS_RECV:
1588                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1589                        break;
1590                case SYS_RECVFROM:
1591                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1592                                           (struct sockaddr *)a[4], (int *)a[5]);
1593                        break;
1594                case SYS_SHUTDOWN:
1595                        err = sys_shutdown(a0,a1);
1596                        break;
1597                case SYS_SETSOCKOPT:
1598                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1599                        break;
1600                case SYS_GETSOCKOPT:
1601                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1602                        break;
1603                case SYS_SENDMSG:
1604                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1605                        break;
1606                case SYS_RECVMSG:
1607                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1608                        break;
1609                default:
1610                        err = -EINVAL;
1611                        break;
1612        }
1613        return err;
1614}
1615
1616/*
1617 *      This function is called by a protocol handler that wants to
1618 *      advertise its address family, and have it linked into the
1619 *      SOCKET module.
1620 */
1621
1622int sock_register(struct net_proto_family *ops)
1623{
1624        int err;
1625
1626        if (ops->family >= NPROTO) {
1627                printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1628                return -ENOBUFS;
1629        }
1630        net_family_write_lock();
1631        err = -EEXIST;
1632        if (net_families[ops->family] == NULL) {
1633                net_families[ops->family]=ops;
1634                err = 0;
1635        }
1636        net_family_write_unlock();
1637        return err;
1638}
1639
1640/*
1641 *      This function is called by a protocol handler that wants to
1642 *      remove its address family, and have it unlinked from the
1643 *      SOCKET module.
1644 */
1645
1646int sock_unregister(int family)
1647{
1648        if (family < 0 || family >= NPROTO)
1649                return -1;
1650
1651        net_family_write_lock();
1652        net_families[family]=NULL;
1653        net_family_write_unlock();
1654        return 0;
1655}
1656
1657
1658extern void sk_init(void);
1659
1660#ifdef CONFIG_WAN_ROUTER
1661extern void wanrouter_init(void);
1662#endif
1663
1664#ifdef CONFIG_BLUEZ
1665extern void bluez_init(void);
1666#endif
1667
1668void __init sock_init(void)
1669{
1670        int i;
1671
1672        printk(KERN_INFO "Linux NET4.0 for Linux 2.4\n");
1673        printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1674
1675        /*
1676         *      Initialize all address (protocol) families. 
1677         */
1678         
1679        for (i = 0; i < NPROTO; i++) 
1680                net_families[i] = NULL;
1681
1682        /*
1683         *      Initialize sock SLAB cache.
1684         */
1685         
1686        sk_init();
1687
1688#ifdef SLAB_SKB
1689        /*
1690         *      Initialize skbuff SLAB cache 
1691         */
1692        skb_init();
1693#endif
1694
1695        /*
1696         *      Wan router layer. 
1697         */
1698
1699#ifdef CONFIG_WAN_ROUTER         
1700        wanrouter_init();
1701#endif
1702
1703        /*
1704         *      Initialize the protocols module. 
1705         */
1706
1707        register_filesystem(&sock_fs_type);
1708        sock_mnt = kern_mount(&sock_fs_type);
1709        /* The real protocol initialization is performed when
1710         *  do_initcalls is run.  
1711         */
1712
1713
1714        /*
1715         * The netlink device handler may be needed early.
1716         */
1717
1718#ifdef CONFIG_NET
1719        rtnetlink_init();
1720#endif
1721#ifdef CONFIG_NETLINK_DEV
1722        init_netlink();
1723#endif
1724#ifdef CONFIG_NETFILTER
1725        netfilter_init();
1726#endif
1727
1728#ifdef CONFIG_BLUEZ
1729        bluez_init();
1730#endif
1731}
1732
1733int socket_get_info(char *buffer, char **start, off_t offset, int length)
1734{
1735        int len, cpu;
1736        int counter = 0;
1737
1738        for (cpu=0; cpu<smp_num_cpus; cpu++)
1739                counter += sockets_in_use[cpu_logical_map(cpu)].counter;
1740
1741        /* It can be negative, by the way. 8) */
1742        if (counter < 0)
1743                counter = 0;
1744
1745        len = sprintf(buffer, "sockets: used %d\n", counter);
1746        if (offset >= len)
1747        {
1748                *start = buffer;
1749                return 0;
1750        }
1751        *start = buffer + offset;
1752        len -= offset;
1753        if (len > length)
1754                len = length;
1755        if (len < 0)
1756                len = 0;
1757        return len;
1758}
1759
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.