linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *              Tigran Aivazian :       Made listen(2) backlog sanity checks 
  46 *                                      protocol-independent
  47 *
  48 *
  49 *              This program is free software; you can redistribute it and/or
  50 *              modify it under the terms of the GNU General Public License
  51 *              as published by the Free Software Foundation; either version
  52 *              2 of the License, or (at your option) any later version.
  53 *
  54 *
  55 *      This module is effectively the top level interface to the BSD socket
  56 *      paradigm. 
  57 *
  58 */
  59
  60#include <linux/config.h>
  61#include <linux/mm.h>
  62#include <linux/smp_lock.h>
  63#include <linux/socket.h>
  64#include <linux/file.h>
  65#include <linux/net.h>
  66#include <linux/interrupt.h>
  67#include <linux/netdevice.h>
  68#include <linux/proc_fs.h>
  69#include <linux/wanrouter.h>
  70#include <linux/netlink.h>
  71#include <linux/rtnetlink.h>
  72#include <linux/init.h>
  73#include <linux/poll.h>
  74#include <linux/cache.h>
  75#include <linux/module.h>
  76#include <linux/highmem.h>
  77
  78#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  79#include <linux/kmod.h>
  80#endif
  81
  82#include <asm/uaccess.h>
  83
  84#include <net/sock.h>
  85#include <net/scm.h>
  86#include <linux/netfilter.h>
  87
  88static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
  89static loff_t sock_lseek(struct file *file, loff_t offset, int whence);
  90static ssize_t sock_read(struct file *file, char *buf,
  91                         size_t size, loff_t *ppos);
  92static ssize_t sock_write(struct file *file, const char *buf,
  93                          size_t size, loff_t *ppos);
  94static int sock_mmap(struct file *file, struct vm_area_struct * vma);
  95
  96static int sock_close(struct inode *inode, struct file *file);
  97static unsigned int sock_poll(struct file *file,
  98                              struct poll_table_struct *wait);
  99static int sock_ioctl(struct inode *inode, struct file *file,
 100                      unsigned int cmd, unsigned long arg);
 101static int sock_fasync(int fd, struct file *filp, int on);
 102static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 103                          unsigned long count, loff_t *ppos);
 104static ssize_t sock_writev(struct file *file, const struct iovec *vector,
 105                          unsigned long count, loff_t *ppos);
 106static ssize_t sock_sendpage(struct file *file, struct page *page,
 107                             int offset, size_t size, loff_t *ppos, int more);
 108
 109
 110/*
 111 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 112 *      in the operation structures but are done directly via the socketcall() multiplexor.
 113 */
 114
 115static struct file_operations socket_file_ops = {
 116        llseek:         sock_lseek,
 117        read:           sock_read,
 118        write:          sock_write,
 119        poll:           sock_poll,
 120        ioctl:          sock_ioctl,
 121        mmap:           sock_mmap,
 122        open:           sock_no_open,   /* special open code to disallow open via /proc */
 123        release:        sock_close,
 124        fasync:         sock_fasync,
 125        readv:          sock_readv,
 126        writev:         sock_writev,
 127        sendpage:       sock_sendpage
 128};
 129
 130/*
 131 *      The protocol list. Each protocol is registered in here.
 132 */
 133
 134static struct net_proto_family *net_families[NPROTO];
 135
 136#ifdef CONFIG_SMP
 137static atomic_t net_family_lockct = ATOMIC_INIT(0);
 138static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
 139
 140/* The strategy is: modifications net_family vector are short, do not
 141   sleep and veeery rare, but read access should be free of any exclusive
 142   locks.
 143 */
 144
 145static void net_family_write_lock(void)
 146{
 147        spin_lock(&net_family_lock);
 148        while (atomic_read(&net_family_lockct) != 0) {
 149                spin_unlock(&net_family_lock);
 150
 151                current->policy |= SCHED_YIELD;
 152                schedule();
 153
 154                spin_lock(&net_family_lock);
 155        }
 156}
 157
 158static __inline__ void net_family_write_unlock(void)
 159{
 160        spin_unlock(&net_family_lock);
 161}
 162
 163static __inline__ void net_family_read_lock(void)
 164{
 165        atomic_inc(&net_family_lockct);
 166        spin_unlock_wait(&net_family_lock);
 167}
 168
 169static __inline__ void net_family_read_unlock(void)
 170{
 171        atomic_dec(&net_family_lockct);
 172}
 173
 174#else
 175#define net_family_write_lock() do { } while(0)
 176#define net_family_write_unlock() do { } while(0)
 177#define net_family_read_lock() do { } while(0)
 178#define net_family_read_unlock() do { } while(0)
 179#endif
 180
 181
 182/*
 183 *      Statistics counters of the socket lists
 184 */
 185
 186static union {
 187        int     counter;
 188        char    __pad[SMP_CACHE_BYTES];
 189} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
 190
 191/*
 192 *      Support routines. Move socket addresses back and forth across the kernel/user
 193 *      divide and look after the messy bits.
 194 */
 195
 196#define MAX_SOCK_ADDR   128             /* 108 for Unix domain - 
 197                                           16 for IP, 16 for IPX,
 198                                           24 for IPv6,
 199                                           about 80 for AX.25 
 200                                           must be at least one bigger than
 201                                           the AF_UNIX size (see net/unix/af_unix.c
 202                                           :unix_mkname()).  
 203                                         */
 204                                         
 205/**
 206 *      move_addr_to_kernel     -       copy a socket address into kernel space
 207 *      @uaddr: Address in user space
 208 *      @kaddr: Address in kernel space
 209 *      @ulen: Length in user space
 210 *
 211 *      The address is copied into kernel space. If the provided address is
 212 *      too long an error code of -EINVAL is returned. If the copy gives
 213 *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 214 */
 215
 216int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 217{
 218        if(ulen<0||ulen>MAX_SOCK_ADDR)
 219                return -EINVAL;
 220        if(ulen==0)
 221                return 0;
 222        if(copy_from_user(kaddr,uaddr,ulen))
 223                return -EFAULT;
 224        return 0;
 225}
 226
 227/**
 228 *      move_addr_to_user       -       copy an address to user space
 229 *      @kaddr: kernel space address
 230 *      @klen: length of address in kernel
 231 *      @uaddr: user space address
 232 *      @ulen: pointer to user length field
 233 *
 234 *      The value pointed to by ulen on entry is the buffer length available.
 235 *      This is overwritten with the buffer space used. -EINVAL is returned
 236 *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 237 *      is returned if either the buffer or the length field are not
 238 *      accessible.
 239 *      After copying the data up to the limit the user specifies, the true
 240 *      length of the data is written over the length limit the user
 241 *      specified. Zero is returned for a success.
 242 */
 243 
 244int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 245{
 246        int err;
 247        int len;
 248
 249        if((err=get_user(len, ulen)))
 250                return err;
 251        if(len>klen)
 252                len=klen;
 253        if(len<0 || len> MAX_SOCK_ADDR)
 254                return -EINVAL;
 255        if(len)
 256        {
 257                if(copy_to_user(uaddr,kaddr,len))
 258                        return -EFAULT;
 259        }
 260        /*
 261         *      "fromlen shall refer to the value before truncation.."
 262         *                      1003.1g
 263         */
 264        return __put_user(klen, ulen);
 265}
 266
 267#define SOCKFS_MAGIC 0x534F434B
 268static int sockfs_statfs(struct super_block *sb, struct statfs *buf)
 269{
 270        buf->f_type = SOCKFS_MAGIC;
 271        buf->f_bsize = 1024;
 272        buf->f_namelen = 255;
 273        return 0;
 274}
 275
 276static struct super_operations sockfs_ops = {
 277        statfs:         sockfs_statfs,
 278};
 279
 280static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent)
 281{
 282        struct inode *root = new_inode(sb);
 283        if (!root)
 284                return NULL;
 285        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 286        root->i_uid = root->i_gid = 0;
 287        root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 288        sb->s_blocksize = 1024;
 289        sb->s_blocksize_bits = 10;
 290        sb->s_magic = SOCKFS_MAGIC;
 291        sb->s_op        = &sockfs_ops;
 292        sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 });
 293        if (!sb->s_root) {
 294                iput(root);
 295                return NULL;
 296        }
 297        sb->s_root->d_sb = sb;
 298        sb->s_root->d_parent = sb->s_root;
 299        d_instantiate(sb->s_root, root);
 300        return sb;
 301}
 302
 303static struct vfsmount *sock_mnt;
 304static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super, FS_NOMOUNT);
 305static int sockfs_delete_dentry(struct dentry *dentry)
 306{
 307        return 1;
 308}
 309static struct dentry_operations sockfs_dentry_operations = {
 310        d_delete:       sockfs_delete_dentry,
 311};
 312
 313/*
 314 *      Obtains the first available file descriptor and sets it up for use.
 315 *
 316 *      This functions creates file structure and maps it to fd space
 317 *      of current process. On success it returns file descriptor
 318 *      and file struct implicitly stored in sock->file.
 319 *      Note that another thread may close file descriptor before we return
 320 *      from this function. We use the fact that now we do not refer
 321 *      to socket after mapping. If one day we will need it, this
 322 *      function will inincrement ref. count on file by 1.
 323 *
 324 *      In any case returned fd MAY BE not valid!
 325 *      This race condition is inavoidable
 326 *      with shared fd spaces, we cannot solve is inside kernel,
 327 *      but we take care of internal coherence yet.
 328 */
 329
 330static int sock_map_fd(struct socket *sock)
 331{
 332        int fd;
 333        struct qstr this;
 334        char name[32];
 335
 336        /*
 337         *      Find a file descriptor suitable for return to the user. 
 338         */
 339
 340        fd = get_unused_fd();
 341        if (fd >= 0) {
 342                struct file *file = get_empty_filp();
 343
 344                if (!file) {
 345                        put_unused_fd(fd);
 346                        fd = -ENFILE;
 347                        goto out;
 348                }
 349
 350                sprintf(name, "[%lu]", sock->inode->i_ino);
 351                this.name = name;
 352                this.len = strlen(name);
 353                this.hash = sock->inode->i_ino;
 354
 355                file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
 356                if (!file->f_dentry) {
 357                        put_filp(file);
 358                        put_unused_fd(fd);
 359                        fd = -ENOMEM;
 360                        goto out;
 361                }
 362                file->f_dentry->d_op = &sockfs_dentry_operations;
 363                d_add(file->f_dentry, sock->inode);
 364                file->f_vfsmnt = mntget(sock_mnt);
 365
 366                sock->file = file;
 367                file->f_op = sock->inode->i_fop = &socket_file_ops;
 368                file->f_mode = 3;
 369                file->f_flags = O_RDWR;
 370                file->f_pos = 0;
 371                fd_install(fd, file);
 372        }
 373
 374out:
 375        return fd;
 376}
 377
 378extern __inline__ struct socket *socki_lookup(struct inode *inode)
 379{
 380        return &inode->u.socket_i;
 381}
 382
 383/**
 384 *      sockfd_lookup   -       Go from a file number to its socket slot
 385 *      @fd: file handle
 386 *      @err: pointer to an error code return
 387 *
 388 *      The file handle passed in is locked and the socket it is bound
 389 *      too is returned. If an error occurs the err pointer is overwritten
 390 *      with a negative errno code and NULL is returned. The function checks
 391 *      for both invalid handles and passing a handle which is not a socket.
 392 *
 393 *      On a success the socket object pointer is returned.
 394 */
 395
 396struct socket *sockfd_lookup(int fd, int *err)
 397{
 398        struct file *file;
 399        struct inode *inode;
 400        struct socket *sock;
 401
 402        if (!(file = fget(fd)))
 403        {
 404                *err = -EBADF;
 405                return NULL;
 406        }
 407
 408        inode = file->f_dentry->d_inode;
 409        if (!inode->i_sock || !(sock = socki_lookup(inode)))
 410        {
 411                *err = -ENOTSOCK;
 412                fput(file);
 413                return NULL;
 414        }
 415
 416        if (sock->file != file) {
 417                printk(KERN_ERR "socki_lookup: socket file changed!\n");
 418                sock->file = file;
 419        }
 420        return sock;
 421}
 422
 423extern __inline__ void sockfd_put(struct socket *sock)
 424{
 425        fput(sock->file);
 426}
 427
 428/**
 429 *      sock_alloc      -       allocate a socket
 430 *      
 431 *      Allocate a new inode and socket object. The two are bound together
 432 *      and initialised. The socket is then returned. If we are out of inodes
 433 *      NULL is returned.
 434 */
 435
 436struct socket *sock_alloc(void)
 437{
 438        struct inode * inode;
 439        struct socket * sock;
 440
 441        inode = get_empty_inode();
 442        if (!inode)
 443                return NULL;
 444
 445        inode->i_sb = sock_mnt->mnt_sb;
 446        sock = socki_lookup(inode);
 447
 448        inode->i_mode = S_IFSOCK|S_IRWXUGO;
 449        inode->i_sock = 1;
 450        inode->i_uid = current->fsuid;
 451        inode->i_gid = current->fsgid;
 452
 453        sock->inode = inode;
 454        init_waitqueue_head(&sock->wait);
 455        sock->fasync_list = NULL;
 456        sock->state = SS_UNCONNECTED;
 457        sock->flags = 0;
 458        sock->ops = NULL;
 459        sock->sk = NULL;
 460        sock->file = NULL;
 461
 462        sockets_in_use[smp_processor_id()].counter++;
 463        return sock;
 464}
 465
 466/*
 467 *      In theory you can't get an open on this inode, but /proc provides
 468 *      a back door. Remember to keep it shut otherwise you'll let the
 469 *      creepy crawlies in.
 470 */
 471  
 472static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 473{
 474        return -ENXIO;
 475}
 476
 477/**
 478 *      sock_release    -       close a socket
 479 *      @sock: socket to close
 480 *
 481 *      The socket is released from the protocol stack if it has a release
 482 *      callback, and the inode is then released if the socket is bound to
 483 *      an inode not a file. 
 484 */
 485 
 486void sock_release(struct socket *sock)
 487{
 488        if (sock->ops) 
 489                sock->ops->release(sock);
 490
 491        if (sock->fasync_list)
 492                printk(KERN_ERR "sock_release: fasync list not empty!\n");
 493
 494        sockets_in_use[smp_processor_id()].counter--;
 495        if (!sock->file) {
 496                iput(sock->inode);
 497                return;
 498        }
 499        sock->file=NULL;
 500}
 501
 502int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 503{
 504        int err;
 505        struct scm_cookie scm;
 506
 507        err = scm_send(sock, msg, &scm);
 508        if (err >= 0) {
 509                err = sock->ops->sendmsg(sock, msg, size, &scm);
 510                scm_destroy(&scm);
 511        }
 512        return err;
 513}
 514
 515int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 516{
 517        struct scm_cookie scm;
 518
 519        memset(&scm, 0, sizeof(scm));
 520
 521        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 522        if (size >= 0)
 523                scm_recv(sock, msg, &scm, flags);
 524
 525        return size;
 526}
 527
 528
 529/*
 530 *      Sockets are not seekable.
 531 */
 532
 533static loff_t sock_lseek(struct file *file, loff_t offset, int whence)
 534{
 535        return -ESPIPE;
 536}
 537
 538/*
 539 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 540 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 541 */
 542
 543static ssize_t sock_read(struct file *file, char *ubuf,
 544                         size_t size, loff_t *ppos)
 545{
 546        struct socket *sock;
 547        struct iovec iov;
 548        struct msghdr msg;
 549        int flags;
 550
 551        if (ppos != &file->f_pos)
 552                return -ESPIPE;
 553        if (size==0)            /* Match SYS5 behaviour */
 554                return 0;
 555
 556        sock = socki_lookup(file->f_dentry->d_inode); 
 557
 558        msg.msg_name=NULL;
 559        msg.msg_namelen=0;
 560        msg.msg_iov=&iov;
 561        msg.msg_iovlen=1;
 562        msg.msg_control=NULL;
 563        msg.msg_controllen=0;
 564        iov.iov_base=ubuf;
 565        iov.iov_len=size;
 566        flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 567
 568        return sock_recvmsg(sock, &msg, size, flags);
 569}
 570
 571
 572/*
 573 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
 574 *      is readable by the user process.
 575 */
 576
 577static ssize_t sock_write(struct file *file, const char *ubuf,
 578                          size_t size, loff_t *ppos)
 579{
 580        struct socket *sock;
 581        struct msghdr msg;
 582        struct iovec iov;
 583        
 584        if (ppos != &file->f_pos)
 585                return -ESPIPE;
 586        if(size==0)             /* Match SYS5 behaviour */
 587                return 0;
 588
 589        sock = socki_lookup(file->f_dentry->d_inode); 
 590
 591        msg.msg_name=NULL;
 592        msg.msg_namelen=0;
 593        msg.msg_iov=&iov;
 594        msg.msg_iovlen=1;
 595        msg.msg_control=NULL;
 596        msg.msg_controllen=0;
 597        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 598        if (sock->type == SOCK_SEQPACKET)
 599                msg.msg_flags |= MSG_EOR;
 600        iov.iov_base=(void *)ubuf;
 601        iov.iov_len=size;
 602        
 603        return sock_sendmsg(sock, &msg, size);
 604}
 605
 606ssize_t sock_sendpage(struct file *file, struct page *page,
 607                      int offset, size_t size, loff_t *ppos, int more)
 608{
 609        struct socket *sock;
 610        int flags;
 611
 612        if (ppos != &file->f_pos)
 613                return -ESPIPE;
 614
 615        sock = socki_lookup(file->f_dentry->d_inode);
 616
 617        flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 618        if (more)
 619                flags |= MSG_MORE;
 620
 621        return sock->ops->sendpage(sock, page, offset, size, flags);
 622}
 623
 624int sock_readv_writev(int type, struct inode * inode, struct file * file,
 625                      const struct iovec * iov, long count, long size)
 626{
 627        struct msghdr msg;
 628        struct socket *sock;
 629
 630        sock = socki_lookup(inode);
 631
 632        msg.msg_name = NULL;
 633        msg.msg_namelen = 0;
 634        msg.msg_control = NULL;
 635        msg.msg_controllen = 0;
 636        msg.msg_iov = (struct iovec *) iov;
 637        msg.msg_iovlen = count;
 638        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 639
 640        /* read() does a VERIFY_WRITE */
 641        if (type == VERIFY_WRITE)
 642                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 643
 644        if (sock->type == SOCK_SEQPACKET)
 645                msg.msg_flags |= MSG_EOR;
 646
 647        return sock_sendmsg(sock, &msg, size);
 648}
 649
 650static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 651                          unsigned long count, loff_t *ppos)
 652{
 653        size_t tot_len = 0;
 654        int i;
 655        for (i = 0 ; i < count ; i++)
 656                tot_len += vector[i].iov_len;
 657        return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
 658                                 file, vector, count, tot_len);
 659}
 660        
 661static ssize_t sock_writev(struct file *file, const struct iovec *vector,
 662                           unsigned long count, loff_t *ppos)
 663{
 664        size_t tot_len = 0;
 665        int i;
 666        for (i = 0 ; i < count ; i++)
 667                tot_len += vector[i].iov_len;
 668        return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
 669                                 file, vector, count, tot_len);
 670}
 671
 672/*
 673 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 674 *      with it - that's up to the protocol still.
 675 */
 676
 677int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 678           unsigned long arg)
 679{
 680        struct socket *sock;
 681        int err;
 682
 683        unlock_kernel();
 684        sock = socki_lookup(inode);
 685        err = sock->ops->ioctl(sock, cmd, arg);
 686        lock_kernel();
 687
 688        return err;
 689}
 690
 691
 692/* No kernel lock held - perfect */
 693static unsigned int sock_poll(struct file *file, poll_table * wait)
 694{
 695        struct socket *sock;
 696
 697        /*
 698         *      We can't return errors to poll, so it's either yes or no. 
 699         */
 700        sock = socki_lookup(file->f_dentry->d_inode);
 701        return sock->ops->poll(file, sock, wait);
 702}
 703
 704static int sock_mmap(struct file * file, struct vm_area_struct * vma)
 705{
 706        struct socket *sock = socki_lookup(file->f_dentry->d_inode);
 707
 708        return sock->ops->mmap(file, sock, vma);
 709}
 710
 711int sock_close(struct inode *inode, struct file *filp)
 712{
 713        /*
 714         *      It was possible the inode is NULL we were 
 715         *      closing an unfinished socket. 
 716         */
 717
 718        if (!inode)
 719        {
 720                printk(KERN_DEBUG "sock_close: NULL inode\n");
 721                return 0;
 722        }
 723        sock_fasync(-1, filp, 0);
 724        sock_release(socki_lookup(inode));
 725        return 0;
 726}
 727
 728/*
 729 *      Update the socket async list
 730 *
 731 *      Fasync_list locking strategy.
 732 *
 733 *      1. fasync_list is modified only under process context socket lock
 734 *         i.e. under semaphore.
 735 *      2. fasync_list is used under read_lock(&sk->callback_lock)
 736 *         or under socket lock.
 737 *      3. fasync_list can be used from softirq context, so that
 738 *         modification under socket lock have to be enhanced with
 739 *         write_lock_bh(&sk->callback_lock).
 740 *                                                      --ANK (990710)
 741 */
 742
 743static int sock_fasync(int fd, struct file *filp, int on)
 744{
 745        struct fasync_struct *fa, *fna=NULL, **prev;
 746        struct socket *sock;
 747        struct sock *sk;
 748
 749        if (on)
 750        {
 751                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 752                if(fna==NULL)
 753                        return -ENOMEM;
 754        }
 755
 756
 757        sock = socki_lookup(filp->f_dentry->d_inode);
 758        
 759        if ((sk=sock->sk) == NULL)
 760                return -EINVAL;
 761
 762        lock_sock(sk);
 763
 764        prev=&(sock->fasync_list);
 765
 766        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 767                if (fa->fa_file==filp)
 768                        break;
 769
 770        if(on)
 771        {
 772                if(fa!=NULL)
 773                {
 774                        write_lock_bh(&sk->callback_lock);
 775                        fa->fa_fd=fd;
 776                        write_unlock_bh(&sk->callback_lock);
 777
 778                        kfree(fna);
 779                        goto out;
 780                }
 781                fna->fa_file=filp;
 782                fna->fa_fd=fd;
 783                fna->magic=FASYNC_MAGIC;
 784                fna->fa_next=sock->fasync_list;
 785                write_lock_bh(&sk->callback_lock);
 786                sock->fasync_list=fna;
 787                write_unlock_bh(&sk->callback_lock);
 788        }
 789        else
 790        {
 791                if (fa!=NULL)
 792                {
 793                        write_lock_bh(&sk->callback_lock);
 794                        *prev=fa->fa_next;
 795                        write_unlock_bh(&sk->callback_lock);
 796                        kfree(fa);
 797                }
 798        }
 799
 800out:
 801        release_sock(sock->sk);
 802        return 0;
 803}
 804
 805/* This function may be called only under socket lock or callback_lock */
 806
 807int sock_wake_async(struct socket *sock, int how, int band)
 808{
 809        if (!sock || !sock->fasync_list)
 810                return -1;
 811        switch (how)
 812        {
 813        case 1:
 814                
 815                if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 816                        break;
 817                goto call_kill;
 818        case 2:
 819                if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
 820                        break;
 821                /* fall through */
 822        case 0:
 823        call_kill:
 824                __kill_fasync(sock->fasync_list, SIGIO, band);
 825                break;
 826        case 3:
 827                __kill_fasync(sock->fasync_list, SIGURG, band);
 828        }
 829        return 0;
 830}
 831
 832
 833int sock_create(int family, int type, int protocol, struct socket **res)
 834{
 835        int i;
 836        struct socket *sock;
 837
 838        /*
 839         *      Check protocol is in range
 840         */
 841        if (family < 0 || family >= NPROTO)
 842                return -EAFNOSUPPORT;
 843        if (type < 0 || type >= SOCK_MAX)
 844                return -EINVAL;
 845
 846        /* Compatibility.
 847
 848           This uglymoron is moved from INET layer to here to avoid
 849           deadlock in module load.
 850         */
 851        if (family == PF_INET && type == SOCK_PACKET) {
 852                static int warned; 
 853                if (!warned) {
 854                        warned = 1;
 855                        printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
 856                }
 857                family = PF_PACKET;
 858        }
 859                
 860#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 861        /* Attempt to load a protocol module if the find failed. 
 862         * 
 863         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 864         * requested real, full-featured networking support upon configuration.
 865         * Otherwise module support will break!
 866         */
 867        if (net_families[family]==NULL)
 868        {
 869                char module_name[30];
 870                sprintf(module_name,"net-pf-%d",family);
 871                request_module(module_name);
 872        }
 873#endif
 874
 875        net_family_read_lock();
 876        if (net_families[family] == NULL) {
 877                i = -EAFNOSUPPORT;
 878                goto out;
 879        }
 880
 881/*
 882 *      Allocate the socket and allow the family to set things up. if
 883 *      the protocol is 0, the family is instructed to select an appropriate
 884 *      default.
 885 */
 886
 887        if (!(sock = sock_alloc())) 
 888        {
 889                printk(KERN_WARNING "socket: no more sockets\n");
 890                i = -ENFILE;            /* Not exactly a match, but its the
 891                                           closest posix thing */
 892                goto out;
 893        }
 894
 895        sock->type  = type;
 896
 897        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 898        {
 899                sock_release(sock);
 900                goto out;
 901        }
 902
 903        *res = sock;
 904
 905out:
 906        net_family_read_unlock();
 907        return i;
 908}
 909
 910asmlinkage long sys_socket(int family, int type, int protocol)
 911{
 912        int retval;
 913        struct socket *sock;
 914
 915        retval = sock_create(family, type, protocol, &sock);
 916        if (retval < 0)
 917                goto out;
 918
 919        retval = sock_map_fd(sock);
 920        if (retval < 0)
 921                goto out_release;
 922
 923out:
 924        /* It may be already another descriptor 8) Not kernel problem. */
 925        return retval;
 926
 927out_release:
 928        sock_release(sock);
 929        return retval;
 930}
 931
 932/*
 933 *      Create a pair of connected sockets.
 934 */
 935
 936asmlinkage long sys_socketpair(int family, int type, int protocol, int usockvec[2])
 937{
 938        struct socket *sock1, *sock2;
 939        int fd1, fd2, err;
 940
 941        /*
 942         * Obtain the first socket and check if the underlying protocol
 943         * supports the socketpair call.
 944         */
 945
 946        err = sock_create(family, type, protocol, &sock1);
 947        if (err < 0)
 948                goto out;
 949
 950        err = sock_create(family, type, protocol, &sock2);
 951        if (err < 0)
 952                goto out_release_1;
 953
 954        err = sock1->ops->socketpair(sock1, sock2);
 955        if (err < 0) 
 956                goto out_release_both;
 957
 958        fd1 = fd2 = -1;
 959
 960        err = sock_map_fd(sock1);
 961        if (err < 0)
 962                goto out_release_both;
 963        fd1 = err;
 964
 965        err = sock_map_fd(sock2);
 966        if (err < 0)
 967                goto out_close_1;
 968        fd2 = err;
 969
 970        /* fd1 and fd2 may be already another descriptors.
 971         * Not kernel problem.
 972         */
 973
 974        err = put_user(fd1, &usockvec[0]); 
 975        if (!err)
 976                err = put_user(fd2, &usockvec[1]);
 977        if (!err)
 978                return 0;
 979
 980        sys_close(fd2);
 981        sys_close(fd1);
 982        return err;
 983
 984out_close_1:
 985        sock_release(sock2);
 986        sys_close(fd1);
 987        return err;
 988
 989out_release_both:
 990        sock_release(sock2);
 991out_release_1:
 992        sock_release(sock1);
 993out:
 994        return err;
 995}
 996
 997
 998/*
 999 *      Bind a name to a socket. Nothing much to do here since it's
1000 *      the protocol's responsibility to handle the local address.
1001 *
1002 *      We move the socket address to kernel space before we call
1003 *      the protocol layer (having also checked the address is ok).
1004 */
1005
1006asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
1007{
1008        struct socket *sock;
1009        char address[MAX_SOCK_ADDR];
1010        int err;
1011
1012        if((sock = sockfd_lookup(fd,&err))!=NULL)
1013        {
1014                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
1015                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
1016                sockfd_put(sock);
1017        }                       
1018        return err;
1019}
1020
1021
1022/*
1023 *      Perform a listen. Basically, we allow the protocol to do anything
1024 *      necessary for a listen, and if that works, we mark the socket as
1025 *      ready for listening.
1026 */
1027
1028asmlinkage long sys_listen(int fd, int backlog)
1029{
1030        struct socket *sock;
1031        int err;
1032        
1033        if ((sock = sockfd_lookup(fd, &err)) != NULL) {
1034                if ((unsigned) backlog > SOMAXCONN)
1035                        backlog = SOMAXCONN;
1036                err=sock->ops->listen(sock, backlog);
1037                sockfd_put(sock);
1038        }
1039        return err;
1040}
1041
1042
1043/*
1044 *      For accept, we attempt to create a new socket, set up the link
1045 *      with the client, wake up the client, then return the new
1046 *      connected fd. We collect the address of the connector in kernel
1047 *      space and move it to user at the very end. This is unclean because
1048 *      we open the socket then return an error.
1049 *
1050 *      1003.1g adds the ability to recvmsg() to query connection pending
1051 *      status to recvmsg. We need to add that support in a way thats
1052 *      clean when we restucture accept also.
1053 */
1054
1055asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
1056{
1057        struct socket *sock, *newsock;
1058        int err, len;
1059        char address[MAX_SOCK_ADDR];
1060
1061        sock = sockfd_lookup(fd, &err);
1062        if (!sock)
1063                goto out;
1064
1065        err = -EMFILE;
1066        if (!(newsock = sock_alloc())) 
1067                goto out_put;
1068
1069        newsock->type = sock->type;
1070        newsock->ops = sock->ops;
1071
1072        err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1073        if (err < 0)
1074                goto out_release;
1075
1076        if (upeer_sockaddr) {
1077                if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
1078                        err = -ECONNABORTED;
1079                        goto out_release;
1080                }
1081                err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
1082                if (err < 0)
1083                        goto out_release;
1084        }
1085
1086        /* File flags are not inherited via accept() unlike another OSes. */
1087
1088        if ((err = sock_map_fd(newsock)) < 0)
1089                goto out_release;
1090
1091out_put:
1092        sockfd_put(sock);
1093out:
1094        return err;
1095
1096out_release:
1097        sock_release(newsock);
1098        goto out_put;
1099}
1100
1101
1102/*
1103 *      Attempt to connect to a socket with the server address.  The address
1104 *      is in user space so we verify it is OK and move it to kernel space.
1105 *
1106 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1107 *      break bindings
1108 *
1109 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1110 *      other SEQPACKET protocols that take time to connect() as it doesn't
1111 *      include the -EINPROGRESS status for such sockets.
1112 */
1113
1114asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
1115{
1116        struct socket *sock;
1117        char address[MAX_SOCK_ADDR];
1118        int err;
1119
1120        sock = sockfd_lookup(fd, &err);
1121        if (!sock)
1122                goto out;
1123        err = move_addr_to_kernel(uservaddr, addrlen, address);
1124        if (err < 0)
1125                goto out_put;
1126        err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
1127                                 sock->file->f_flags);
1128out_put:
1129        sockfd_put(sock);
1130out:
1131        return err;
1132}
1133
1134/*
1135 *      Get the local address ('name') of a socket object. Move the obtained
1136 *      name to user space.
1137 */
1138
1139asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1140{
1141        struct socket *sock;
1142        char address[MAX_SOCK_ADDR];
1143        int len, err;
1144        
1145        sock = sockfd_lookup(fd, &err);
1146        if (!sock)
1147                goto out;
1148        err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1149        if (err)
1150                goto out_put;
1151        err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1152
1153out_put:
1154        sockfd_put(sock);
1155out:
1156        return err;
1157}
1158
1159/*
1160 *      Get the remote address ('name') of a socket object. Move the obtained
1161 *      name to user space.
1162 */
1163
1164asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1165{
1166        struct socket *sock;
1167        char address[MAX_SOCK_ADDR];
1168        int len, err;
1169
1170        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1171        {
1172                err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
1173                if (!err)
1174                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
1175                sockfd_put(sock);
1176        }
1177        return err;
1178}
1179
1180/*
1181 *      Send a datagram to a given address. We move the address into kernel
1182 *      space and check the user space data area is readable before invoking
1183 *      the protocol.
1184 */
1185
1186asmlinkage long sys_sendto(int fd, void * buff, size_t len, unsigned flags,
1187                           struct sockaddr *addr, int addr_len)
1188{
1189        struct socket *sock;
1190        char address[MAX_SOCK_ADDR];
1191        int err;
1192        struct msghdr msg;
1193        struct iovec iov;
1194        
1195        sock = sockfd_lookup(fd, &err);
1196        if (!sock)
1197                goto out;
1198        iov.iov_base=buff;
1199        iov.iov_len=len;
1200        msg.msg_name=NULL;
1201        msg.msg_iov=&iov;
1202        msg.msg_iovlen=1;
1203        msg.msg_control=NULL;
1204        msg.msg_controllen=0;
1205        msg.msg_namelen=0;
1206        if(addr)
1207        {
1208                err = move_addr_to_kernel(addr, addr_len, address);
1209                if (err < 0)
1210                        goto out_put;
1211                msg.msg_name=address;
1212                msg.msg_namelen=addr_len;
1213        }
1214        if (sock->file->f_flags & O_NONBLOCK)
1215                flags |= MSG_DONTWAIT;
1216        msg.msg_flags = flags;
1217        err = sock_sendmsg(sock, &msg, len);
1218
1219out_put:                
1220        sockfd_put(sock);
1221out:
1222        return err;
1223}
1224
1225/*
1226 *      Send a datagram down a socket. 
1227 */
1228
1229asmlinkage long sys_send(int fd, void * buff, size_t len, unsigned flags)
1230{
1231        return sys_sendto(fd, buff, len, flags, NULL, 0);
1232}
1233
1234/*
1235 *      Receive a frame from the socket and optionally record the address of the 
1236 *      sender. We verify the buffers are writable and if needed move the
1237 *      sender address from kernel to user space.
1238 */
1239
1240asmlinkage long sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1241                             struct sockaddr *addr, int *addr_len)
1242{
1243        struct socket *sock;
1244        struct iovec iov;
1245        struct msghdr msg;
1246        char address[MAX_SOCK_ADDR];
1247        int err,err2;
1248
1249        sock = sockfd_lookup(fd, &err);
1250        if (!sock)
1251                goto out;
1252
1253        msg.msg_control=NULL;
1254        msg.msg_controllen=0;
1255        msg.msg_iovlen=1;
1256        msg.msg_iov=&iov;
1257        iov.iov_len=size;
1258        iov.iov_base=ubuf;
1259        msg.msg_name=address;
1260        msg.msg_namelen=MAX_SOCK_ADDR;
1261        if (sock->file->f_flags & O_NONBLOCK)
1262                flags |= MSG_DONTWAIT;
1263        err=sock_recvmsg(sock, &msg, size, flags);
1264
1265        if(err >= 0 && addr != NULL && msg.msg_namelen)
1266        {
1267                err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1268                if(err2<0)
1269                        err=err2;
1270        }
1271        sockfd_put(sock);                       
1272out:
1273        return err;
1274}
1275
1276/*
1277 *      Receive a datagram from a socket. 
1278 */
1279
1280asmlinkage long sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1281{
1282        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1283}
1284
1285/*
1286 *      Set a socket option. Because we don't know the option lengths we have
1287 *      to pass the user mode parameter for the protocols to sort out.
1288 */
1289
1290asmlinkage long sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1291{
1292        int err;
1293        struct socket *sock;
1294
1295        if (optlen < 0)
1296                return -EINVAL;
1297                        
1298        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1299        {
1300                if (level == SOL_SOCKET)
1301                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1302                else
1303                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1304                sockfd_put(sock);
1305        }
1306        return err;
1307}
1308
1309/*
1310 *      Get a socket option. Because we don't know the option lengths we have
1311 *      to pass a user mode parameter for the protocols to sort out.
1312 */
1313
1314asmlinkage long sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1315{
1316        int err;
1317        struct socket *sock;
1318
1319        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1320        {
1321                if (level == SOL_SOCKET)
1322                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1323                else
1324                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1325                sockfd_put(sock);
1326        }
1327        return err;
1328}
1329
1330
1331/*
1332 *      Shutdown a socket.
1333 */
1334
1335asmlinkage long sys_shutdown(int fd, int how)
1336{
1337        int err;
1338        struct socket *sock;
1339
1340        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1341        {
1342                err=sock->ops->shutdown(sock, how);
1343                sockfd_put(sock);
1344        }
1345        return err;
1346}
1347
1348/*
1349 *      BSD sendmsg interface
1350 */
1351
1352asmlinkage long sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1353{
1354        struct socket *sock;
1355        char address[MAX_SOCK_ADDR];
1356        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1357        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1358        unsigned char *ctl_buf = ctl;
1359        struct msghdr msg_sys;
1360        int err, ctl_len, iov_size, total_len;
1361        
1362        err = -EFAULT;
1363        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1364                goto out; 
1365
1366        sock = sockfd_lookup(fd, &err);
1367        if (!sock) 
1368                goto out;
1369
1370        /* do not move before msg_sys is valid */
1371        err = -EINVAL;
1372        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1373                goto out_put;
1374
1375        /* Check whether to allocate the iovec area*/
1376        err = -ENOMEM;
1377        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1378        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1379                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1380                if (!iov)
1381                        goto out_put;
1382        }
1383
1384        /* This will also move the address data into kernel space */
1385        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1386        if (err < 0) 
1387                goto out_freeiov;
1388        total_len = err;
1389
1390        err = -ENOBUFS;
1391
1392        if (msg_sys.msg_controllen > INT_MAX)
1393                goto out_freeiov;
1394        ctl_len = msg_sys.msg_controllen; 
1395        if (ctl_len) 
1396        {
1397                if (ctl_len > sizeof(ctl))
1398                {
1399                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1400                        if (ctl_buf == NULL) 
1401                                goto out_freeiov;
1402                }
1403                err = -EFAULT;
1404                if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1405                        goto out_freectl;
1406                msg_sys.msg_control = ctl_buf;
1407        }
1408        msg_sys.msg_flags = flags;
1409
1410        if (sock->file->f_flags & O_NONBLOCK)
1411                msg_sys.msg_flags |= MSG_DONTWAIT;
1412        err = sock_sendmsg(sock, &msg_sys, total_len);
1413
1414out_freectl:
1415        if (ctl_buf != ctl)    
1416                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1417out_freeiov:
1418        if (iov != iovstack)
1419                sock_kfree_s(sock->sk, iov, iov_size);
1420out_put:
1421        sockfd_put(sock);
1422out:       
1423        return err;
1424}
1425
1426/*
1427 *      BSD recvmsg interface
1428 */
1429
1430asmlinkage long sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1431{
1432        struct socket *sock;
1433        struct iovec iovstack[UIO_FASTIOV];
1434        struct iovec *iov=iovstack;
1435        struct msghdr msg_sys;
1436        unsigned long cmsg_ptr;
1437        int err, iov_size, total_len, len;
1438
1439        /* kernel mode address */
1440        char addr[MAX_SOCK_ADDR];
1441
1442        /* user mode address pointers */
1443        struct sockaddr *uaddr;
1444        int *uaddr_len;
1445        
1446        err=-EFAULT;
1447        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1448                goto out;
1449
1450        sock = sockfd_lookup(fd, &err);
1451        if (!sock)
1452                goto out;
1453
1454        err = -EINVAL;
1455        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1456                goto out_put;
1457        
1458        /* Check whether to allocate the iovec area*/
1459        err = -ENOMEM;
1460        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1461        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1462                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1463                if (!iov)
1464                        goto out_put;
1465        }
1466
1467        /*
1468         *      Save the user-mode address (verify_iovec will change the
1469         *      kernel msghdr to use the kernel address space)
1470         */
1471         
1472        uaddr = msg_sys.msg_name;
1473        uaddr_len = &msg->msg_namelen;
1474        err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1475        if (err < 0)
1476                goto out_freeiov;
1477        total_len=err;
1478
1479        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1480        msg_sys.msg_flags = 0;
1481        
1482        if (sock->file->f_flags & O_NONBLOCK)
1483                flags |= MSG_DONTWAIT;
1484        err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1485        if (err < 0)
1486                goto out_freeiov;
1487        len = err;
1488
1489        if (uaddr != NULL && msg_sys.msg_namelen) {
1490                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1491                if (err < 0)
1492                        goto out_freeiov;
1493        }
1494        err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1495        if (err)
1496                goto out_freeiov;
1497        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1498                                                         &msg->msg_controllen);
1499        if (err)
1500                goto out_freeiov;
1501        err = len;
1502
1503out_freeiov:
1504        if (iov != iovstack)
1505                sock_kfree_s(sock->sk, iov, iov_size);
1506out_put:
1507        sockfd_put(sock);
1508out:
1509        return err;
1510}
1511
1512
1513/*
1514 *      Perform a file control on a socket file descriptor.
1515 *
1516 *      Doesn't acquire a fd lock, because no network fcntl
1517 *      function sleeps currently.
1518 */
1519
1520int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1521{
1522        struct socket *sock;
1523
1524        sock = socki_lookup (filp->f_dentry->d_inode);
1525        if (sock && sock->ops)
1526                return sock_no_fcntl(sock, cmd, arg);
1527        return(-EINVAL);
1528}
1529
/*
 * Argument list sizes for sys_socketcall, indexed by call number: the
 * number of bytes of unsigned long arguments copied in for that call.
 * Entry 0 is unused. The per-entry names below assume the usual SYS_*
 * numbering (SYS_SOCKET == 1 ... SYS_RECVMSG == 17) -- confirm against
 * the header that defines them.
 */
#define NARGS_BYTES(count) ((count) * sizeof(unsigned long))
static unsigned char nargs[18] = {
	NARGS_BYTES(0),		/*  0: (unused)     */
	NARGS_BYTES(3),		/*  1: socket       */
	NARGS_BYTES(3),		/*  2: bind         */
	NARGS_BYTES(3),		/*  3: connect      */
	NARGS_BYTES(2),		/*  4: listen       */
	NARGS_BYTES(3),		/*  5: accept       */
	NARGS_BYTES(3),		/*  6: getsockname  */
	NARGS_BYTES(3),		/*  7: getpeername  */
	NARGS_BYTES(4),		/*  8: socketpair   */
	NARGS_BYTES(4),		/*  9: send         */
	NARGS_BYTES(4),		/* 10: recv         */
	NARGS_BYTES(6),		/* 11: sendto       */
	NARGS_BYTES(6),		/* 12: recvfrom     */
	NARGS_BYTES(2),		/* 13: shutdown     */
	NARGS_BYTES(5),		/* 14: setsockopt   */
	NARGS_BYTES(5),		/* 15: getsockopt   */
	NARGS_BYTES(3),		/* 16: sendmsg      */
	NARGS_BYTES(3),		/* 17: recvmsg      */
};
#undef NARGS_BYTES
1536
1537/*
1538 *      System call vectors. 
1539 *
1540 *      Argument checking cleaned up. Saved 20% in size.
1541 *  This function doesn't need to set the kernel lock because
1542 *  it is set by the callees. 
1543 */
1544
1545asmlinkage long sys_socketcall(int call, unsigned long *args)
1546{
1547        unsigned long a[6];
1548        unsigned long a0,a1;
1549        int err;
1550
1551        if(call<1||call>SYS_RECVMSG)
1552                return -EINVAL;
1553
1554        /* copy_from_user should be SMP safe. */
1555        if (copy_from_user(a, args, nargs[call]))
1556                return -EFAULT;
1557                
1558        a0=a[0];
1559        a1=a[1];
1560        
1561        switch(call) 
1562        {
1563                case SYS_SOCKET:
1564                        err = sys_socket(a0,a1,a[2]);
1565                        break;
1566                case SYS_BIND:
1567                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1568                        break;
1569                case SYS_CONNECT:
1570                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1571                        break;
1572                case SYS_LISTEN:
1573                        err = sys_listen(a0,a1);
1574                        break;
1575                case SYS_ACCEPT:
1576                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1577                        break;
1578                case SYS_GETSOCKNAME:
1579                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1580                        break;
1581                case SYS_GETPEERNAME:
1582                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1583                        break;
1584                case SYS_SOCKETPAIR:
1585                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1586                        break;
1587                case SYS_SEND:
1588                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1589                        break;
1590                case SYS_SENDTO:
1591                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1592                                         (struct sockaddr *)a[4], a[5]);
1593                        break;
1594                case SYS_RECV:
1595                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1596                        break;
1597                case SYS_RECVFROM:
1598                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1599                                           (struct sockaddr *)a[4], (int *)a[5]);
1600                        break;
1601                case SYS_SHUTDOWN:
1602                        err = sys_shutdown(a0,a1);
1603                        break;
1604                case SYS_SETSOCKOPT:
1605                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1606                        break;
1607                case SYS_GETSOCKOPT:
1608                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1609                        break;
1610                case SYS_SENDMSG:
1611                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1612                        break;
1613                case SYS_RECVMSG:
1614                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1615                        break;
1616                default:
1617                        err = -EINVAL;
1618                        break;
1619        }
1620        return err;
1621}
1622
1623/*
1624 *      This function is called by a protocol handler that wants to
1625 *      advertise its address family, and have it linked into the
1626 *      SOCKET module.
1627 */
1628
1629int sock_register(struct net_proto_family *ops)
1630{
1631        int err;
1632
1633        if (ops->family >= NPROTO) {
1634                printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1635                return -ENOBUFS;
1636        }
1637        net_family_write_lock();
1638        err = -EEXIST;
1639        if (net_families[ops->family] == NULL) {
1640                net_families[ops->family]=ops;
1641                err = 0;
1642        }
1643        net_family_write_unlock();
1644        return err;
1645}
1646
1647/*
1648 *      This function is called by a protocol handler that wants to
1649 *      remove its address family, and have it unlinked from the
1650 *      SOCKET module.
1651 */
1652
1653int sock_unregister(int family)
1654{
1655        if (family < 0 || family >= NPROTO)
1656                return -1;
1657
1658        net_family_write_lock();
1659        net_families[family]=NULL;
1660        net_family_write_unlock();
1661        return 0;
1662}
1663
1664
1665extern void sk_init(void);
1666
1667#ifdef CONFIG_WAN_ROUTER
1668extern void wanrouter_init(void);
1669#endif
1670
1671#ifdef CONFIG_BLUEZ
1672extern void bluez_init(void);
1673#endif
1674
1675void __init sock_init(void)
1676{
1677        int i;
1678
1679        printk(KERN_INFO "Linux NET4.0 for Linux 2.4\n");
1680        printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1681
1682        /*
1683         *      Initialize all address (protocol) families. 
1684         */
1685         
1686        for (i = 0; i < NPROTO; i++) 
1687                net_families[i] = NULL;
1688
1689        /*
1690         *      Initialize sock SLAB cache.
1691         */
1692         
1693        sk_init();
1694
1695#ifdef SLAB_SKB
1696        /*
1697         *      Initialize skbuff SLAB cache 
1698         */
1699        skb_init();
1700#endif
1701
1702        /*
1703         *      Wan router layer. 
1704         */
1705
1706#ifdef CONFIG_WAN_ROUTER         
1707        wanrouter_init();
1708#endif
1709
1710        /*
1711         *      Initialize the protocols module. 
1712         */
1713
1714        register_filesystem(&sock_fs_type);
1715        sock_mnt = kern_mount(&sock_fs_type);
1716        /* The real protocol initialization is performed when
1717         *  do_initcalls is run.  
1718         */
1719
1720
1721        /*
1722         * The netlink device handler may be needed early.
1723         */
1724
1725#ifdef CONFIG_NET
1726        rtnetlink_init();
1727#endif
1728#ifdef CONFIG_NETLINK_DEV
1729        init_netlink();
1730#endif
1731#ifdef CONFIG_NETFILTER
1732        netfilter_init();
1733#endif
1734
1735#ifdef CONFIG_BLUEZ
1736        bluez_init();
1737#endif
1738}
1739
1740int socket_get_info(char *buffer, char **start, off_t offset, int length)
1741{
1742        int len, cpu;
1743        int counter = 0;
1744
1745        for (cpu=0; cpu<smp_num_cpus; cpu++)
1746                counter += sockets_in_use[cpu_logical_map(cpu)].counter;
1747
1748        /* It can be negative, by the way. 8) */
1749        if (counter < 0)
1750                counter = 0;
1751
1752        len = sprintf(buffer, "sockets: used %d\n", counter);
1753        if (offset >= len)
1754        {
1755                *start = buffer;
1756                return 0;
1757        }
1758        *start = buffer + offset;
1759        len -= offset;
1760        if (len > length)
1761                len = length;
1762        if (len < 0)
1763                len = 0;
1764        return len;
1765}
1766
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.