linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *
  45 *
  46 *              This program is free software; you can redistribute it and/or
  47 *              modify it under the terms of the GNU General Public License
  48 *              as published by the Free Software Foundation; either version
  49 *              2 of the License, or (at your option) any later version.
  50 *
  51 *
  52 *      This module is effectively the top level interface to the BSD socket
  53 *      paradigm. 
  54 *
  55 */
  56
  57#include <linux/config.h>
  58#include <linux/signal.h>
  59#include <linux/errno.h>
  60#include <linux/sched.h>
  61#include <linux/mm.h>
  62#include <linux/smp.h>
  63#include <linux/smp_lock.h>
  64#include <linux/kernel.h>
  65#include <linux/major.h>
  66#include <linux/stat.h>
  67#include <linux/socket.h>
  68#include <linux/fcntl.h>
  69#include <linux/file.h>
  70#include <linux/net.h>
  71#include <linux/interrupt.h>
  72#include <linux/netdevice.h>
  73#include <linux/proc_fs.h>
  74#include <linux/firewall.h>
  75#include <linux/wanrouter.h>
  76#include <linux/init.h>
  77
  78#if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
  79#include <linux/kerneld.h>
  80#endif
  81
  82#include <net/netlink.h>
  83
  84#include <asm/system.h>
  85#include <asm/uaccess.h>
  86
  87#include <linux/inet.h>
  88#include <linux/netdevice.h>
  89#include <net/ip.h>
  90#include <net/protocol.h>
  91#include <net/rarp.h>
  92#include <net/tcp.h>
  93#include <net/udp.h>
  94#include <linux/skbuff.h>
  95#include <net/sock.h>
  96#include <net/scm.h>
  97
  98
  99static long long sock_lseek(struct file *file, long long offset, int whence);
 100static long sock_read(struct inode *inode, struct file *file,
 101                      char *buf, unsigned long size);
 102static long sock_write(struct inode *inode, struct file *file,
 103                       const char *buf, unsigned long size);
 104
 105static int sock_close(struct inode *inode, struct file *file);
 106static unsigned int sock_poll(struct file *file, poll_table *wait);
 107static int sock_ioctl(struct inode *inode, struct file *file,
 108                      unsigned int cmd, unsigned long arg);
 109static int sock_fasync(struct file *filp, int on);
 110
 111
/*
 *	Socket files have a set of 'special' operations as well as the generic
 *	file ones. These don't appear in the operation structures but are done
 *	directly via the socketcall() multiplexor.
 *
 *	The initialiser below is positional, so the order of the entries is
 *	significant. A NULL entry marks an operation that sockets do not
 *	supply.
 */

static struct file_operations socket_file_ops = {
	sock_lseek,		/* seeking: always fails with -ESPIPE */
	sock_read,		/* read: forwarded to sock_recvmsg() */
	sock_write,		/* write: forwarded to sock_sendmsg() */
	NULL,			/* readdir */
	sock_poll,		/* poll: delegated to the protocol */
	sock_ioctl,		/* ioctl: delegated to the protocol */
	NULL,			/* mmap */
	NULL,			/* no special open code... */
	sock_close,		/* drops async entry, releases socket */
	NULL,			/* no fsync */
	sock_fasync		/* maintains the socket's fasync list */
};
 130
/*
 *	The protocol list. Each protocol is registered in here.
 *	sock_create() indexes this table by address family; a NULL slot means
 *	no handler is currently present for that family.
 */

struct net_proto_family *net_families[NPROTO];

/*
 *	Statistics counter: number of sockets currently allocated. It is
 *	incremented by sock_alloc() and decremented by sock_release().
 */

static int sockets_in_use  = 0;
 142
 143/*
 144 *      Support routines. Move socket addresses back and forth across the kernel/user
 145 *      divide and look after the messy bits.
 146 */
 147
/*
 *	Largest socket address the helpers below will move between user and
 *	kernel space, and the size of the on-stack address buffers used by the
 *	system calls in this file.
 *
 *	Rough per-family sizes: 108 for Unix domain, 16 for IP, 16 for IPX,
 *	24 for IPv6, about 80 for AX.25. The value must be at least one bigger
 *	than the AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
 156 
 157int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 158{
 159        if(ulen<0||ulen>MAX_SOCK_ADDR)
 160                return -EINVAL;
 161        if(ulen==0)
 162                return 0;
 163        if(copy_from_user(kaddr,uaddr,ulen))
 164                return -EFAULT;
 165        return 0;
 166}
 167
 168int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 169{
 170        int err;
 171        int len;
 172
 173        if((err=get_user(len, ulen)))
 174                return err;
 175        if(len>klen)
 176                len=klen;
 177        if(len<0 || len> MAX_SOCK_ADDR)
 178                return -EINVAL;
 179        if(len)
 180        {
 181                if(copy_to_user(uaddr,kaddr,len))
 182                        return -EFAULT;
 183        }
 184        /*
 185         *      "fromlen shall refer to the value before truncation.."
 186         *                      1003.1g
 187         */
 188        return __put_user(klen, ulen);
 189}
 190
 191/*
 192 *      Obtains the first available file descriptor and sets it up for use. 
 193 */
 194
 195static int get_fd(struct inode *inode)
 196{
 197        int fd;
 198
 199        /*
 200         *      Find a file descriptor suitable for return to the user. 
 201         */
 202
 203        fd = get_unused_fd();
 204        if (fd >= 0) {
 205                struct file *file = get_empty_filp();
 206
 207                if (!file) {
 208                        put_unused_fd(fd);
 209                        return -ENFILE;
 210                }
 211
 212                file->f_dentry = d_alloc_root(inode, NULL);
 213                if (!file->f_dentry) {
 214                        put_filp(file);
 215                        put_unused_fd(fd);
 216                        return -ENOMEM;
 217                }
 218
 219                /*
 220                 * The socket maintains a reference to the inode, so we
 221                 * have to increment the count.
 222                 */
 223                inode->i_count++;
 224
 225                current->files->fd[fd] = file;
 226                file->f_op = &socket_file_ops;
 227                file->f_mode = 3;
 228                file->f_flags = O_RDWR;
 229                file->f_pos = 0;
 230        }
 231        return fd;
 232}
 233
 234extern __inline__ struct socket *socki_lookup(struct inode *inode)
 235{
 236        return &inode->u.socket_i;
 237}
 238
 239/*
 240 *      Go from a file number to its socket slot.
 241 */
 242
 243extern __inline__ struct socket *sockfd_lookup(int fd, int *err)
 244{
 245        struct file *file;
 246        struct inode *inode;
 247
 248        if (!(file = fget(fd)))
 249        {
 250                *err = -EBADF;
 251                return NULL;
 252        }
 253
 254        inode = file->f_dentry->d_inode;
 255        if (!inode || !inode->i_sock || !socki_lookup(inode))
 256        {
 257                *err = -ENOTSOCK;
 258                fput(file);
 259                return NULL;
 260        }
 261
 262        return socki_lookup(inode);
 263}
 264
 265extern __inline__ void sockfd_put(struct socket *sock)
 266{
 267        fput(sock->file);
 268}
 269
 270/*
 271 *      Allocate a socket.
 272 */
 273
 274struct socket *sock_alloc(void)
 275{
 276        struct inode * inode;
 277        struct socket * sock;
 278
 279        inode = get_empty_inode();
 280        if (!inode)
 281                return NULL;
 282
 283        sock = socki_lookup(inode);
 284
 285        inode->i_mode = S_IFSOCK;
 286        inode->i_sock = 1;
 287        inode->i_uid = current->uid;
 288        inode->i_gid = current->gid;
 289
 290        sock->inode = inode;
 291        init_waitqueue(&sock->wait);
 292        sock->fasync_list = NULL;
 293        sock->state = SS_UNCONNECTED;
 294        sock->flags = 0;
 295        sock->ops = NULL;
 296        sock->sk = NULL;
 297        sock->file = NULL;
 298
 299        sockets_in_use++;
 300        return sock;
 301}
 302
 303void sock_release(struct socket *sock)
 304{
 305        int oldstate;
 306
 307        if ((oldstate = sock->state) != SS_UNCONNECTED)
 308                sock->state = SS_DISCONNECTING;
 309
 310        if (sock->ops) 
 311                sock->ops->release(sock, NULL);
 312
 313        --sockets_in_use;       /* Bookkeeping.. */
 314        sock->file=NULL;
 315        iput(sock->inode);
 316}
 317
 318int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 319{
 320        int err;
 321        struct scm_cookie scm;
 322
 323        err = scm_send(sock, msg, &scm);
 324        if (err < 0)
 325                return err;
 326
 327        err = sock->ops->sendmsg(sock, msg, size, &scm);
 328
 329        scm_destroy(&scm);
 330
 331        return err;
 332}
 333
 334int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 335{
 336        struct scm_cookie scm;
 337
 338        memset(&scm, 0, sizeof(scm));
 339
 340        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 341
 342        if (size < 0)
 343                return size;
 344
 345        scm_recv(sock, msg, &scm, flags);
 346
 347        return size;
 348}
 349
 350
 351/*
 352 *      Sockets are not seekable.
 353 */
 354
 355static long long sock_lseek(struct file *file,long long offset, int whence)
 356{
 357        return -ESPIPE;
 358}
 359
 360/*
 361 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 362 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 363 */
 364
 365static long sock_read(struct inode *inode, struct file *file,
 366                      char *ubuf, unsigned long size)
 367{
 368        struct socket *sock;
 369        struct iovec iov;
 370        struct msghdr msg;
 371
 372        sock = socki_lookup(inode); 
 373  
 374        if (size==0)            /* Match SYS5 behaviour */
 375                return 0;
 376
 377        msg.msg_name=NULL;
 378        msg.msg_namelen=0;
 379        msg.msg_iov=&iov;
 380        msg.msg_iovlen=1;
 381        msg.msg_control=NULL;
 382        msg.msg_controllen=0;
 383        iov.iov_base=ubuf;
 384        iov.iov_len=size;
 385
 386        return sock_recvmsg(sock, &msg, size,
 387                            !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT);
 388}
 389
 390
 391/*
 392 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is
 393 *      readable by the user process.
 394 */
 395
 396static long sock_write(struct inode *inode, struct file *file,
 397                       const char *ubuf, unsigned long size)
 398{
 399        struct socket *sock;
 400        struct msghdr msg;
 401        struct iovec iov;
 402        
 403        sock = socki_lookup(inode); 
 404
 405        if(size==0)             /* Match SYS5 behaviour */
 406                return 0;
 407
 408        msg.msg_name=NULL;
 409        msg.msg_namelen=0;
 410        msg.msg_iov=&iov;
 411        msg.msg_iovlen=1;
 412        msg.msg_control=NULL;
 413        msg.msg_controllen=0;
 414        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 415        iov.iov_base=(void *)ubuf;
 416        iov.iov_len=size;
 417        
 418        return sock_sendmsg(sock, &msg, size);
 419}
 420
 421int sock_readv_writev(int type, struct inode * inode, struct file * file,
 422                      const struct iovec * iov, long count, long size)
 423{
 424        struct msghdr msg;
 425        struct socket *sock;
 426
 427        sock = socki_lookup(inode);
 428
 429        msg.msg_name = NULL;
 430        msg.msg_namelen = 0;
 431        msg.msg_control = NULL;
 432        msg.msg_controllen = 0;
 433        msg.msg_iov = (struct iovec *) iov;
 434        msg.msg_iovlen = count;
 435        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 436
 437        /* read() does a VERIFY_WRITE */
 438        if (type == VERIFY_WRITE)
 439                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 440        return sock_sendmsg(sock, &msg, size);
 441}
 442
 443
 444/*
 445 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 446 *      with it - that's up to the protocol still.
 447 */
 448
 449int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 450           unsigned long arg)
 451{
 452        struct socket *sock = socki_lookup(inode);
 453        return sock->ops->ioctl(sock, cmd, arg);
 454}
 455
 456
 457static unsigned int sock_poll(struct file *file, poll_table * wait)
 458{
 459        struct socket *sock;
 460
 461        sock = socki_lookup(file->f_dentry->d_inode);
 462
 463        /*
 464         *      We can't return errors to poll, so it's either yes or no. 
 465         */
 466
 467        return sock->ops->poll(sock, wait);
 468}
 469
 470
 471int sock_close(struct inode *inode, struct file *filp)
 472{
 473        /*
 474         *      It was possible the inode is NULL we were 
 475         *      closing an unfinished socket. 
 476         */
 477
 478        if (!inode)
 479        {
 480                printk(KERN_DEBUG "sock_close: NULL inode\n");
 481                return 0;
 482        }
 483        sock_fasync(filp, 0);
 484        sock_release(socki_lookup(inode));
 485        return 0;
 486}
 487
 488/*
 489 *      Update the socket async list
 490 */
 491 
 492static int sock_fasync(struct file *filp, int on)
 493{
 494        struct fasync_struct *fa, *fna=NULL, **prev;
 495        struct socket *sock;
 496        unsigned long flags;
 497        
 498        if (on)
 499        {
 500                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 501                if(fna==NULL)
 502                        return -ENOMEM;
 503        }
 504
 505        sock = socki_lookup(filp->f_dentry->d_inode);
 506        
 507        prev=&(sock->fasync_list);
 508        
 509        save_flags(flags);
 510        cli();
 511        
 512        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 513                if (fa->fa_file==filp)
 514                        break;
 515        
 516        if(on)
 517        {
 518                if(fa!=NULL)
 519                {
 520                        kfree_s(fna,sizeof(struct fasync_struct));
 521                        restore_flags(flags);
 522                        return 0;
 523                }
 524                fna->fa_file=filp;
 525                fna->magic=FASYNC_MAGIC;
 526                fna->fa_next=sock->fasync_list;
 527                sock->fasync_list=fna;
 528        }
 529        else
 530        {
 531                if (fa!=NULL)
 532                {
 533                        *prev=fa->fa_next;
 534                        kfree_s(fa,sizeof(struct fasync_struct));
 535                }
 536        }
 537        restore_flags(flags);
 538        return 0;
 539}
 540
 541int sock_wake_async(struct socket *sock, int how)
 542{
 543        if (!sock || !sock->fasync_list)
 544                return -1;
 545        switch (how)
 546        {
 547                case 0:
 548                        kill_fasync(sock->fasync_list, SIGIO);
 549                        break;
 550                case 1:
 551                        if (!(sock->flags & SO_WAITDATA))
 552                                kill_fasync(sock->fasync_list, SIGIO);
 553                        break;
 554                case 2:
 555                        if (sock->flags & SO_NOSPACE)
 556                        {
 557                                kill_fasync(sock->fasync_list, SIGIO);
 558                                sock->flags &= ~SO_NOSPACE;
 559                        }
 560                        break;
 561        }
 562        return 0;
 563}
 564
 565
 566int sock_create(int family, int type, int protocol, struct socket **res)
 567{
 568        int i;
 569        struct socket *sock;
 570
 571        /*
 572         *      Check protocol is in range
 573         */
 574        if(family<0||family>=NPROTO)
 575                return -EINVAL;
 576                
 577#if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
 578        /* Attempt to load a protocol module if the find failed. 
 579         * 
 580         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 581         * requested real, full-featured networking support upon configuration.
 582         * Otherwise module support will break!
 583         */
 584        if (net_families[family]==NULL)
 585        {
 586                char module_name[30];
 587                sprintf(module_name,"net-pf-%d",family);
 588                request_module(module_name);
 589        }
 590#endif
 591
 592        if (net_families[family]==NULL)
 593                return -EINVAL;
 594
 595/*
 596 *      Check that this is a type that we know how to manipulate and
 597 *      the protocol makes sense here. The family can still reject the
 598 *      protocol later.
 599 */
 600  
 601        if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
 602             type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
 603#ifdef CONFIG_XTP
 604                type != SOCK_WEB  &&
 605#endif
 606             type != SOCK_PACKET) || protocol < 0)
 607                        return -EINVAL;
 608
 609/*
 610 *      Allocate the socket and allow the family to set things up. if
 611 *      the protocol is 0, the family is instructed to select an appropriate
 612 *      default.
 613 */
 614
 615        if (!(sock = sock_alloc())) 
 616        {
 617                printk(KERN_WARNING "socket: no more sockets\n");
 618                return -ENFILE;         /* Not exactly a match, but its the
 619                                           closest posix thing */
 620        }
 621
 622        sock->type   = type;
 623
 624        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 625        {
 626                sock_release(sock);
 627                return i;
 628        }
 629
 630        *res = sock;
 631        return 0;
 632}
 633
 634asmlinkage int sys_socket(int family, int type, int protocol)
 635{
 636        int retval;
 637        struct socket *sock;
 638
 639        lock_kernel();
 640
 641        retval = sock_create(family, type, protocol, &sock);
 642        if (retval < 0)
 643                goto out;
 644
 645        retval = get_fd(sock->inode);
 646        if (retval < 0) {
 647                sock_release(sock);
 648                goto out;
 649        }
 650
 651        sock->file = current->files->fd[retval];
 652out:
 653        unlock_kernel();
 654        return retval;
 655}
 656
 657/*
 658 *      Create a pair of connected sockets.
 659 */
 660
 661asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2])
 662{
 663        int fd1, fd2, i;
 664        struct socket *sock1=NULL, *sock2=NULL;
 665        int err;
 666
 667        lock_kernel();
 668
 669        /*
 670         * Obtain the first socket and check if the underlying protocol
 671         * supports the socketpair call.
 672         */
 673
 674        if ((fd1 = sys_socket(family, type, protocol)) < 0) {
 675                err = fd1;
 676                goto out;
 677        }
 678
 679        sock1 = sockfd_lookup(fd1, &err);
 680        if (!sock1)
 681                goto out;
 682        /*
 683         *      Now grab another socket and try to connect the two together. 
 684         */
 685        err = -EINVAL;
 686        if ((fd2 = sys_socket(family, type, protocol)) < 0) 
 687        {
 688                sys_close(fd1);
 689                goto out;
 690        }
 691
 692        sock2 = sockfd_lookup(fd2,&err);
 693        if (!sock2)
 694                goto out;
 695        if ((i = sock1->ops->socketpair(sock1, sock2)) < 0) 
 696        {
 697                sys_close(fd1);
 698                sys_close(fd2);
 699                err = i;
 700        }
 701        else
 702        {
 703                err = put_user(fd1, &usockvec[0]); 
 704                if (!err) 
 705                        err = put_user(fd2, &usockvec[1]);
 706                if (err) {
 707                        sys_close(fd1);
 708                        sys_close(fd2);
 709                }
 710        }
 711out:
 712        if(sock1)
 713                sockfd_put(sock1);
 714        if(sock2)
 715                sockfd_put(sock2);
 716        unlock_kernel();
 717        return err;
 718}
 719
 720
 721/*
 722 *      Bind a name to a socket. Nothing much to do here since it's
 723 *      the protocol's responsibility to handle the local address.
 724 *
 725 *      We move the socket address to kernel space before we call
 726 *      the protocol layer (having also checked the address is ok).
 727 */
 728 
 729asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
 730{
 731        struct socket *sock;
 732        char address[MAX_SOCK_ADDR];
 733        int err;
 734
 735        lock_kernel();
 736        if((sock = sockfd_lookup(fd,&err))!=NULL)
 737        {
 738                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
 739                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
 740                sockfd_put(sock);
 741        }                       
 742        unlock_kernel();
 743        return err;
 744}
 745
 746
 747/*
 748 *      Perform a listen. Basically, we allow the protocol to do anything
 749 *      necessary for a listen, and if that works, we mark the socket as
 750 *      ready for listening.
 751 */
 752
 753asmlinkage int sys_listen(int fd, int backlog)
 754{
 755        struct socket *sock;
 756        int err;
 757        
 758        lock_kernel();
 759        if((sock = sockfd_lookup(fd, &err))!=NULL)
 760        {
 761                err=sock->ops->listen(sock, backlog);
 762                sockfd_put(sock);
 763        }
 764        unlock_kernel();
 765        return err;
 766}
 767
 768
 769/*
 770 *      For accept, we attempt to create a new socket, set up the link
 771 *      with the client, wake up the client, then return the new
 772 *      connected fd. We collect the address of the connector in kernel
 773 *      space and move it to user at the very end. This is unclean because
 774 *      we open the socket then return an error.
 775 *
 776 *      1003.1g adds the ability to recvmsg() to query connection pending
 777 *      status to recvmsg. We need to add that support in a way thats
 778 *      clean when we restucture accept also.
 779 */
 780
 781asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 782{
 783        struct inode *inode;
 784        struct socket *sock, *newsock;
 785        int err;
 786        char address[MAX_SOCK_ADDR];
 787        int len;
 788
 789        lock_kernel();
 790restart:
 791        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 792        {
 793                if (!(newsock = sock_alloc())) 
 794                {
 795                        err=-EMFILE;
 796                        goto out;
 797                }
 798
 799                inode = newsock->inode;
 800                newsock->type = sock->type;
 801
 802                if ((err = sock->ops->dup(newsock, sock)) < 0) 
 803                {
 804                        sock_release(newsock);
 805                        goto out;
 806                }
 807
 808                err = newsock->ops->accept(sock, newsock, current->files->fd[fd]->f_flags);
 809
 810                if (err < 0)
 811                {
 812                        sock_release(newsock);
 813                        goto out;
 814                }
 815                newsock = socki_lookup(inode);
 816
 817                if ((err = get_fd(inode)) < 0) 
 818                {
 819                        sock_release(newsock);
 820                        err=-EINVAL;
 821                        goto out;
 822                }
 823
 824                newsock->file = current->files->fd[err];
 825        
 826                if (upeer_sockaddr)
 827                {
 828                        /* Handle the race where the accept works and we
 829                           then getname after it has closed again */
 830                        if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
 831                        {
 832                                sys_close(err);
 833                                goto restart;
 834                        }
 835                        move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen);
 836                }
 837out:
 838                sockfd_put(sock);               
 839        }
 840        unlock_kernel();
 841        return err;
 842}
 843
 844
 845/*
 846 *      Attempt to connect to a socket with the server address.  The address
 847 *      is in user space so we verify it is OK and move it to kernel space.
 848 *
 849 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 850 *      break bindings
 851 *
 852 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 853 *      other SEQPACKET protocols that take time to connect() as it doesn't
 854 *      include the -EINPROGRESS status for such sockets.
 855 */
 856 
 857asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 858{
 859        struct socket *sock;
 860        char address[MAX_SOCK_ADDR];
 861        int err;
 862
 863        lock_kernel();
 864        if ((sock = sockfd_lookup(fd,&err))!=NULL)
 865        {
 866                if((err=move_addr_to_kernel(uservaddr,addrlen,address))>=0)
 867                        err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
 868                             current->files->fd[fd]->f_flags);
 869                sockfd_put(sock);
 870        }
 871        unlock_kernel();
 872        return err;
 873}
 874
 875/*
 876 *      Get the local address ('name') of a socket object. Move the obtained
 877 *      name to user space.
 878 */
 879
 880asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 881{
 882        struct socket *sock;
 883        char address[MAX_SOCK_ADDR];
 884        int len;
 885        int err;
 886        
 887        lock_kernel();
 888        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 889        {
 890                if((err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 0))==0)
 891                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 892                sockfd_put(sock);
 893        }
 894        unlock_kernel();
 895        return err;
 896}
 897
 898/*
 899 *      Get the remote address ('name') of a socket object. Move the obtained
 900 *      name to user space.
 901 */
 902 
 903asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 904{
 905        struct socket *sock;
 906        char address[MAX_SOCK_ADDR];
 907        int len;
 908        int err;
 909
 910        lock_kernel();
 911        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 912        {
 913                if((err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 1))==0)
 914                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 915                sockfd_put(sock);
 916        }
 917        unlock_kernel();
 918        return err;
 919}
 920
 921/*
 922 *      Send a datagram down a socket. The datagram as with write() is
 923 *      in user space. We check it can be read.
 924 */
 925
 926asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
 927{
 928        struct socket *sock;
 929        int err;
 930        struct msghdr msg;
 931        struct iovec iov;
 932
 933        lock_kernel();
 934        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 935        {
 936                if(len>=0)
 937                {
 938                        iov.iov_base=buff;
 939                        iov.iov_len=len;
 940                        msg.msg_name=NULL;
 941                        msg.msg_namelen=0;
 942                        msg.msg_iov=&iov;
 943                        msg.msg_iovlen=1;
 944                        msg.msg_control=NULL;
 945                        msg.msg_controllen=0;
 946                        if (current->files->fd[fd]->f_flags & O_NONBLOCK)
 947                                flags |= MSG_DONTWAIT;
 948                        msg.msg_flags=flags;
 949                        err=sock_sendmsg(sock, &msg, len);
 950                }
 951                else
 952                        err=-EINVAL;
 953                sockfd_put(sock);
 954        }
 955        unlock_kernel();
 956        return err;
 957}
 958
 959/*
 960 *      Send a datagram to a given address. We move the address into kernel
 961 *      space and check the user space data area is readable before invoking
 962 *      the protocol.
 963 */
 964
 965asmlinkage int sys_sendto(int fd, void * buff, size_t len, unsigned flags,
 966           struct sockaddr *addr, int addr_len)
 967{
 968        struct socket *sock;
 969        char address[MAX_SOCK_ADDR];
 970        int err;
 971        struct msghdr msg;
 972        struct iovec iov;
 973        
 974        lock_kernel();
 975        if ((sock = sockfd_lookup(fd,&err))!=NULL)
 976        {
 977                iov.iov_base=buff;
 978                iov.iov_len=len;
 979                msg.msg_name=NULL;
 980                msg.msg_iov=&iov;
 981                msg.msg_iovlen=1;
 982                msg.msg_control=NULL;
 983                msg.msg_controllen=0;
 984                msg.msg_namelen=addr_len;
 985                if(addr)
 986                {
 987                        err=move_addr_to_kernel(addr,addr_len,address);
 988                        if (err < 0)
 989                                goto bad;
 990                        msg.msg_name=address;
 991                }
 992                if (current->files->fd[fd]->f_flags & O_NONBLOCK)
 993                        flags |= MSG_DONTWAIT;
 994                msg.msg_flags=flags;
 995                err=sock_sendmsg(sock, &msg, len);
 996bad:            
 997                sockfd_put(sock);
 998        }
 999        unlock_kernel();
1000        return err;
1001}
1002
1003
1004
1005/*
1006 *      Receive a frame from the socket and optionally record the address of the 
1007 *      sender. We verify the buffers are writable and if needed move the
1008 *      sender address from kernel to user space.
1009 */
1010
1011asmlinkage int sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1012             struct sockaddr *addr, int *addr_len)
1013{
1014        struct socket *sock;
1015        struct iovec iov;
1016        struct msghdr msg;
1017        char address[MAX_SOCK_ADDR];
1018        int err,err2;
1019
1020        lock_kernel();
1021        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1022        {  
1023                msg.msg_control=NULL;
1024                msg.msg_controllen=0;
1025                msg.msg_iovlen=1;
1026                msg.msg_iov=&iov;
1027                iov.iov_len=size;
1028                iov.iov_base=ubuf;
1029                msg.msg_name=address;
1030                msg.msg_namelen=MAX_SOCK_ADDR;
1031                err=sock_recvmsg(sock, &msg, size,
1032                          (current->files->fd[fd]->f_flags & O_NONBLOCK) ? (flags | MSG_DONTWAIT) : flags);
1033                if(err>=0 && addr!=NULL)
1034                {
1035                        err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1036                        if(err2<0)
1037                                err=err2;
1038                }
1039                sockfd_put(sock);                       
1040        }               
1041        unlock_kernel();
1042        return err;
1043}
1044
1045/*
1046 *      Receive a datagram from a socket. 
1047 */
1048
1049asmlinkage int sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1050{
1051        return sys_recvfrom(fd,ubuf,size,flags, NULL, NULL);
1052}
1053
1054/*
1055 *      Set a socket option. Because we don't know the option lengths we have
1056 *      to pass the user mode parameter for the protocols to sort out.
1057 */
1058 
1059asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1060{
1061        int err;
1062        struct socket *sock;
1063        
1064        lock_kernel();
1065        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1066        {
1067                if (level == SOL_SOCKET)
1068                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1069                else
1070                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1071                sockfd_put(sock);
1072        }
1073        unlock_kernel();
1074        return err;
1075}
1076
1077/*
1078 *      Get a socket option. Because we don't know the option lengths we have
1079 *      to pass a user mode parameter for the protocols to sort out.
1080 */
1081
1082asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1083{
1084        int err;
1085        struct socket *sock;
1086
1087        lock_kernel();
1088        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1089        {
1090                if (level == SOL_SOCKET)
1091                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1092                else
1093                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1094                sockfd_put(sock);
1095        }
1096        unlock_kernel();
1097        return err;
1098}
1099
1100
1101/*
1102 *      Shutdown a socket.
1103 */
1104 
1105asmlinkage int sys_shutdown(int fd, int how)
1106{
1107        int err;
1108        struct socket *sock;
1109
1110        lock_kernel();
1111        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1112        {
1113                err=sock->ops->shutdown(sock, how);
1114                sockfd_put(sock);
1115        }
1116        unlock_kernel();
1117        return err;
1118}
1119
1120/*
1121 *      BSD sendmsg interface
1122 */
1123 
1124asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1125{
1126        struct socket *sock;
1127        char address[MAX_SOCK_ADDR];
1128        struct iovec iov[UIO_FASTIOV];
1129        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1130        struct msghdr msg_sys;
1131        int err= -EINVAL;
1132        int total_len;
1133        unsigned char *ctl_buf = ctl;
1134        
1135        lock_kernel();
1136
1137        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1138        {
1139                err=-EFAULT;
1140                goto out; 
1141        }
1142        /* do not move before msg_sys is valid */
1143        if (msg_sys.msg_iovlen>UIO_MAXIOV)
1144                goto out;
1145        /* This will also move the address data into kernel space */
1146        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1147        if (err < 0)
1148                goto out;
1149        total_len=err;
1150
1151        if (msg_sys.msg_controllen) 
1152        {
1153                /* XXX We just limit the buffer and assume that the 
1154                 * skbuff accounting stops it from going too far.
1155                 * I hope this is correct.
1156                 */
1157                if (msg_sys.msg_controllen > sizeof(ctl) &&
1158                        msg_sys.msg_controllen <= 256)
1159                {
1160                        ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
1161                        if (ctl_buf == NULL) 
1162                        {
1163                                err = -ENOBUFS;
1164                                goto failed2;
1165                        }
1166                }
1167                if (copy_from_user(ctl_buf, msg_sys.msg_control, 
1168                                            msg_sys.msg_controllen)) {
1169                        err = -EFAULT;
1170                        goto failed;
1171                }
1172                msg_sys.msg_control = ctl_buf;
1173        }
1174        msg_sys.msg_flags = flags;
1175        if (current->files->fd[fd]->f_flags & O_NONBLOCK)
1176                msg_sys.msg_flags |= MSG_DONTWAIT;
1177
1178        if ((sock = sockfd_lookup(fd,&err))!=NULL)
1179        {
1180                err = sock_sendmsg(sock, &msg_sys, total_len);
1181                sockfd_put(sock);
1182        }
1183
1184failed:
1185        if (ctl_buf != ctl)
1186                kfree_s(ctl_buf, msg_sys.msg_controllen);
1187failed2:
1188        if (msg_sys.msg_iov != iov)
1189                kfree(msg_sys.msg_iov);
1190out:
1191        unlock_kernel();
1192        return err;
1193}
1194
1195/*
1196 *      BSD recvmsg interface
1197 */
1198 
1199asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1200{
1201        struct socket *sock;
1202        struct iovec iovstack[UIO_FASTIOV];
1203        struct iovec *iov=iovstack;
1204        struct msghdr msg_sys;
1205        unsigned long cmsg_ptr;
1206        int err;
1207        int total_len;
1208        int len = 0;
1209
1210        /* kernel mode address */
1211        char addr[MAX_SOCK_ADDR];
1212
1213        /* user mode address pointers */
1214        struct sockaddr *uaddr;
1215        int *uaddr_len;
1216        
1217        lock_kernel();
1218        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1219        {
1220                err=-EFAULT;
1221                goto out;
1222        }
1223        if (msg_sys.msg_iovlen>UIO_MAXIOV)
1224        {
1225                err=-EINVAL;
1226                goto out;
1227        }
1228        
1229        /*
1230         *      Save the user-mode address (verify_iovec will change the
1231         *      kernel msghdr to use the kernel address space)
1232         */
1233         
1234        uaddr = msg_sys.msg_name;
1235        uaddr_len = &msg->msg_namelen;
1236        err=verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1237        if (err<0)
1238                goto out;
1239
1240        total_len=err;
1241
1242        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1243        msg_sys.msg_flags = 0;
1244        
1245        if (current->files->fd[fd]->f_flags&O_NONBLOCK)
1246                flags |= MSG_DONTWAIT;
1247
1248        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1249        {
1250                err=sock_recvmsg(sock, &msg_sys, total_len, flags);
1251                if(err>=0)
1252                        len=err;
1253                sockfd_put(sock);
1254        }
1255        if (msg_sys.msg_iov != iov)
1256                kfree(msg_sys.msg_iov);
1257
1258        if (uaddr != NULL && err>=0)
1259                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1260        if (err>=0) {
1261                err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1262                if (!err)
1263                        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1264                                                         &msg->msg_controllen);
1265        }
1266out:
1267        unlock_kernel();
1268        if(err<0)
1269                return err;
1270        return len;
1271}
1272
1273
1274/*
1275 *      Perform a file control on a socket file descriptor.
1276 *
1277 *      FIXME: does this need an fd lock ?
1278 */
1279
1280int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1281{
1282        struct socket *sock;
1283
1284        sock = socki_lookup (filp->f_dentry->d_inode);
1285        if (sock && sock->ops)
1286                return sock->ops->fcntl(sock, cmd, arg);
1287        return(-EINVAL);
1288}
1289
/*
 *	Argument list sizes for sys_socketcall. nargs[call] is the number of
 *	bytes of arguments to copy in from user space for that call: AL(n)
 *	stands for n arguments of sizeof(unsigned long) each.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};
#undef AL
1296
1297/*
1298 *      System call vectors. 
1299 *
1300 *      Argument checking cleaned up. Saved 20% in size.
1301 *  This function doesn't need to set the kernel lock because
1302 *  it is set by the callees. 
1303 */
1304
1305asmlinkage int sys_socketcall(int call, unsigned long *args)
1306{
1307        unsigned long a[6];
1308        unsigned long a0,a1;
1309        int err;
1310
1311        if(call<1||call>SYS_RECVMSG)
1312                return -EINVAL;
1313
1314        /* copy_from_user should be SMP safe. */
1315        if (copy_from_user(a, args, nargs[call]))
1316                return -EFAULT;
1317                
1318        a0=a[0];
1319        a1=a[1];
1320        
1321        switch(call) 
1322        {
1323                case SYS_SOCKET:
1324                        err = sys_socket(a0,a1,a[2]);
1325                        break;
1326                case SYS_BIND:
1327                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1328                        break;
1329                case SYS_CONNECT:
1330                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1331                        break;
1332                case SYS_LISTEN:
1333                        err = sys_listen(a0,a1);
1334                        break;
1335                case SYS_ACCEPT:
1336                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1337                        break;
1338                case SYS_GETSOCKNAME:
1339                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1340                        break;
1341                case SYS_GETPEERNAME:
1342                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1343                        break;
1344                case SYS_SOCKETPAIR:
1345                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1346                        break;
1347                case SYS_SEND:
1348                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1349                        break;
1350                case SYS_SENDTO:
1351                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1352                                         (struct sockaddr *)a[4], a[5]);
1353                        break;
1354                case SYS_RECV:
1355                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1356                        break;
1357                case SYS_RECVFROM:
1358                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1359                                           (struct sockaddr *)a[4], (int *)a[5]);
1360                        break;
1361                case SYS_SHUTDOWN:
1362                        err = sys_shutdown(a0,a1);
1363                        break;
1364                case SYS_SETSOCKOPT:
1365                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1366                        break;
1367                case SYS_GETSOCKOPT:
1368                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1369                        break;
1370                case SYS_SENDMSG:
1371                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1372                        break;
1373                case SYS_RECVMSG:
1374                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1375                        break;
1376                default:
1377                        err = -EINVAL;
1378                        break;
1379        }
1380        return err;
1381}
1382
1383/*
1384 *      This function is called by a protocol handler that wants to
1385 *      advertise its address family, and have it linked into the
1386 *      SOCKET module.
1387 */
1388 
1389int sock_register(struct net_proto_family *ops)
1390{
1391        if (ops->family < 0 || ops->family >= NPROTO)
1392                return -1;
1393
1394        net_families[ops->family]=ops;
1395        return 0;
1396}
1397
1398/*
1399 *      This function is called by a protocol handler that wants to
1400 *      remove its address family, and have it unlinked from the
1401 *      SOCKET module.
1402 */
1403 
1404int sock_unregister(int family)
1405{
1406        if (family < 0 || family >= NPROTO)
1407                return -1;
1408
1409        net_families[family]=NULL;
1410        return 0;
1411}
1412
1413__initfunc(void proto_init(void))
1414{
1415        extern struct net_proto protocols[];    /* Network protocols */
1416        struct net_proto *pro;
1417
1418        /* Kick all configured protocols. */
1419        pro = protocols;
1420        while (pro->name != NULL) 
1421        {
1422                (*pro->init_func)(pro);
1423                pro++;
1424        }
1425        /* We're all done... */
1426}
1427
1428extern void sk_init(void);
1429
1430__initfunc(void sock_init(void))
1431{
1432        int i;
1433
1434        printk(KERN_INFO "Swansea University Computer Society NET3.039 for Linux 2.1\n");
1435
1436        /*
1437         *      Initialize all address (protocol) families. 
1438         */
1439         
1440        for (i = 0; i < NPROTO; i++) 
1441                net_families[i] = NULL;
1442
1443        /*
1444         *      Initialize sock SLAB cache.
1445         */
1446         
1447        sk_init();
1448        
1449        /*
1450         *      The netlink device handler may be needed early.
1451         */
1452
1453#ifdef CONFIG_NETLINK
1454        init_netlink();
1455#endif
1456
1457        /*
1458         *      Wan router layer. 
1459         */
1460
1461#ifdef CONFIG_WAN_ROUTER         
1462        wanrouter_init();
1463#endif
1464
1465        /*
1466         *      Attach the firewall module if configured
1467         */
1468         
1469#ifdef CONFIG_FIREWALL   
1470        fwchain_init();
1471#endif
1472
1473        /*
1474         *      Initialize the protocols module. 
1475         */
1476
1477        proto_init();
1478}
1479
1480int socket_get_info(char *buffer, char **start, off_t offset, int length)
1481{
1482        int len = sprintf(buffer, "sockets: used %d\n", sockets_in_use);
1483        if (offset >= len)
1484        {
1485                *start = buffer;
1486                return 0;
1487        }
1488        *start = buffer + offset;
1489        len -= offset;
1490        if (len > length)
1491                len = length;
1492        return len;
1493}
1494
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.