linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *
  45 *
  46 *              This program is free software; you can redistribute it and/or
  47 *              modify it under the terms of the GNU General Public License
  48 *              as published by the Free Software Foundation; either version
  49 *              2 of the License, or (at your option) any later version.
  50 *
  51 *
  52 *      This module is effectively the top level interface to the BSD socket
  53 *      paradigm. 
  54 *
  55 */
  56
  57#include <linux/config.h>
  58#include <linux/signal.h>
  59#include <linux/errno.h>
  60#include <linux/sched.h>
  61#include <linux/mm.h>
  62#include <linux/smp.h>
  63#include <linux/smp_lock.h>
  64#include <linux/kernel.h>
  65#include <linux/major.h>
  66#include <linux/stat.h>
  67#include <linux/socket.h>
  68#include <linux/fcntl.h>
  69#include <linux/file.h>
  70#include <linux/net.h>
  71#include <linux/interrupt.h>
  72#include <linux/netdevice.h>
  73#include <linux/proc_fs.h>
  74#include <linux/firewall.h>
  75#include <linux/wanrouter.h>
  76#include <linux/init.h>
  77
  78#if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
  79#include <linux/kerneld.h>
  80#endif
  81
  82#include <net/netlink.h>
  83
  84#include <asm/system.h>
  85#include <asm/uaccess.h>
  86
  87#include <linux/inet.h>
  88#include <linux/netdevice.h>
  89#include <net/ip.h>
  90#include <net/protocol.h>
  91#include <net/rarp.h>
  92#include <net/tcp.h>
  93#include <net/udp.h>
  94#include <linux/skbuff.h>
  95#include <net/sock.h>
  96#include <net/scm.h>
  97
  98
  99static long long sock_lseek(struct inode *inode, struct file *file,
 100                            long long offset, int whence);
 101static long sock_read(struct inode *inode, struct file *file,
 102                      char *buf, unsigned long size);
 103static long sock_write(struct inode *inode, struct file *file,
 104                       const char *buf, unsigned long size);
 105
 106static int sock_close(struct inode *inode, struct file *file);
 107static unsigned int sock_poll(struct file *file, poll_table *wait);
 108static int sock_ioctl(struct inode *inode, struct file *file,
 109                      unsigned int cmd, unsigned long arg);
 110static int sock_fasync(struct inode *inode, struct file *filp, int on);
 111
 112
 113/*
 114 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 115 *      in the operation structures but are done directly via the socketcall() multiplexor.
 116 */
 117
 118static struct file_operations socket_file_ops = {
 119        sock_lseek,
 120        sock_read,
 121        sock_write,
 122        NULL,                   /* readdir */
 123        sock_poll,
 124        sock_ioctl,
 125        NULL,                   /* mmap */
 126        NULL,                   /* no special open code... */
 127        sock_close,
 128        NULL,                   /* no fsync */
 129        sock_fasync
 130};
 131
 132/*
 133 *      The protocol list. Each protocol is registered in here.
 134 */
 135
 136struct net_proto_family *net_families[NPROTO];
 137
 138/*
 139 *      Statistics counters of the socket lists
 140 */
 141
 142static int sockets_in_use  = 0;
 143
 144/*
 145 *      Support routines. Move socket addresses back and forth across the kernel/user
 146 *      divide and look after the messy bits.
 147 */
 148
 149#define MAX_SOCK_ADDR   128             /* 108 for Unix domain - 
 150                                           16 for IP, 16 for IPX,
 151                                           24 for IPv6,
 152                                           about 80 for AX.25 
 153                                           must be at least one bigger than
 154                                           the AF_UNIX size (see net/unix/af_unix.c
 155                                           :unix_mkname()).  
 156                                         */
 157 
 158int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 159{
 160        if(ulen<0||ulen>MAX_SOCK_ADDR)
 161                return -EINVAL;
 162        if(ulen==0)
 163                return 0;
 164        if(copy_from_user(kaddr,uaddr,ulen))
 165                return -EFAULT;
 166        return 0;
 167}
 168
 169int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 170{
 171        int err;
 172        int len;
 173
 174        if((err=get_user(len, ulen)))
 175                return err;
 176        if(len>klen)
 177                len=klen;
 178        if(len<0 || len> MAX_SOCK_ADDR)
 179                return -EINVAL;
 180        if(len)
 181        {
 182                if(copy_to_user(uaddr,kaddr,len))
 183                        return -EFAULT;
 184        }
 185        /*
 186         *      "fromlen shall refer to the value before truncation.."
 187         *                      1003.1g
 188         */
 189        return __put_user(klen, ulen);
 190}
 191
 192/*
 193 *      Obtains the first available file descriptor and sets it up for use. 
 194 */
 195
 196static int get_fd(struct inode *inode)
 197{
 198        int fd;
 199
 200        /*
 201         *      Find a file descriptor suitable for return to the user. 
 202         */
 203
 204        fd = get_unused_fd();
 205        if (fd >= 0) {
 206                struct file *file = get_empty_filp();
 207
 208                if (!file) {
 209                        put_unused_fd(fd);
 210                        return -ENFILE;
 211                }
 212
 213                file->f_dentry = d_alloc_root(inode, NULL);
 214                if (!file->f_dentry) {
 215                        put_filp(file);
 216                        put_unused_fd(fd);
 217                        return -ENOMEM;
 218                }
 219
 220                /*
 221                 * The socket maintains a reference to the inode, so we
 222                 * have to increment the count.
 223                 */
 224                inode->i_count++;
 225
 226                current->files->fd[fd] = file;
 227                file->f_op = &socket_file_ops;
 228                file->f_mode = 3;
 229                file->f_flags = O_RDWR;
 230                file->f_pos = 0;
 231        }
 232        return fd;
 233}
 234
 235extern __inline__ struct socket *socki_lookup(struct inode *inode)
 236{
 237        return &inode->u.socket_i;
 238}
 239
 240/*
 241 *      Go from a file number to its socket slot.
 242 */
 243
 244extern __inline__ struct socket *sockfd_lookup(int fd, int *err)
 245{
 246        struct file *file;
 247        struct inode *inode;
 248
 249        if (!(file = fget(fd)))
 250        {
 251                *err = -EBADF;
 252                return NULL;
 253        }
 254
 255        inode = file->f_dentry->d_inode;
 256        if (!inode || !inode->i_sock || !socki_lookup(inode))
 257        {
 258                *err = -ENOTSOCK;
 259                fput(file);
 260                return NULL;
 261        }
 262
 263        return socki_lookup(inode);
 264}
 265
 266extern __inline__ void sockfd_put(struct socket *sock)
 267{
 268        fput(sock->file);
 269}
 270
 271/*
 272 *      Allocate a socket.
 273 */
 274
 275struct socket *sock_alloc(void)
 276{
 277        struct inode * inode;
 278        struct socket * sock;
 279
 280        inode = get_empty_inode();
 281        if (!inode)
 282                return NULL;
 283
 284        sock = socki_lookup(inode);
 285
 286        inode->i_mode = S_IFSOCK;
 287        inode->i_sock = 1;
 288        inode->i_uid = current->uid;
 289        inode->i_gid = current->gid;
 290
 291        sock->inode = inode;
 292        init_waitqueue(&sock->wait);
 293        sock->fasync_list = NULL;
 294        sock->state = SS_UNCONNECTED;
 295        sock->flags = 0;
 296        sock->ops = NULL;
 297        sock->sk = NULL;
 298        sock->file = NULL;
 299
 300        sockets_in_use++;
 301        return sock;
 302}
 303
 304void sock_release(struct socket *sock)
 305{
 306        int oldstate;
 307
 308        if ((oldstate = sock->state) != SS_UNCONNECTED)
 309                sock->state = SS_DISCONNECTING;
 310
 311        if (sock->ops) 
 312                sock->ops->release(sock, NULL);
 313
 314        --sockets_in_use;       /* Bookkeeping.. */
 315        sock->file=NULL;
 316        iput(sock->inode);
 317}
 318
 319int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 320{
 321        int err;
 322        struct scm_cookie scm;
 323
 324        err = scm_send(sock, msg, &scm);
 325        if (err < 0)
 326                return err;
 327
 328        err = sock->ops->sendmsg(sock, msg, size, &scm);
 329
 330        scm_destroy(&scm);
 331
 332        return err;
 333}
 334
 335int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 336{
 337        struct scm_cookie scm;
 338
 339        memset(&scm, 0, sizeof(scm));
 340
 341        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 342
 343        if (size < 0)
 344                return size;
 345
 346        scm_recv(sock, msg, &scm, flags);
 347
 348        return size;
 349}
 350
 351
 352/*
 353 *      Sockets are not seekable.
 354 */
 355
 356static long long sock_lseek(struct inode *inode, struct file *file,
 357                            long long offset, int whence)
 358{
 359        return -ESPIPE;
 360}
 361
 362/*
 363 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 364 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 365 */
 366
 367static long sock_read(struct inode *inode, struct file *file,
 368                      char *ubuf, unsigned long size)
 369{
 370        struct socket *sock;
 371        struct iovec iov;
 372        struct msghdr msg;
 373
 374        sock = socki_lookup(inode); 
 375  
 376        if (size==0)            /* Match SYS5 behaviour */
 377                return 0;
 378
 379        msg.msg_name=NULL;
 380        msg.msg_namelen=0;
 381        msg.msg_iov=&iov;
 382        msg.msg_iovlen=1;
 383        msg.msg_control=NULL;
 384        msg.msg_controllen=0;
 385        iov.iov_base=ubuf;
 386        iov.iov_len=size;
 387
 388        return sock_recvmsg(sock, &msg, size,
 389                            !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT);
 390}
 391
 392
 393/*
 394 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1 is
 395 *      readable by the user process.
 396 */
 397
 398static long sock_write(struct inode *inode, struct file *file,
 399                       const char *ubuf, unsigned long size)
 400{
 401        struct socket *sock;
 402        struct msghdr msg;
 403        struct iovec iov;
 404        
 405        sock = socki_lookup(inode); 
 406
 407        if(size==0)             /* Match SYS5 behaviour */
 408                return 0;
 409
 410        msg.msg_name=NULL;
 411        msg.msg_namelen=0;
 412        msg.msg_iov=&iov;
 413        msg.msg_iovlen=1;
 414        msg.msg_control=NULL;
 415        msg.msg_controllen=0;
 416        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 417        iov.iov_base=(void *)ubuf;
 418        iov.iov_len=size;
 419        
 420        return sock_sendmsg(sock, &msg, size);
 421}
 422
 423int sock_readv_writev(int type, struct inode * inode, struct file * file,
 424                      const struct iovec * iov, long count, long size)
 425{
 426        struct msghdr msg;
 427        struct socket *sock;
 428
 429        sock = socki_lookup(inode);
 430
 431        msg.msg_name = NULL;
 432        msg.msg_namelen = 0;
 433        msg.msg_control = NULL;
 434        msg.msg_controllen = 0;
 435        msg.msg_iov = (struct iovec *) iov;
 436        msg.msg_iovlen = count;
 437        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 438
 439        /* read() does a VERIFY_WRITE */
 440        if (type == VERIFY_WRITE)
 441                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 442        return sock_sendmsg(sock, &msg, size);
 443}
 444
 445
 446/*
 447 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 448 *      with it - that's up to the protocol still.
 449 */
 450
 451int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 452           unsigned long arg)
 453{
 454        struct socket *sock = socki_lookup(inode);
 455        return sock->ops->ioctl(sock, cmd, arg);
 456}
 457
 458
 459static unsigned int sock_poll(struct file *file, poll_table * wait)
 460{
 461        struct socket *sock;
 462
 463        sock = socki_lookup(file->f_dentry->d_inode);
 464
 465        /*
 466         *      We can't return errors to poll, so it's either yes or no. 
 467         */
 468
 469        return sock->ops->poll(sock, wait);
 470}
 471
 472
 473int sock_close(struct inode *inode, struct file *filp)
 474{
 475        /*
 476         *      It was possible the inode is NULL we were 
 477         *      closing an unfinished socket. 
 478         */
 479
 480        if (!inode)
 481        {
 482                printk(KERN_DEBUG "sock_close: NULL inode\n");
 483                return 0;
 484        }
 485        sock_fasync(inode, filp, 0);
 486        sock_release(socki_lookup(inode));
 487        return 0;
 488}
 489
 490/*
 491 *      Update the socket async list
 492 */
 493 
 494static int sock_fasync(struct inode *inode, struct file *filp, int on)
 495{
 496        struct fasync_struct *fa, *fna=NULL, **prev;
 497        struct socket *sock;
 498        unsigned long flags;
 499        
 500        if (on)
 501        {
 502                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 503                if(fna==NULL)
 504                        return -ENOMEM;
 505        }
 506
 507        sock = socki_lookup(inode);
 508        
 509        prev=&(sock->fasync_list);
 510        
 511        save_flags(flags);
 512        cli();
 513        
 514        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 515                if (fa->fa_file==filp)
 516                        break;
 517        
 518        if(on)
 519        {
 520                if(fa!=NULL)
 521                {
 522                        kfree_s(fna,sizeof(struct fasync_struct));
 523                        restore_flags(flags);
 524                        return 0;
 525                }
 526                fna->fa_file=filp;
 527                fna->magic=FASYNC_MAGIC;
 528                fna->fa_next=sock->fasync_list;
 529                sock->fasync_list=fna;
 530        }
 531        else
 532        {
 533                if (fa!=NULL)
 534                {
 535                        *prev=fa->fa_next;
 536                        kfree_s(fa,sizeof(struct fasync_struct));
 537                }
 538        }
 539        restore_flags(flags);
 540        return 0;
 541}
 542
 543int sock_wake_async(struct socket *sock, int how)
 544{
 545        if (!sock || !sock->fasync_list)
 546                return -1;
 547        switch (how)
 548        {
 549                case 0:
 550                        kill_fasync(sock->fasync_list, SIGIO);
 551                        break;
 552                case 1:
 553                        if (!(sock->flags & SO_WAITDATA))
 554                                kill_fasync(sock->fasync_list, SIGIO);
 555                        break;
 556                case 2:
 557                        if (sock->flags & SO_NOSPACE)
 558                        {
 559                                kill_fasync(sock->fasync_list, SIGIO);
 560                                sock->flags &= ~SO_NOSPACE;
 561                        }
 562                        break;
 563        }
 564        return 0;
 565}
 566
 567
 568int sock_create(int family, int type, int protocol, struct socket **res)
 569{
 570        int i;
 571        struct socket *sock;
 572
 573        /*
 574         *      Check protocol is in range
 575         */
 576        if(family<0||family>=NPROTO)
 577                return -EINVAL;
 578                
 579#if defined(CONFIG_KERNELD) && defined(CONFIG_NET)
 580        /* Attempt to load a protocol module if the find failed. 
 581         * 
 582         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 583         * requested real, full-featured networking support upon configuration.
 584         * Otherwise module support will break!
 585         */
 586        if (net_families[family]==NULL)
 587        {
 588                char module_name[30];
 589                sprintf(module_name,"net-pf-%d",family);
 590                request_module(module_name);
 591        }
 592#endif
 593
 594        if (net_families[family]==NULL)
 595                return -EINVAL;
 596
 597/*
 598 *      Check that this is a type that we know how to manipulate and
 599 *      the protocol makes sense here. The family can still reject the
 600 *      protocol later.
 601 */
 602  
 603        if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
 604             type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
 605#ifdef CONFIG_XTP
 606                type != SOCK_WEB  &&
 607#endif
 608             type != SOCK_PACKET) || protocol < 0)
 609                        return -EINVAL;
 610
 611/*
 612 *      Allocate the socket and allow the family to set things up. if
 613 *      the protocol is 0, the family is instructed to select an appropriate
 614 *      default.
 615 */
 616
 617        if (!(sock = sock_alloc())) 
 618        {
 619                printk(KERN_WARNING "socket: no more sockets\n");
 620                return -ENFILE;         /* Not exactly a match, but its the
 621                                           closest posix thing */
 622        }
 623
 624        sock->type   = type;
 625
 626        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 627        {
 628                sock_release(sock);
 629                return i;
 630        }
 631
 632        *res = sock;
 633        return 0;
 634}
 635
 636asmlinkage int sys_socket(int family, int type, int protocol)
 637{
 638        int retval;
 639        struct socket *sock;
 640
 641        lock_kernel();
 642
 643        retval = sock_create(family, type, protocol, &sock);
 644        if (retval < 0)
 645                goto out;
 646
 647        retval = get_fd(sock->inode);
 648        if (retval < 0) {
 649                sock_release(sock);
 650                goto out;
 651        }
 652
 653        sock->file = current->files->fd[retval];
 654out:
 655        unlock_kernel();
 656        return retval;
 657}
 658
 659/*
 660 *      Create a pair of connected sockets.
 661 */
 662
 663asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2])
 664{
 665        int fd1, fd2, i;
 666        struct socket *sock1=NULL, *sock2=NULL;
 667        int err;
 668
 669        lock_kernel();
 670
 671        /*
 672         * Obtain the first socket and check if the underlying protocol
 673         * supports the socketpair call.
 674         */
 675
 676        if ((fd1 = sys_socket(family, type, protocol)) < 0) {
 677                err = fd1;
 678                goto out;
 679        }
 680
 681        sock1 = sockfd_lookup(fd1, &err);
 682        if (!sock1)
 683                goto out;
 684        /*
 685         *      Now grab another socket and try to connect the two together. 
 686         */
 687        err = -EINVAL;
 688        if ((fd2 = sys_socket(family, type, protocol)) < 0) 
 689        {
 690                sys_close(fd1);
 691                goto out;
 692        }
 693
 694        sock2 = sockfd_lookup(fd2,&err);
 695        if (!sock2)
 696                goto out;
 697        if ((i = sock1->ops->socketpair(sock1, sock2)) < 0) 
 698        {
 699                sys_close(fd1);
 700                sys_close(fd2);
 701                err = i;
 702        }
 703        else
 704        {
 705                err = put_user(fd1, &usockvec[0]); 
 706                if (!err) 
 707                        err = put_user(fd2, &usockvec[1]);
 708                if (err) {
 709                        sys_close(fd1);
 710                        sys_close(fd2);
 711                }
 712        }
 713out:
 714        if(sock1)
 715                sockfd_put(sock1);
 716        if(sock2)
 717                sockfd_put(sock2);
 718        unlock_kernel();
 719        return err;
 720}
 721
 722
 723/*
 724 *      Bind a name to a socket. Nothing much to do here since it's
 725 *      the protocol's responsibility to handle the local address.
 726 *
 727 *      We move the socket address to kernel space before we call
 728 *      the protocol layer (having also checked the address is ok).
 729 */
 730 
 731asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
 732{
 733        struct socket *sock;
 734        char address[MAX_SOCK_ADDR];
 735        int err;
 736
 737        lock_kernel();
 738        if((sock = sockfd_lookup(fd,&err))!=NULL)
 739        {
 740                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
 741                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
 742                sockfd_put(sock);
 743        }                       
 744        unlock_kernel();
 745        return err;
 746}
 747
 748
 749/*
 750 *      Perform a listen. Basically, we allow the protocol to do anything
 751 *      necessary for a listen, and if that works, we mark the socket as
 752 *      ready for listening.
 753 */
 754
 755asmlinkage int sys_listen(int fd, int backlog)
 756{
 757        struct socket *sock;
 758        int err;
 759        
 760        lock_kernel();
 761        if((sock = sockfd_lookup(fd, &err))!=NULL)
 762        {
 763                err=sock->ops->listen(sock, backlog);
 764                sockfd_put(sock);
 765        }
 766        unlock_kernel();
 767        return err;
 768}
 769
 770
 771/*
 772 *      For accept, we attempt to create a new socket, set up the link
 773 *      with the client, wake up the client, then return the new
 774 *      connected fd. We collect the address of the connector in kernel
 775 *      space and move it to user at the very end. This is unclean because
 776 *      we open the socket then return an error.
 777 *
 778 *      1003.1g adds the ability to recvmsg() to query connection pending
 779 *      status to recvmsg. We need to add that support in a way thats
 780 *      clean when we restucture accept also.
 781 */
 782
 783asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 784{
 785        struct inode *inode;
 786        struct socket *sock, *newsock;
 787        int err;
 788        char address[MAX_SOCK_ADDR];
 789        int len;
 790
 791        lock_kernel();
 792restart:
 793        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 794        {
 795                if (!(newsock = sock_alloc())) 
 796                {
 797                        err=-EMFILE;
 798                        goto out;
 799                }
 800
 801                inode = newsock->inode;
 802                newsock->type = sock->type;
 803
 804                if ((err = sock->ops->dup(newsock, sock)) < 0) 
 805                {
 806                        sock_release(newsock);
 807                        goto out;
 808                }
 809
 810                err = newsock->ops->accept(sock, newsock, current->files->fd[fd]->f_flags);
 811
 812                if (err < 0)
 813                {
 814                        sock_release(newsock);
 815                        goto out;
 816                }
 817                newsock = socki_lookup(inode);
 818
 819                if ((err = get_fd(inode)) < 0) 
 820                {
 821                        sock_release(newsock);
 822                        err=-EINVAL;
 823                        goto out;
 824                }
 825
 826                newsock->file = current->files->fd[err];
 827        
 828                if (upeer_sockaddr)
 829                {
 830                        /* Handle the race where the accept works and we
 831                           then getname after it has closed again */
 832                        if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
 833                        {
 834                                sys_close(err);
 835                                goto restart;
 836                        }
 837                        move_addr_to_user(address,len, upeer_sockaddr, upeer_addrlen);
 838                }
 839out:
 840                sockfd_put(sock);               
 841        }
 842        unlock_kernel();
 843        return err;
 844}
 845
 846
 847/*
 848 *      Attempt to connect to a socket with the server address.  The address
 849 *      is in user space so we verify it is OK and move it to kernel space.
 850 *
 851 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 852 *      break bindings
 853 *
 854 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 855 *      other SEQPACKET protocols that take time to connect() as it doesn't
 856 *      include the -EINPROGRESS status for such sockets.
 857 */
 858 
 859asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 860{
 861        struct socket *sock;
 862        char address[MAX_SOCK_ADDR];
 863        int err;
 864
 865        lock_kernel();
 866        if ((sock = sockfd_lookup(fd,&err))!=NULL)
 867        {
 868                if((err=move_addr_to_kernel(uservaddr,addrlen,address))>=0)
 869                        err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
 870                             current->files->fd[fd]->f_flags);
 871                sockfd_put(sock);
 872        }
 873        unlock_kernel();
 874        return err;
 875}
 876
 877/*
 878 *      Get the local address ('name') of a socket object. Move the obtained
 879 *      name to user space.
 880 */
 881
 882asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 883{
 884        struct socket *sock;
 885        char address[MAX_SOCK_ADDR];
 886        int len;
 887        int err;
 888        
 889        lock_kernel();
 890        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 891        {
 892                if((err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 0))==0)
 893                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 894                sockfd_put(sock);
 895        }
 896        unlock_kernel();
 897        return err;
 898}
 899
 900/*
 901 *      Get the remote address ('name') of a socket object. Move the obtained
 902 *      name to user space.
 903 */
 904 
 905asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 906{
 907        struct socket *sock;
 908        char address[MAX_SOCK_ADDR];
 909        int len;
 910        int err;
 911
 912        lock_kernel();
 913        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 914        {
 915                if((err=sock->ops->getname(sock, (struct sockaddr *)address, &len, 1))==0)
 916                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 917                sockfd_put(sock);
 918        }
 919        unlock_kernel();
 920        return err;
 921}
 922
 923/*
 924 *      Send a datagram down a socket. The datagram as with write() is
 925 *      in user space. We check it can be read.
 926 */
 927
 928asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
 929{
 930        struct socket *sock;
 931        int err;
 932        struct msghdr msg;
 933        struct iovec iov;
 934
 935        lock_kernel();
 936        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 937        {
 938                if(len>=0)
 939                {
 940                        iov.iov_base=buff;
 941                        iov.iov_len=len;
 942                        msg.msg_name=NULL;
 943                        msg.msg_namelen=0;
 944                        msg.msg_iov=&iov;
 945                        msg.msg_iovlen=1;
 946                        msg.msg_control=NULL;
 947                        msg.msg_controllen=0;
 948                        if (current->files->fd[fd]->f_flags & O_NONBLOCK)
 949                                flags |= MSG_DONTWAIT;
 950                        msg.msg_flags=flags;
 951                        err=sock_sendmsg(sock, &msg, len);
 952                }
 953                else
 954                        err=-EINVAL;
 955                sockfd_put(sock);
 956        }
 957        unlock_kernel();
 958        return err;
 959}
 960
 961/*
 962 *      Send a datagram to a given address. We move the address into kernel
 963 *      space and check the user space data area is readable before invoking
 964 *      the protocol.
 965 */
 966
 967asmlinkage int sys_sendto(int fd, void * buff, size_t len, unsigned flags,
 968           struct sockaddr *addr, int addr_len)
 969{
 970        struct socket *sock;
 971        char address[MAX_SOCK_ADDR];
 972        int err;
 973        struct msghdr msg;
 974        struct iovec iov;
 975        
 976        lock_kernel();
 977        if ((sock = sockfd_lookup(fd,&err))!=NULL)
 978        {
 979                iov.iov_base=buff;
 980                iov.iov_len=len;
 981                msg.msg_name=NULL;
 982                msg.msg_iov=&iov;
 983                msg.msg_iovlen=1;
 984                msg.msg_control=NULL;
 985                msg.msg_controllen=0;
 986                msg.msg_namelen=addr_len;
 987                if(addr)
 988                {
 989                        err=move_addr_to_kernel(addr,addr_len,address);
 990                        if (err < 0)
 991                                goto bad;
 992                        msg.msg_name=address;
 993                }
 994                if (current->files->fd[fd]->f_flags & O_NONBLOCK)
 995                        flags |= MSG_DONTWAIT;
 996                msg.msg_flags=flags;
 997                err=sock_sendmsg(sock, &msg, len);
 998bad:            
 999                sockfd_put(sock);
1000        }
1001        unlock_kernel();
1002        return err;
1003}
1004
1005
1006
1007/*
1008 *      Receive a frame from the socket and optionally record the address of the 
1009 *      sender. We verify the buffers are writable and if needed move the
1010 *      sender address from kernel to user space.
1011 */
1012
1013asmlinkage int sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1014             struct sockaddr *addr, int *addr_len)
1015{
1016        struct socket *sock;
1017        struct iovec iov;
1018        struct msghdr msg;
1019        char address[MAX_SOCK_ADDR];
1020        int err,err2;
1021
1022        lock_kernel();
1023        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1024        {  
1025                msg.msg_control=NULL;
1026                msg.msg_controllen=0;
1027                msg.msg_iovlen=1;
1028                msg.msg_iov=&iov;
1029                iov.iov_len=size;
1030                iov.iov_base=ubuf;
1031                msg.msg_name=address;
1032                msg.msg_namelen=MAX_SOCK_ADDR;
1033                err=sock_recvmsg(sock, &msg, size,
1034                          (current->files->fd[fd]->f_flags & O_NONBLOCK) ? (flags | MSG_DONTWAIT) : flags);
1035                if(err>=0 && addr!=NULL)
1036                {
1037                        err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1038                        if(err2<0)
1039                                err=err2;
1040                }
1041                sockfd_put(sock);                       
1042        }               
1043        unlock_kernel();
1044        return err;
1045}
1046
1047/*
1048 *      Receive a datagram from a socket. 
1049 */
1050
1051asmlinkage int sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1052{
1053        return sys_recvfrom(fd,ubuf,size,flags, NULL, NULL);
1054}
1055
1056/*
1057 *      Set a socket option. Because we don't know the option lengths we have
1058 *      to pass the user mode parameter for the protocols to sort out.
1059 */
1060 
1061asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1062{
1063        int err;
1064        struct socket *sock;
1065        
1066        lock_kernel();
1067        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1068        {
1069                if (level == SOL_SOCKET)
1070                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1071                else
1072                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1073                sockfd_put(sock);
1074        }
1075        unlock_kernel();
1076        return err;
1077}
1078
1079/*
1080 *      Get a socket option. Because we don't know the option lengths we have
1081 *      to pass a user mode parameter for the protocols to sort out.
1082 */
1083
1084asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1085{
1086        int err;
1087        struct socket *sock;
1088
1089        lock_kernel();
1090        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1091        {
1092                if (level == SOL_SOCKET)
1093                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1094                else
1095                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1096                sockfd_put(sock);
1097        }
1098        unlock_kernel();
1099        return err;
1100}
1101
1102
1103/*
1104 *      Shutdown a socket.
1105 */
1106 
1107asmlinkage int sys_shutdown(int fd, int how)
1108{
1109        int err;
1110        struct socket *sock;
1111
1112        lock_kernel();
1113        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1114        {
1115                err=sock->ops->shutdown(sock, how);
1116                sockfd_put(sock);
1117        }
1118        unlock_kernel();
1119        return err;
1120}
1121
1122/*
1123 *      BSD sendmsg interface
1124 */
1125 
1126asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1127{
1128        struct socket *sock;
1129        char address[MAX_SOCK_ADDR];
1130        struct iovec iov[UIO_FASTIOV];
1131        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1132        struct msghdr msg_sys;
1133        int err= -EINVAL;
1134        int total_len;
1135        unsigned char *ctl_buf = ctl;
1136        
1137        lock_kernel();
1138
1139        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1140        {
1141                err=-EFAULT;
1142                goto out; 
1143        }
1144        /* do not move before msg_sys is valid */
1145        if (msg_sys.msg_iovlen>UIO_MAXIOV)
1146                goto out;
1147        /* This will also move the address data into kernel space */
1148        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1149        if (err < 0)
1150                goto out;
1151        total_len=err;
1152
1153        if (msg_sys.msg_controllen) 
1154        {
1155                /* XXX We just limit the buffer and assume that the 
1156                 * skbuff accounting stops it from going too far.
1157                 * I hope this is correct.
1158                 */
1159                if (msg_sys.msg_controllen > sizeof(ctl) &&
1160                        msg_sys.msg_controllen <= 256)
1161                {
1162                        ctl_buf = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
1163                        if (ctl_buf == NULL) 
1164                        {
1165                                err = -ENOBUFS;
1166                                goto failed2;
1167                        }
1168                }
1169                if (copy_from_user(ctl_buf, msg_sys.msg_control, 
1170                                            msg_sys.msg_controllen)) {
1171                        err = -EFAULT;
1172                        goto failed;
1173                }
1174                msg_sys.msg_control = ctl_buf;
1175        }
1176        msg_sys.msg_flags = flags;
1177        if (current->files->fd[fd]->f_flags & O_NONBLOCK)
1178                msg_sys.msg_flags |= MSG_DONTWAIT;
1179
1180        if ((sock = sockfd_lookup(fd,&err))!=NULL)
1181        {
1182                err = sock_sendmsg(sock, &msg_sys, total_len);
1183                sockfd_put(sock);
1184        }
1185
1186failed:
1187        if (ctl_buf != ctl)
1188                kfree_s(ctl_buf, msg_sys.msg_controllen);
1189failed2:
1190        if (msg_sys.msg_iov != iov)
1191                kfree(msg_sys.msg_iov);
1192out:
1193        unlock_kernel();
1194        return err;
1195}
1196
1197/*
1198 *      BSD recvmsg interface
1199 */
1200 
1201asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1202{
1203        struct socket *sock;
1204        struct iovec iovstack[UIO_FASTIOV];
1205        struct iovec *iov=iovstack;
1206        struct msghdr msg_sys;
1207        unsigned long cmsg_ptr;
1208        int err;
1209        int total_len;
1210        int len = 0;
1211
1212        /* kernel mode address */
1213        char addr[MAX_SOCK_ADDR];
1214
1215        /* user mode address pointers */
1216        struct sockaddr *uaddr;
1217        int *uaddr_len;
1218        
1219        lock_kernel();
1220        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1221        {
1222                err=-EFAULT;
1223                goto out;
1224        }
1225        if (msg_sys.msg_iovlen>UIO_MAXIOV)
1226        {
1227                err=-EINVAL;
1228                goto out;
1229        }
1230        
1231        /*
1232         *      Save the user-mode address (verify_iovec will change the
1233         *      kernel msghdr to use the kernel address space)
1234         */
1235         
1236        uaddr = msg_sys.msg_name;
1237        uaddr_len = &msg->msg_namelen;
1238        err=verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1239        if (err<0)
1240                goto out;
1241
1242        total_len=err;
1243
1244        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1245        msg_sys.msg_flags = 0;
1246        
1247        if (current->files->fd[fd]->f_flags&O_NONBLOCK)
1248                flags |= MSG_DONTWAIT;
1249
1250        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1251        {
1252                err=sock_recvmsg(sock, &msg_sys, total_len, flags);
1253                if(err>=0)
1254                        len=err;
1255                sockfd_put(sock);
1256        }
1257        if (msg_sys.msg_iov != iov)
1258                kfree(msg_sys.msg_iov);
1259
1260        if (uaddr != NULL && err>=0)
1261                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1262        if (err>=0) {
1263                err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1264                if (!err)
1265                        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1266                                                         &msg->msg_controllen);
1267        }
1268out:
1269        unlock_kernel();
1270        if(err<0)
1271                return err;
1272        return len;
1273}
1274
1275
1276/*
1277 *      Perform a file control on a socket file descriptor.
1278 *
1279 *      FIXME: does this need an fd lock ?
1280 */
1281
1282int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1283{
1284        struct socket *sock;
1285
1286        sock = socki_lookup (filp->f_dentry->d_inode);
1287        if (sock && sock->ops)
1288                return sock->ops->fcntl(sock, cmd, arg);
1289        return(-EINVAL);
1290}
1291
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number. The per-entry names assume the usual SYS_* numbering from
 * <linux/net.h> (SYS_SOCKET == 1 ... SYS_RECVMSG == 17).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18] = {
	AL(0),		/*  0: unused */
	AL(3),		/*  1: socket */
	AL(3),		/*  2: bind */
	AL(3),		/*  3: connect */
	AL(2),		/*  4: listen */
	AL(3),		/*  5: accept */
	AL(3),		/*  6: getsockname */
	AL(3),		/*  7: getpeername */
	AL(4),		/*  8: socketpair */
	AL(4),		/*  9: send */
	AL(4),		/* 10: recv */
	AL(6),		/* 11: sendto */
	AL(6),		/* 12: recvfrom */
	AL(2),		/* 13: shutdown */
	AL(5),		/* 14: setsockopt */
	AL(5),		/* 15: getsockopt */
	AL(3),		/* 16: sendmsg */
	AL(3)		/* 17: recvmsg */
};
#undef AL
1298
1299/*
1300 *      System call vectors. 
1301 *
1302 *      Argument checking cleaned up. Saved 20% in size.
1303 *  This function doesn't need to set the kernel lock because
1304 *  it is set by the callees. 
1305 */
1306
1307asmlinkage int sys_socketcall(int call, unsigned long *args)
1308{
1309        unsigned long a[6];
1310        unsigned long a0,a1;
1311        int err;
1312
1313        if(call<1||call>SYS_RECVMSG)
1314                return -EINVAL;
1315
1316        /* copy_from_user should be SMP safe. */
1317        if (copy_from_user(a, args, nargs[call]))
1318                return -EFAULT;
1319                
1320        a0=a[0];
1321        a1=a[1];
1322        
1323        switch(call) 
1324        {
1325                case SYS_SOCKET:
1326                        err = sys_socket(a0,a1,a[2]);
1327                        break;
1328                case SYS_BIND:
1329                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1330                        break;
1331                case SYS_CONNECT:
1332                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1333                        break;
1334                case SYS_LISTEN:
1335                        err = sys_listen(a0,a1);
1336                        break;
1337                case SYS_ACCEPT:
1338                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1339                        break;
1340                case SYS_GETSOCKNAME:
1341                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1342                        break;
1343                case SYS_GETPEERNAME:
1344                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1345                        break;
1346                case SYS_SOCKETPAIR:
1347                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1348                        break;
1349                case SYS_SEND:
1350                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1351                        break;
1352                case SYS_SENDTO:
1353                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1354                                         (struct sockaddr *)a[4], a[5]);
1355                        break;
1356                case SYS_RECV:
1357                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1358                        break;
1359                case SYS_RECVFROM:
1360                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1361                                           (struct sockaddr *)a[4], (int *)a[5]);
1362                        break;
1363                case SYS_SHUTDOWN:
1364                        err = sys_shutdown(a0,a1);
1365                        break;
1366                case SYS_SETSOCKOPT:
1367                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1368                        break;
1369                case SYS_GETSOCKOPT:
1370                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1371                        break;
1372                case SYS_SENDMSG:
1373                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1374                        break;
1375                case SYS_RECVMSG:
1376                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1377                        break;
1378                default:
1379                        err = -EINVAL;
1380                        break;
1381        }
1382        return err;
1383}
1384
1385/*
1386 *      This function is called by a protocol handler that wants to
1387 *      advertise its address family, and have it linked into the
1388 *      SOCKET module.
1389 */
1390 
1391int sock_register(struct net_proto_family *ops)
1392{
1393        if (ops->family < 0 || ops->family >= NPROTO)
1394                return -1;
1395
1396        net_families[ops->family]=ops;
1397        return 0;
1398}
1399
1400/*
1401 *      This function is called by a protocol handler that wants to
1402 *      remove its address family, and have it unlinked from the
1403 *      SOCKET module.
1404 */
1405 
1406int sock_unregister(int family)
1407{
1408        if (family < 0 || family >= NPROTO)
1409                return -1;
1410
1411        net_families[family]=NULL;
1412        return 0;
1413}
1414
1415__initfunc(void proto_init(void))
1416{
1417        extern struct net_proto protocols[];    /* Network protocols */
1418        struct net_proto *pro;
1419
1420        /* Kick all configured protocols. */
1421        pro = protocols;
1422        while (pro->name != NULL) 
1423        {
1424                (*pro->init_func)(pro);
1425                pro++;
1426        }
1427        /* We're all done... */
1428}
1429
1430extern void sk_init(void);
1431
1432__initfunc(void sock_init(void))
1433{
1434        int i;
1435
1436        printk(KERN_INFO "Swansea University Computer Society NET3.039 for Linux 2.1\n");
1437
1438        /*
1439         *      Initialize all address (protocol) families. 
1440         */
1441         
1442        for (i = 0; i < NPROTO; i++) 
1443                net_families[i] = NULL;
1444
1445        /*
1446         *      Initialize sock SLAB cache.
1447         */
1448         
1449        sk_init();
1450        
1451        /*
1452         *      The netlink device handler may be needed early.
1453         */
1454
1455#ifdef CONFIG_NETLINK
1456        init_netlink();
1457#endif
1458
1459        /*
1460         *      Wan router layer. 
1461         */
1462
1463#ifdef CONFIG_WAN_ROUTER         
1464        wanrouter_init();
1465#endif
1466
1467        /*
1468         *      Attach the firewall module if configured
1469         */
1470         
1471#ifdef CONFIG_FIREWALL   
1472        fwchain_init();
1473#endif
1474
1475        /*
1476         *      Initialize the protocols module. 
1477         */
1478
1479        proto_init();
1480}
1481
1482int socket_get_info(char *buffer, char **start, off_t offset, int length)
1483{
1484        int len = sprintf(buffer, "sockets: used %d\n", sockets_in_use);
1485        if (offset >= len)
1486        {
1487                *start = buffer;
1488                return 0;
1489        }
1490        *start = buffer + offset;
1491        len -= offset;
1492        if (len > length)
1493                len = length;
1494        return len;
1495}
1496
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.