linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *
  46 *
  47 *              This program is free software; you can redistribute it and/or
  48 *              modify it under the terms of the GNU General Public License
  49 *              as published by the Free Software Foundation; either version
  50 *              2 of the License, or (at your option) any later version.
  51 *
  52 *
  53 *      This module is effectively the top level interface to the BSD socket
  54 *      paradigm. 
  55 *
  56 */
  57
  58#include <linux/config.h>
  59#include <linux/mm.h>
  60#include <linux/smp_lock.h>
  61#include <linux/socket.h>
  62#include <linux/file.h>
  63#include <linux/net.h>
  64#include <linux/interrupt.h>
  65#include <linux/netdevice.h>
  66#include <linux/proc_fs.h>
  67#include <linux/firewall.h>
  68#include <linux/wanrouter.h>
  69#include <linux/init.h>
  70#include <linux/poll.h>
  71
  72#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  73#include <linux/kmod.h>
  74#endif
  75
  76#include <asm/uaccess.h>
  77
  78#include <linux/inet.h>
  79#include <net/ip.h>
  80#include <net/sock.h>
  81#include <net/rarp.h>
  82#include <net/tcp.h>
  83#include <net/udp.h>
  84#include <net/scm.h>
  85
   86static int sock_no_open(struct inode *irrelevant, struct file *dontcare); /* always refuses: returns -ENXIO */
   87static long long sock_lseek(struct file *file, long long offset, int whence); /* always returns -ESPIPE */
   88static ssize_t sock_read(struct file *file, char *buf,
   89                         size_t size, loff_t *ppos); /* read() path, built on sock_recvmsg() */
   90static ssize_t sock_write(struct file *file, const char *buf,
   91                          size_t size, loff_t *ppos); /* write() path, built on sock_sendmsg() */
   92
   93static int sock_close(struct inode *inode, struct file *file); /* drops fasync entry, then sock_release() */
   94static unsigned int sock_poll(struct file *file,
   95                              struct poll_table_struct *wait); /* delegates to sock->ops->poll */
   96static int sock_ioctl(struct inode *inode, struct file *file,
   97                      unsigned int cmd, unsigned long arg); /* delegates to sock->ops->ioctl */
   98static int sock_fasync(int fd, struct file *filp, int on); /* adds/removes an entry on sock->fasync_list */
  99
 100
 101/*
 102 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 103 *      in the operation structures but are done directly via the socketcall() multiplexor.
 104 */
 105
  106static struct file_operations socket_file_ops = { /* installed on every socket file by get_fd() */
  107        sock_lseek,             /* seeking is refused with -ESPIPE */
  108        sock_read,              /* implemented via sock_recvmsg() */
  109        sock_write,             /* implemented via sock_sendmsg() */
  110        NULL,                   /* readdir */
  111        sock_poll,              /* forwarded to the protocol's poll op */
  112        sock_ioctl,             /* forwarded to the protocol's ioctl op */
  113        NULL,                   /* mmap */
  114        sock_no_open,           /* special open code to disallow open via /proc */
  115        NULL,                   /* flush */
  116        sock_close,             /* releases the socket via sock_release() */
  117        NULL,                   /* no fsync */
  118        sock_fasync             /* maintains the socket's async notification list */
  119};
 120
 121/*
 122 *      The protocol list. Each protocol is registered in here.
 123 */
 124
  125struct net_proto_family *net_families[NPROTO]; /* indexed by family number; sock_create() treats a NULL slot as "not available" */
 126
 127/*
 128 *      Statistics counters of the socket lists
 129 */
 130
  131static int sockets_in_use  = 0; /* incremented in sock_alloc(), decremented in sock_release() */
 132
 133/*
 134 *      Support routines. Move socket addresses back and forth across the kernel/user
 135 *      divide and look after the messy bits.
 136 */
 137
 138#define MAX_SOCK_ADDR   128             /* 108 for Unix domain - 
 139                                           16 for IP, 16 for IPX,
 140                                           24 for IPv6,
 141                                           about 80 for AX.25 
 142                                           must be at least one bigger than
 143                                           the AF_UNIX size (see net/unix/af_unix.c
 144                                           :unix_mkname()).  
 145                                         */
 146
 147int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 148{
 149        if(ulen<0||ulen>MAX_SOCK_ADDR)
 150                return -EINVAL;
 151        if(ulen==0)
 152                return 0;
 153        if(copy_from_user(kaddr,uaddr,ulen))
 154                return -EFAULT;
 155        return 0;
 156}
 157
 158int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 159{
 160        int err;
 161        int len;
 162
 163        if((err=get_user(len, ulen)))
 164                return err;
 165        if(len>klen)
 166                len=klen;
 167        if(len<0 || len> MAX_SOCK_ADDR)
 168                return -EINVAL;
 169        if(len)
 170        {
 171                if(copy_to_user(uaddr,kaddr,len))
 172                        return -EFAULT;
 173        }
 174        /*
 175         *      "fromlen shall refer to the value before truncation.."
 176         *                      1003.1g
 177         */
 178        return __put_user(klen, ulen);
 179}
 180
 181/*
 182 *      Obtains the first available file descriptor and sets it up for use. 
 183 */
 184
 185static int get_fd(struct inode *inode)
 186{
 187        int fd;
 188
 189        /*
 190         *      Find a file descriptor suitable for return to the user. 
 191         */
 192
 193        fd = get_unused_fd();
 194        if (fd >= 0) {
 195                struct file *file = get_empty_filp();
 196
 197                if (!file) {
 198                        put_unused_fd(fd);
 199                        return -ENFILE;
 200                }
 201
 202                file->f_dentry = d_alloc_root(inode, NULL);
 203                if (!file->f_dentry) {
 204                        put_filp(file);
 205                        put_unused_fd(fd);
 206                        return -ENOMEM;
 207                }
 208
 209                /*
 210                 * The socket maintains a reference to the inode, so we
 211                 * have to increment the count.
 212                 */
 213                inode->i_count++;
 214
 215                fd_install(fd, file);
 216                file->f_op = &socket_file_ops;
 217                file->f_mode = 3;
 218                file->f_flags = O_RDWR;
 219                file->f_pos = 0;
 220        }
 221        return fd;
 222}
 223
 224extern __inline__ struct socket *socki_lookup(struct inode *inode)
 225{
 226        return &inode->u.socket_i;
 227}
 228
 229/*
 230 *      Go from a file number to its socket slot.
 231 */
 232
 233extern struct socket *sockfd_lookup(int fd, int *err)
 234{
 235        struct file *file;
 236        struct inode *inode;
 237        struct socket *sock;
 238
 239        if (!(file = fget(fd)))
 240        {
 241                *err = -EBADF;
 242                return NULL;
 243        }
 244
 245        inode = file->f_dentry->d_inode;
 246        if (!inode || !inode->i_sock || !(sock = socki_lookup(inode)))
 247        {
 248                *err = -ENOTSOCK;
 249                fput(file);
 250                return NULL;
 251        }
 252
 253        if (sock->file != file) {
 254                printk(KERN_ERR "socki_lookup: socket file changed!\n");
 255                sock->file = file;
 256        }
 257        return sock;
 258}
 259
 260extern __inline__ void sockfd_put(struct socket *sock)
 261{
 262        fput(sock->file);
 263}
 264
 265/*
 266 *      Allocate a socket.
 267 */
 268
 269struct socket *sock_alloc(void)
 270{
 271        struct inode * inode;
 272        struct socket * sock;
 273
 274        inode = get_empty_inode();
 275        if (!inode)
 276                return NULL;
 277
 278        sock = socki_lookup(inode);
 279
 280        inode->i_mode = S_IFSOCK|S_IRWXUGO;
 281        inode->i_sock = 1;
 282        inode->i_uid = current->uid;
 283        inode->i_gid = current->gid;
 284
 285        sock->inode = inode;
 286        init_waitqueue(&sock->wait);
 287        sock->fasync_list = NULL;
 288        sock->state = SS_UNCONNECTED;
 289        sock->flags = 0;
 290        sock->ops = NULL;
 291        sock->sk = NULL;
 292        sock->file = NULL;
 293
 294        sockets_in_use++;
 295        return sock;
 296}
 297
 298/*
 299 *      In theory you can't get an open on this inode, but /proc provides
 300 *      a back door. Remember to keep it shut otherwise you'll let the
 301 *      creepy crawlies in.
 302 */
 303  
 304static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 305{
 306        return -ENXIO;
 307}
 308
 309void sock_release(struct socket *sock)
 310{
 311        if (sock->state != SS_UNCONNECTED)
 312                sock->state = SS_DISCONNECTING;
 313
 314        if (sock->ops) 
 315                sock->ops->release(sock, NULL);
 316
 317        if (sock->fasync_list)
 318                printk(KERN_ERR "sock_release: fasync list not empty!\n");
 319
 320        --sockets_in_use;       /* Bookkeeping.. */
 321        sock->file=NULL;
 322        iput(sock->inode);
 323}
 324
 325int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 326{
 327        int err;
 328        struct scm_cookie scm;
 329
 330        err = scm_send(sock, msg, &scm);
 331        if (err >= 0) {
 332                err = sock->ops->sendmsg(sock, msg, size, &scm);
 333                scm_destroy(&scm);
 334        }
 335        return err;
 336}
 337
 338int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 339{
 340        struct scm_cookie scm;
 341
 342        memset(&scm, 0, sizeof(scm));
 343
 344        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 345        if (size >= 0)
 346                scm_recv(sock, msg, &scm, flags);
 347
 348        return size;
 349}
 350
 351
 352/*
 353 *      Sockets are not seekable.
 354 */
 355
 356static long long sock_lseek(struct file *file,long long offset, int whence)
 357{
 358        return -ESPIPE;
 359}
 360
 361/*
 362 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 363 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 364 */
 365
 366static ssize_t sock_read(struct file *file, char *ubuf,
 367                         size_t size, loff_t *ppos)
 368{
 369        struct socket *sock;
 370        struct iovec iov;
 371        struct msghdr msg;
 372
 373        if (ppos != &file->f_pos)
 374                return -ESPIPE;
 375        if (size==0)            /* Match SYS5 behaviour */
 376                return 0;
 377
 378        sock = socki_lookup(file->f_dentry->d_inode); 
 379
 380        msg.msg_name=NULL;
 381        msg.msg_namelen=0;
 382        msg.msg_iov=&iov;
 383        msg.msg_iovlen=1;
 384        msg.msg_control=NULL;
 385        msg.msg_controllen=0;
 386        iov.iov_base=ubuf;
 387        iov.iov_len=size;
 388
 389        return sock_recvmsg(sock, &msg, size,
 390                            !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT);
 391}
 392
 393
 394/*
 395 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
 396 *      is readable by the user process.
 397 */
 398
 399static ssize_t sock_write(struct file *file, const char *ubuf,
 400                          size_t size, loff_t *ppos)
 401{
 402        struct socket *sock;
 403        struct msghdr msg;
 404        struct iovec iov;
 405        
 406        if (ppos != &file->f_pos)
 407                return -ESPIPE;
 408        if(size==0)             /* Match SYS5 behaviour */
 409                return 0;
 410
 411        sock = socki_lookup(file->f_dentry->d_inode); 
 412
 413        msg.msg_name=NULL;
 414        msg.msg_namelen=0;
 415        msg.msg_iov=&iov;
 416        msg.msg_iovlen=1;
 417        msg.msg_control=NULL;
 418        msg.msg_controllen=0;
 419        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 420        iov.iov_base=(void *)ubuf;
 421        iov.iov_len=size;
 422        
 423        return sock_sendmsg(sock, &msg, size);
 424}
 425
 426int sock_readv_writev(int type, struct inode * inode, struct file * file,
 427                      const struct iovec * iov, long count, long size)
 428{
 429        struct msghdr msg;
 430        struct socket *sock;
 431
 432        sock = socki_lookup(inode);
 433
 434        msg.msg_name = NULL;
 435        msg.msg_namelen = 0;
 436        msg.msg_control = NULL;
 437        msg.msg_controllen = 0;
 438        msg.msg_iov = (struct iovec *) iov;
 439        msg.msg_iovlen = count;
 440        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 441
 442        /* read() does a VERIFY_WRITE */
 443        if (type == VERIFY_WRITE)
 444                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 445        return sock_sendmsg(sock, &msg, size);
 446}
 447
 448
 449/*
 450 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 451 *      with it - that's up to the protocol still.
 452 */
 453
 454int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 455           unsigned long arg)
 456{
 457        struct socket *sock = socki_lookup(inode);
 458        return sock->ops->ioctl(sock, cmd, arg);
 459}
 460
 461
 462static unsigned int sock_poll(struct file *file, poll_table * wait)
 463{
 464        struct socket *sock;
 465
 466        sock = socki_lookup(file->f_dentry->d_inode);
 467
 468        /*
 469         *      We can't return errors to poll, so it's either yes or no. 
 470         */
 471
 472        return sock->ops->poll(file, sock, wait);
 473}
 474
 475
 476int sock_close(struct inode *inode, struct file *filp)
 477{
 478        /*
 479         *      It was possible the inode is NULL we were 
 480         *      closing an unfinished socket. 
 481         */
 482
 483        if (!inode)
 484        {
 485                printk(KERN_DEBUG "sock_close: NULL inode\n");
 486                return 0;
 487        }
 488        sock_fasync(-1, filp, 0);
 489        sock_release(socki_lookup(inode));
 490        return 0;
 491}
 492
 493/*
 494 *      Update the socket async list
 495 */
 496
 497static int sock_fasync(int fd, struct file *filp, int on)
 498{
 499        struct fasync_struct *fa, *fna=NULL, **prev;
 500        struct socket *sock;
 501        
 502        if (on)
 503        {
 504                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 505                if(fna==NULL)
 506                        return -ENOMEM;
 507        }
 508
 509        sock = socki_lookup(filp->f_dentry->d_inode);
 510        
 511        prev=&(sock->fasync_list);
 512
 513        lock_sock(sock->sk); 
 514        
 515        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 516                if (fa->fa_file==filp)
 517                        break;
 518        
 519        if(on)
 520        {
 521                if(fa!=NULL)
 522                {
 523                        fa->fa_fd=fd;
 524                        kfree_s(fna,sizeof(struct fasync_struct));
 525                        release_sock(sock->sk); 
 526                        return 0;
 527                }
 528                fna->fa_file=filp;
 529                fna->fa_fd=fd;
 530                fna->magic=FASYNC_MAGIC;
 531                fna->fa_next=sock->fasync_list;
 532                sock->fasync_list=fna;
 533        }
 534        else
 535        {
 536                if (fa!=NULL)
 537                {
 538                        *prev=fa->fa_next;
 539                        kfree_s(fa,sizeof(struct fasync_struct));
 540                }
 541        }
 542
 543        release_sock(sock->sk); 
 544        return 0;
 545}
 546
 547int sock_wake_async(struct socket *sock, int how)
 548{
 549        if (!sock || !sock->fasync_list)
 550                return -1;
 551        switch (how)
 552        {
 553        case 1:
 554                if (sock->flags & SO_WAITDATA)
 555                        break;
 556                goto call_kill;
 557        case 2:
 558                if (!(sock->flags & SO_NOSPACE))
 559                        break;
 560                sock->flags &= ~SO_NOSPACE;
 561                /* fall through */
 562        case 0:
 563        call_kill:
 564                kill_fasync(sock->fasync_list, SIGIO);
 565                break;
 566        }
 567        return 0;
 568}
 569
 570
 571int sock_create(int family, int type, int protocol, struct socket **res)
 572{
 573        int i;
 574        struct socket *sock;
 575
 576        /*
 577         *      Check protocol is in range
 578         */
 579        if(family<0||family>=NPROTO)
 580                return -EINVAL;
 581                
 582#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 583        /* Attempt to load a protocol module if the find failed. 
 584         * 
 585         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 586         * requested real, full-featured networking support upon configuration.
 587         * Otherwise module support will break!
 588         */
 589        if (net_families[family]==NULL)
 590        {
 591                char module_name[30];
 592                sprintf(module_name,"net-pf-%d",family);
 593                request_module(module_name);
 594        }
 595#endif
 596
 597        if (net_families[family]==NULL)
 598                return -EINVAL;
 599
 600/*
 601 *      Check that this is a type that we know how to manipulate and
 602 *      the protocol makes sense here. The family can still reject the
 603 *      protocol later.
 604 */
 605 
 606        if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
 607             type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
 608#ifdef CONFIG_XTP
 609                type != SOCK_WEB  &&
 610#endif
 611             type != SOCK_PACKET) || protocol < 0)
 612                        return -EINVAL;
 613
 614/*
 615 *      Allocate the socket and allow the family to set things up. if
 616 *      the protocol is 0, the family is instructed to select an appropriate
 617 *      default.
 618 */
 619
 620        if (!(sock = sock_alloc())) 
 621        {
 622                printk(KERN_WARNING "socket: no more sockets\n");
 623                return -ENFILE;         /* Not exactly a match, but its the
 624                                           closest posix thing */
 625        }
 626
 627        sock->type   = type;
 628
 629        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 630        {
 631                sock_release(sock);
 632                return i;
 633        }
 634
 635        *res = sock;
 636        return 0;
 637}
 638
 639asmlinkage int sys_socket(int family, int type, int protocol)
 640{
 641        int retval;
 642        struct socket *sock;
 643
 644        lock_kernel();
 645
 646        retval = sock_create(family, type, protocol, &sock);
 647        if (retval < 0)
 648                goto out;
 649
 650        retval = get_fd(sock->inode);
 651        if (retval < 0)
 652                goto out_release;
 653        sock->file = fcheck(retval);
 654
 655out:
 656        unlock_kernel();
 657        return retval;
 658
 659out_release:
 660        sock_release(sock);
 661        goto out;
 662}
 663
 664/*
 665 *      Create a pair of connected sockets.
 666 */
 667
 668asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2])
 669{
 670        struct socket *sock1, *sock2;
 671        int fd1, fd2, err;
 672
 673        lock_kernel();
 674
 675        /*
 676         * Obtain the first socket and check if the underlying protocol
 677         * supports the socketpair call.
 678         */
 679
 680        err = sys_socket(family, type, protocol);
 681        if (err < 0)
 682                goto out;
 683        fd1 = err;
 684
 685        /*
 686         * Now grab another socket
 687         */
 688        err = -EINVAL;
 689        fd2 = sys_socket(family, type, protocol);
 690        if (fd2 < 0) 
 691                goto out_close1;
 692
 693        /*
 694         * Get the sockets for the two fd's
 695         */
 696        sock1 = sockfd_lookup(fd1, &err);
 697        if (!sock1)
 698                goto out_close2;
 699        sock2 = sockfd_lookup(fd2, &err);
 700        if (!sock2)
 701                goto out_put1;
 702
 703        /* try to connect the two sockets together */ 
 704        err = sock1->ops->socketpair(sock1, sock2);
 705        if (err < 0) 
 706                goto out_put2;
 707
 708        err = put_user(fd1, &usockvec[0]); 
 709        if (err) 
 710                goto out_put2;
 711        err = put_user(fd2, &usockvec[1]);
 712
 713out_put2:
 714        sockfd_put(sock2);
 715out_put1:
 716        sockfd_put(sock1);
 717
 718        if (err) {
 719        out_close2:
 720                sys_close(fd2);
 721        out_close1:
 722                sys_close(fd1);
 723        }
 724out:
 725        unlock_kernel();
 726        return err;
 727}
 728
 729
 730/*
 731 *      Bind a name to a socket. Nothing much to do here since it's
 732 *      the protocol's responsibility to handle the local address.
 733 *
 734 *      We move the socket address to kernel space before we call
 735 *      the protocol layer (having also checked the address is ok).
 736 */
 737
 738asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
 739{
 740        struct socket *sock;
 741        char address[MAX_SOCK_ADDR];
 742        int err;
 743
 744        lock_kernel();
 745        if((sock = sockfd_lookup(fd,&err))!=NULL)
 746        {
 747                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
 748                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
 749                sockfd_put(sock);
 750        }                       
 751        unlock_kernel();
 752        return err;
 753}
 754
 755
 756/*
 757 *      Perform a listen. Basically, we allow the protocol to do anything
 758 *      necessary for a listen, and if that works, we mark the socket as
 759 *      ready for listening.
 760 */
 761
 762asmlinkage int sys_listen(int fd, int backlog)
 763{
 764        struct socket *sock;
 765        int err;
 766        
 767        lock_kernel();
 768        if((sock = sockfd_lookup(fd, &err))!=NULL)
 769        {
 770                err=sock->ops->listen(sock, backlog);
 771                sockfd_put(sock);
 772        }
 773        unlock_kernel();
 774        return err;
 775}
 776
 777
 778/*
 779 *      For accept, we attempt to create a new socket, set up the link
 780 *      with the client, wake up the client, then return the new
 781 *      connected fd. We collect the address of the connector in kernel
 782 *      space and move it to user at the very end. This is unclean because
 783 *      we open the socket then return an error.
 784 *
  785 *      1003.1g adds the ability to use recvmsg() to query the
  786 *      connection-pending status. We need to add that support in a way
  787 *      that's clean when we restructure accept as well.
 788 */
 789
 790asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 791{
 792        struct inode *inode;
 793        struct socket *sock, *newsock;
 794        int err, len;
 795        char address[MAX_SOCK_ADDR];
 796
 797        lock_kernel();
 798        sock = sockfd_lookup(fd, &err);
 799        if (!sock)
 800                goto out;
 801
 802restart:
 803        err = -EMFILE;
 804        if (!(newsock = sock_alloc())) 
 805                goto out_put;
 806
 807        inode = newsock->inode;
 808        newsock->type = sock->type;
 809
 810        err = sock->ops->dup(newsock, sock);
 811        if (err < 0) 
 812                goto out_release;
 813
 814        err = newsock->ops->accept(sock, newsock, sock->file->f_flags);
 815        if (err < 0)
 816                goto out_release;
 817        newsock = socki_lookup(inode);
 818
 819        if ((err = get_fd(inode)) < 0) 
 820                goto out_release;
 821        newsock->file = fcheck(err);
 822
 823        if (upeer_sockaddr)
 824        {
 825                /* Handle the race where the accept works and we
 826                   then getname after it has closed again */
 827                if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
 828                {
 829                        sys_close(err);
 830                        goto restart;
 831                }
 832                /* N.B. Should check for errors here */
 833                move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
 834        }
 835
 836out_put:
 837        sockfd_put(sock);
 838out:
 839        unlock_kernel();
 840        return err;
 841
 842out_release:
 843        sock_release(newsock);
 844        goto out_put;
 845}
 846
 847
 848/*
 849 *      Attempt to connect to a socket with the server address.  The address
 850 *      is in user space so we verify it is OK and move it to kernel space.
 851 *
 852 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 853 *      break bindings
 854 *
 855 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 856 *      other SEQPACKET protocols that take time to connect() as it doesn't
 857 *      include the -EINPROGRESS status for such sockets.
 858 */
 859
 860asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 861{
 862        struct socket *sock;
 863        char address[MAX_SOCK_ADDR];
 864        int err;
 865
 866        lock_kernel();
 867        sock = sockfd_lookup(fd, &err);
 868        if (!sock)
 869                goto out;
 870        err = move_addr_to_kernel(uservaddr, addrlen, address);
 871        if (err < 0)
 872                goto out_put;
 873        err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
 874                                 sock->file->f_flags);
 875out_put:
 876        sockfd_put(sock);
 877out:
 878        unlock_kernel();
 879        return err;
 880}
 881
 882/*
 883 *      Get the local address ('name') of a socket object. Move the obtained
 884 *      name to user space.
 885 */
 886
 887asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 888{
 889        struct socket *sock;
 890        char address[MAX_SOCK_ADDR];
 891        int len, err;
 892        
 893        lock_kernel();
 894        sock = sockfd_lookup(fd, &err);
 895        if (!sock)
 896                goto out;
 897        err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
 898        if (err)
 899                goto out_put;
 900        err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
 901
 902out_put:
 903        sockfd_put(sock);
 904out:
 905        unlock_kernel();
 906        return err;
 907}
 908
 909/*
 910 *      Get the remote address ('name') of a socket object. Move the obtained
 911 *      name to user space.
 912 */
 913
 914asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 915{
 916        struct socket *sock;
 917        char address[MAX_SOCK_ADDR];
 918        int len, err;
 919
 920        lock_kernel();
 921        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 922        {
 923                err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
 924                if (!err)
 925                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 926                sockfd_put(sock);
 927        }
 928        unlock_kernel();
 929        return err;
 930}
 931
 932/*
 933 *      Send a datagram to a given address. We move the address into kernel
 934 *      space and check the user space data area is readable before invoking
 935 *      the protocol.
 936 */
 937
 938asmlinkage int sys_sendto(int fd, void * buff, size_t len, unsigned flags,
 939           struct sockaddr *addr, int addr_len)
 940{
 941        struct socket *sock;
 942        char address[MAX_SOCK_ADDR];
 943        int err;
 944        struct msghdr msg;
 945        struct iovec iov;
 946        
 947        lock_kernel();
 948        sock = sockfd_lookup(fd, &err);
 949        if (!sock)
 950                goto out;
 951        iov.iov_base=buff;
 952        iov.iov_len=len;
 953        msg.msg_name=NULL;
 954        msg.msg_iov=&iov;
 955        msg.msg_iovlen=1;
 956        msg.msg_control=NULL;
 957        msg.msg_controllen=0;
 958        msg.msg_namelen=addr_len;
 959        if(addr)
 960        {
 961                err = move_addr_to_kernel(addr, addr_len, address);
 962                if (err < 0)
 963                        goto out_put;
 964                msg.msg_name=address;
 965        }
 966        if (sock->file->f_flags & O_NONBLOCK)
 967                flags |= MSG_DONTWAIT;
 968        msg.msg_flags = flags;
 969        err = sock_sendmsg(sock, &msg, len);
 970
 971out_put:                
 972        sockfd_put(sock);
 973out:
 974        unlock_kernel();
 975        return err;
 976}
 977
 978/*
 979 *      Send a datagram down a socket. 
 980 */
 981
 982asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
 983{
 984        return sys_sendto(fd, buff, len, flags, NULL, 0);
 985}
 986
 987/*
 988 *      Receive a frame from the socket and optionally record the address of the 
 989 *      sender. We verify the buffers are writable and if needed move the
 990 *      sender address from kernel to user space.
 991 */
 992
 993asmlinkage int sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
 994             struct sockaddr *addr, int *addr_len)
 995{
 996        struct socket *sock;
 997        struct iovec iov;
 998        struct msghdr msg;
 999        char address[MAX_SOCK_ADDR];
1000        int err,err2;
1001
1002        lock_kernel();
1003        sock = sockfd_lookup(fd, &err);
1004        if (!sock)
1005                goto out;
1006
1007        msg.msg_control=NULL;
1008        msg.msg_controllen=0;
1009        msg.msg_iovlen=1;
1010        msg.msg_iov=&iov;
1011        iov.iov_len=size;
1012        iov.iov_base=ubuf;
1013        msg.msg_name=address;
1014        msg.msg_namelen=MAX_SOCK_ADDR;
1015        if (sock->file->f_flags & O_NONBLOCK)
1016                flags |= MSG_DONTWAIT;
1017        err=sock_recvmsg(sock, &msg, size, flags);
1018
1019        if(err >= 0 && addr != NULL)
1020        {
1021                err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1022                if(err2<0)
1023                        err=err2;
1024        }
1025        sockfd_put(sock);                       
1026out:
1027        unlock_kernel();
1028        return err;
1029}
1030
1031/*
1032 *      Receive a datagram from a socket. 
1033 */
1034
1035asmlinkage int sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1036{
1037        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1038}
1039
1040/*
1041 *      Set a socket option. Because we don't know the option lengths we have
1042 *      to pass the user mode parameter for the protocols to sort out.
1043 */
1044
1045asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1046{
1047        int err;
1048        struct socket *sock;
1049        
1050        lock_kernel();
1051        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1052        {
1053                if (level == SOL_SOCKET)
1054                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1055                else
1056                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1057                sockfd_put(sock);
1058        }
1059        unlock_kernel();
1060        return err;
1061}
1062
1063/*
1064 *      Get a socket option. Because we don't know the option lengths we have
1065 *      to pass a user mode parameter for the protocols to sort out.
1066 */
1067
1068asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1069{
1070        int err;
1071        struct socket *sock;
1072
1073        lock_kernel();
1074        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1075        {
1076                if (level == SOL_SOCKET)
1077                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1078                else
1079                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1080                sockfd_put(sock);
1081        }
1082        unlock_kernel();
1083        return err;
1084}
1085
1086
1087/*
1088 *      Shutdown a socket.
1089 */
1090
1091asmlinkage int sys_shutdown(int fd, int how)
1092{
1093        int err;
1094        struct socket *sock;
1095
1096        lock_kernel();
1097        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1098        {
1099                err=sock->ops->shutdown(sock, how);
1100                sockfd_put(sock);
1101        }
1102        unlock_kernel();
1103        return err;
1104}
1105
1106/*
1107 *      BSD sendmsg interface
1108 */
1109
1110asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1111{
1112        struct socket *sock;
1113        char address[MAX_SOCK_ADDR];
1114        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1115        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1116        unsigned char *ctl_buf = ctl;
1117        struct msghdr msg_sys;
1118        int err, ctl_len, iov_size, total_len;
1119        
1120        lock_kernel();
1121
1122        err = -EFAULT;
1123        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1124                goto out; 
1125
1126        sock = sockfd_lookup(fd, &err);
1127        if (!sock) 
1128                goto out;
1129
1130        /* do not move before msg_sys is valid */
1131        err = -EINVAL;
1132        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1133                goto out_put;
1134
1135        /* Check whether to allocate the iovec area*/
1136        err = -ENOMEM;
1137        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1138        if (msg_sys.msg_iovlen > 1 /* UIO_FASTIOV */) {
1139                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1140                if (!iov)
1141                        goto out_put;
1142        }
1143
1144        /* This will also move the address data into kernel space */
1145        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1146        if (err < 0) 
1147                goto out_freeiov;
1148        total_len = err;
1149
1150        ctl_len = msg_sys.msg_controllen; 
1151        if (ctl_len) 
1152        {
1153                if (ctl_len > sizeof(ctl))
1154                {
1155                        /* Suggested by the Advanced Sockets API for IPv6 draft:
1156                         * Limit the msg_controllen size by the SO_SNDBUF size.
1157                         */
1158                        /* Note - when this code becomes multithreaded on
1159                         * SMP machines you have a race to fix here.
1160                         */
1161                        err = -ENOBUFS;
1162                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1163                        if (ctl_buf == NULL) 
1164                                goto out_freeiov;
1165                }
1166                err = -EFAULT;
1167                if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1168                        goto out_freectl;
1169                msg_sys.msg_control = ctl_buf;
1170        }
1171        msg_sys.msg_flags = flags;
1172
1173        if (sock->file->f_flags & O_NONBLOCK)
1174                msg_sys.msg_flags |= MSG_DONTWAIT;
1175        err = sock_sendmsg(sock, &msg_sys, total_len);
1176
1177out_freectl:
1178        if (ctl_buf != ctl)    
1179                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1180out_freeiov:
1181        if (iov != iovstack)
1182                sock_kfree_s(sock->sk, iov, iov_size);
1183out_put:
1184        sockfd_put(sock);
1185out:       
1186        unlock_kernel();
1187        return err;
1188}
1189
1190/*
1191 *      BSD recvmsg interface
1192 */
1193
1194asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1195{
1196        struct socket *sock;
1197        struct iovec iovstack[UIO_FASTIOV];
1198        struct iovec *iov=iovstack;
1199        struct msghdr msg_sys;
1200        unsigned long cmsg_ptr;
1201        int err, iov_size, total_len, len;
1202
1203        /* kernel mode address */
1204        char addr[MAX_SOCK_ADDR];
1205
1206        /* user mode address pointers */
1207        struct sockaddr *uaddr;
1208        int *uaddr_len;
1209        
1210        lock_kernel();
1211        err=-EFAULT;
1212        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1213                goto out;
1214
1215        sock = sockfd_lookup(fd, &err);
1216        if (!sock)
1217                goto out;
1218
1219        err = -EINVAL;
1220        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1221                goto out_put;
1222        
1223        /* Check whether to allocate the iovec area*/
1224        err = -ENOMEM;
1225        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1226        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1227                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1228                if (!iov)
1229                        goto out_put;
1230        }
1231
1232        /*
1233         *      Save the user-mode address (verify_iovec will change the
1234         *      kernel msghdr to use the kernel address space)
1235         */
1236         
1237        uaddr = msg_sys.msg_name;
1238        uaddr_len = &msg->msg_namelen;
1239        err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1240        if (err < 0)
1241                goto out_freeiov;
1242        total_len=err;
1243
1244        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1245        msg_sys.msg_flags = 0;
1246        
1247        if (sock->file->f_flags & O_NONBLOCK)
1248                flags |= MSG_DONTWAIT;
1249        err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1250        if (err < 0)
1251                goto out_freeiov;
1252        len = err;
1253
1254        if (uaddr != NULL) {
1255                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1256                if (err < 0)
1257                        goto out_freeiov;
1258        }
1259        err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1260        if (err)
1261                goto out_freeiov;
1262        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1263                                                         &msg->msg_controllen);
1264        if (err)
1265                goto out_freeiov;
1266        err = len;
1267
1268out_freeiov:
1269        if (iov != iovstack)
1270                sock_kfree_s(sock->sk, iov, iov_size);
1271out_put:
1272        sockfd_put(sock);
1273out:
1274        unlock_kernel();
1275        return err;
1276}
1277
1278
1279/*
1280 *      Perform a file control on a socket file descriptor.
1281 *
1282 *      Doesn't aquire a fd lock, because no network fcntl
1283 *      function sleeps currently.
1284 */
1285
1286int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1287{
1288        struct socket *sock;
1289
1290        sock = socki_lookup (filp->f_dentry->d_inode);
1291        if (sock && sock->ops)
1292                return sock->ops->fcntl(sock, cmd, arg);
1293        return(-EINVAL);
1294}
1295
/*
 *      Argument list sizes for sys_socketcall, in bytes. The table is
 *      indexed by the call number; AL(n) is the size of n unsigned long
 *      arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18] = {
        AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),       /*  0 ..  5 */
        AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),       /*  6 .. 11 */
        AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)        /* 12 .. 17 */
};
#undef AL
1302
1303/*
1304 *      System call vectors. 
1305 *
1306 *      Argument checking cleaned up. Saved 20% in size.
1307 *  This function doesn't need to set the kernel lock because
1308 *  it is set by the callees. 
1309 */
1310
1311asmlinkage int sys_socketcall(int call, unsigned long *args)
1312{
1313        unsigned long a[6];
1314        unsigned long a0,a1;
1315        int err;
1316
1317        if(call<1||call>SYS_RECVMSG)
1318                return -EINVAL;
1319
1320        /* copy_from_user should be SMP safe. */
1321        if (copy_from_user(a, args, nargs[call]))
1322                return -EFAULT;
1323                
1324        a0=a[0];
1325        a1=a[1];
1326        
1327        switch(call) 
1328        {
1329                case SYS_SOCKET:
1330                        err = sys_socket(a0,a1,a[2]);
1331                        break;
1332                case SYS_BIND:
1333                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1334                        break;
1335                case SYS_CONNECT:
1336                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1337                        break;
1338                case SYS_LISTEN:
1339                        err = sys_listen(a0,a1);
1340                        break;
1341                case SYS_ACCEPT:
1342                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1343                        break;
1344                case SYS_GETSOCKNAME:
1345                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1346                        break;
1347                case SYS_GETPEERNAME:
1348                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1349                        break;
1350                case SYS_SOCKETPAIR:
1351                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1352                        break;
1353                case SYS_SEND:
1354                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1355                        break;
1356                case SYS_SENDTO:
1357                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1358                                         (struct sockaddr *)a[4], a[5]);
1359                        break;
1360                case SYS_RECV:
1361                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1362                        break;
1363                case SYS_RECVFROM:
1364                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1365                                           (struct sockaddr *)a[4], (int *)a[5]);
1366                        break;
1367                case SYS_SHUTDOWN:
1368                        err = sys_shutdown(a0,a1);
1369                        break;
1370                case SYS_SETSOCKOPT:
1371                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1372                        break;
1373                case SYS_GETSOCKOPT:
1374                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1375                        break;
1376                case SYS_SENDMSG:
1377                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1378                        break;
1379                case SYS_RECVMSG:
1380                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1381                        break;
1382                default:
1383                        err = -EINVAL;
1384                        break;
1385        }
1386        return err;
1387}
1388
1389/*
1390 *      This function is called by a protocol handler that wants to
1391 *      advertise its address family, and have it linked into the
1392 *      SOCKET module.
1393 */
1394
1395int sock_register(struct net_proto_family *ops)
1396{
1397        if (ops->family >= NPROTO) {
1398                printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1399                return -ENOBUFS;
1400        }
1401        net_families[ops->family]=ops;
1402        return 0;
1403}
1404
1405/*
1406 *      This function is called by a protocol handler that wants to
1407 *      remove its address family, and have it unlinked from the
1408 *      SOCKET module.
1409 */
1410
1411int sock_unregister(int family)
1412{
1413        if (family < 0 || family >= NPROTO)
1414                return -1;
1415
1416        net_families[family]=NULL;
1417        return 0;
1418}
1419
1420void __init proto_init(void)
1421{
1422        extern struct net_proto protocols[];    /* Network protocols */
1423        struct net_proto *pro;
1424
1425        /* Kick all configured protocols. */
1426        pro = protocols;
1427        while (pro->name != NULL) 
1428        {
1429                (*pro->init_func)(pro);
1430                pro++;
1431        }
1432        /* We're all done... */
1433}
1434
1435extern void sk_init(void);
1436#ifdef CONFIG_WAN_ROUTER
1437extern void wanrouter_init(void);
1438#endif
1439
1440void __init sock_init(void)
1441{
1442        int i;
1443
1444        printk(KERN_INFO "Linux NET4.0 for Linux 2.2\n");
1445        printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1446
1447        /*
1448         *      Initialize all address (protocol) families. 
1449         */
1450         
1451        for (i = 0; i < NPROTO; i++) 
1452                net_families[i] = NULL;
1453
1454        /*
1455         *      Initialize sock SLAB cache.
1456         */
1457         
1458        sk_init();
1459
1460#ifdef SLAB_SKB
1461        /*
1462         *      Initialize skbuff SLAB cache 
1463         */
1464        skb_init();
1465#endif
1466
1467
1468        /*
1469         *      Wan router layer. 
1470         */
1471
1472#ifdef CONFIG_WAN_ROUTER         
1473        wanrouter_init();
1474#endif
1475
1476        /*
1477         *      Attach the firewall module if configured
1478         */
1479         
1480#ifdef CONFIG_FIREWALL   
1481        fwchain_init();
1482#endif
1483
1484        /*
1485         *      Initialize the protocols module. 
1486         */
1487
1488        proto_init();
1489
1490        /*
1491         *      The netlink device handler may be needed early.
1492         */
1493
1494#ifdef  CONFIG_RTNETLINK
1495        rtnetlink_init();
1496#endif
1497#ifdef CONFIG_NETLINK_DEV
1498        init_netlink();
1499#endif
1500}
1501
1502int socket_get_info(char *buffer, char **start, off_t offset, int length)
1503{
1504        int len = sprintf(buffer, "sockets: used %d\n", sockets_in_use);
1505        if (offset >= len)
1506        {
1507                *start = buffer;
1508                return 0;
1509        }
1510        *start = buffer + offset;
1511        len -= offset;
1512        if (len > length)
1513                len = length;
1514        return len;
1515}
1516
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.