linux-old/net/socket.c
<<
>>
Prefs
   1/*
   2 * NET          An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:     @(#)socket.c    1.1.93  18/02/95
   5 *
   6 * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7 *              Ross Biro, <bir7@leland.Stanford.Edu>
   8 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12 *                                      shutdown()
  13 *              Alan Cox        :       verify_area() fixes
  14 *              Alan Cox        :       Removed DDI
  15 *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16 *              Alan Cox        :       Moved a load of checks to the very
  17 *                                      top level.
  18 *              Alan Cox        :       Move address structures to/from user
  19 *                                      mode above the protocol layers.
  20 *              Rob Janssen     :       Allow 0 length sends.
  21 *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22 *                                      tty drivers).
  23 *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24 *              Jeff Uphoff     :       Made max number of sockets command-line
  25 *                                      configurable.
  26 *              Matti Aarnio    :       Made the number of sockets dynamic,
  27 *                                      to be allocated when needed, and mr.
  28 *                                      Uphoff's max is used as max to be
  29 *                                      allowed to allocate.
  30 *              Linus           :       Argh. removed all the socket allocation
  31 *                                      altogether: it's in the inode now.
  32 *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33 *                                      for NetROM and future kernel nfsd type
  34 *                                      stuff.
  35 *              Alan Cox        :       sendmsg/recvmsg basics.
  36 *              Tom Dyas        :       Export net symbols.
  37 *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38 *              Alan Cox        :       Added thread locking to sys_* calls
  39 *                                      for sockets. May have errors at the
  40 *                                      moment.
  41 *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42 *              Andi Kleen      :       Some small cleanups, optimizations,
  43 *                                      and fixed a copy_from_user() bug.
  44 *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *
  46 *
  47 *              This program is free software; you can redistribute it and/or
  48 *              modify it under the terms of the GNU General Public License
  49 *              as published by the Free Software Foundation; either version
  50 *              2 of the License, or (at your option) any later version.
  51 *
  52 *
  53 *      This module is effectively the top level interface to the BSD socket
  54 *      paradigm. 
  55 *
  56 */
  57
  58#include <linux/config.h>
  59#include <linux/mm.h>
  60#include <linux/smp_lock.h>
  61#include <linux/socket.h>
  62#include <linux/file.h>
  63#include <linux/net.h>
  64#include <linux/interrupt.h>
  65#include <linux/netdevice.h>
  66#include <linux/proc_fs.h>
  67#include <linux/firewall.h>
  68#include <linux/wanrouter.h>
  69#include <linux/init.h>
  70#include <linux/poll.h>
  71
  72#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  73#include <linux/kmod.h>
  74#endif
  75
  76#include <asm/uaccess.h>
  77
  78#include <linux/inet.h>
  79#include <net/ip.h>
  80#include <net/sock.h>
  81#include <net/rarp.h>
  82#include <net/tcp.h>
  83#include <net/udp.h>
  84#include <net/scm.h>
  85
  86static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
  87static long long sock_lseek(struct file *file, long long offset, int whence);
  88static ssize_t sock_read(struct file *file, char *buf,
  89                         size_t size, loff_t *ppos);
  90static ssize_t sock_write(struct file *file, const char *buf,
  91                          size_t size, loff_t *ppos);
  92
  93static int sock_close(struct inode *inode, struct file *file);
  94static unsigned int sock_poll(struct file *file,
  95                              struct poll_table_struct *wait);
  96static int sock_ioctl(struct inode *inode, struct file *file,
  97                      unsigned int cmd, unsigned long arg);
  98static int sock_fasync(int fd, struct file *filp, int on);
  99
 100
 101/*
 102 *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 103 *      in the operation structures but are done directly via the socketcall() multiplexor.
 104 */
 105
 106static struct file_operations socket_file_ops = {
 107        sock_lseek,
 108        sock_read,
 109        sock_write,
 110        NULL,                   /* readdir */
 111        sock_poll,
 112        sock_ioctl,
 113        NULL,                   /* mmap */
 114        sock_no_open,           /* special open code to disallow open via /proc */
 115        NULL,                   /* flush */
 116        sock_close,
 117        NULL,                   /* no fsync */
 118        sock_fasync
 119};
 120
 121/*
 122 *      The protocol list. Each protocol is registered in here.
 123 */
 124
 125struct net_proto_family *net_families[NPROTO];
 126
 127/*
 128 *      Statistics counters of the socket lists
 129 */
 130
 131static int sockets_in_use  = 0;
 132
 133/*
 134 *      Support routines. Move socket addresses back and forth across the kernel/user
 135 *      divide and look after the messy bits.
 136 */
 137
 138#define MAX_SOCK_ADDR   128             /* 108 for Unix domain - 
 139                                           16 for IP, 16 for IPX,
 140                                           24 for IPv6,
 141                                           about 80 for AX.25 
 142                                           must be at least one bigger than
 143                                           the AF_UNIX size (see net/unix/af_unix.c
 144                                           :unix_mkname()).  
 145                                         */
 146
 147int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 148{
 149        if(ulen<0||ulen>MAX_SOCK_ADDR)
 150                return -EINVAL;
 151        if(ulen==0)
 152                return 0;
 153        if(copy_from_user(kaddr,uaddr,ulen))
 154                return -EFAULT;
 155        return 0;
 156}
 157
 158int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 159{
 160        int err;
 161        int len;
 162
 163        if((err=get_user(len, ulen)))
 164                return err;
 165        if(len>klen)
 166                len=klen;
 167        if(len<0 || len> MAX_SOCK_ADDR)
 168                return -EINVAL;
 169        if(len)
 170        {
 171                if(copy_to_user(uaddr,kaddr,len))
 172                        return -EFAULT;
 173        }
 174        /*
 175         *      "fromlen shall refer to the value before truncation.."
 176         *                      1003.1g
 177         */
 178        return __put_user(klen, ulen);
 179}
 180
 181/*
 182 *      Obtains the first available file descriptor and sets it up for use. 
 183 */
 184
 185static int get_fd(struct inode *inode)
 186{
 187        int fd;
 188
 189        /*
 190         *      Find a file descriptor suitable for return to the user. 
 191         */
 192
 193        fd = get_unused_fd();
 194        if (fd >= 0) {
 195                struct file *file = get_empty_filp();
 196
 197                if (!file) {
 198                        put_unused_fd(fd);
 199                        return -ENFILE;
 200                }
 201
 202                file->f_dentry = d_alloc_root(inode, NULL);
 203                if (!file->f_dentry) {
 204                        put_filp(file);
 205                        put_unused_fd(fd);
 206                        return -ENOMEM;
 207                }
 208
 209                /*
 210                 * The socket maintains a reference to the inode, so we
 211                 * have to increment the count.
 212                 */
 213                inode->i_count++;
 214
 215                fd_install(fd, file);
 216                file->f_op = &socket_file_ops;
 217                file->f_mode = 3;
 218                file->f_flags = O_RDWR;
 219                file->f_pos = 0;
 220        }
 221        return fd;
 222}
 223
 224extern __inline__ struct socket *socki_lookup(struct inode *inode)
 225{
 226        return &inode->u.socket_i;
 227}
 228
 229/*
 230 *      Go from a file number to its socket slot.
 231 */
 232
 233extern struct socket *sockfd_lookup(int fd, int *err)
 234{
 235        struct file *file;
 236        struct inode *inode;
 237        struct socket *sock;
 238
 239        if (!(file = fget(fd)))
 240        {
 241                *err = -EBADF;
 242                return NULL;
 243        }
 244
 245        inode = file->f_dentry->d_inode;
 246        if (!inode || !inode->i_sock || !(sock = socki_lookup(inode)))
 247        {
 248                *err = -ENOTSOCK;
 249                fput(file);
 250                return NULL;
 251        }
 252
 253        if (sock->file != file) {
 254                printk(KERN_ERR "socki_lookup: socket file changed!\n");
 255                sock->file = file;
 256        }
 257        return sock;
 258}
 259
 260extern __inline__ void sockfd_put(struct socket *sock)
 261{
 262        fput(sock->file);
 263}
 264
 265/*
 266 *      Allocate a socket.
 267 */
 268
 269struct socket *sock_alloc(void)
 270{
 271        struct inode * inode;
 272        struct socket * sock;
 273
 274        inode = get_empty_inode();
 275        if (!inode)
 276                return NULL;
 277
 278        sock = socki_lookup(inode);
 279
 280        inode->i_mode = S_IFSOCK|S_IRWXUGO;
 281        inode->i_sock = 1;
 282        inode->i_uid = current->fsuid;
 283        inode->i_gid = current->fsgid;
 284
 285        sock->inode = inode;
 286        init_waitqueue(&sock->wait);
 287        sock->fasync_list = NULL;
 288        sock->state = SS_UNCONNECTED;
 289        sock->flags = 0;
 290        sock->ops = NULL;
 291        sock->sk = NULL;
 292        sock->file = NULL;
 293
 294        sockets_in_use++;
 295        return sock;
 296}
 297
 298/*
 299 *      In theory you can't get an open on this inode, but /proc provides
 300 *      a back door. Remember to keep it shut otherwise you'll let the
 301 *      creepy crawlies in.
 302 */
 303  
 304static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 305{
 306        return -ENXIO;
 307}
 308
 309void sock_release(struct socket *sock)
 310{
 311        if (sock->state != SS_UNCONNECTED)
 312                sock->state = SS_DISCONNECTING;
 313
 314        if (sock->ops) 
 315                sock->ops->release(sock, NULL);
 316
 317        if (sock->fasync_list)
 318                printk(KERN_ERR "sock_release: fasync list not empty!\n");
 319
 320        --sockets_in_use;       /* Bookkeeping.. */
 321        sock->file=NULL;
 322        iput(sock->inode);
 323}
 324
 325int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 326{
 327        int err;
 328        struct scm_cookie scm;
 329
 330        err = scm_send(sock, msg, &scm);
 331        if (err >= 0) {
 332                err = sock->ops->sendmsg(sock, msg, size, &scm);
 333                scm_destroy(&scm);
 334        }
 335        return err;
 336}
 337
 338int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 339{
 340        struct scm_cookie scm;
 341
 342        memset(&scm, 0, sizeof(scm));
 343
 344        size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 345        if (size >= 0)
 346                scm_recv(sock, msg, &scm, flags);
 347
 348        return size;
 349}
 350
 351
 352/*
 353 *      Sockets are not seekable.
 354 */
 355
 356static long long sock_lseek(struct file *file,long long offset, int whence)
 357{
 358        return -ESPIPE;
 359}
 360
 361/*
 362 *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 363 *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 364 */
 365
 366static ssize_t sock_read(struct file *file, char *ubuf,
 367                         size_t size, loff_t *ppos)
 368{
 369        struct socket *sock;
 370        struct iovec iov;
 371        struct msghdr msg;
 372
 373        if (ppos != &file->f_pos)
 374                return -ESPIPE;
 375        if (size==0)            /* Match SYS5 behaviour */
 376                return 0;
 377
 378        sock = socki_lookup(file->f_dentry->d_inode); 
 379
 380        msg.msg_name=NULL;
 381        msg.msg_namelen=0;
 382        msg.msg_iov=&iov;
 383        msg.msg_iovlen=1;
 384        msg.msg_control=NULL;
 385        msg.msg_controllen=0;
 386        iov.iov_base=ubuf;
 387        iov.iov_len=size;
 388
 389        return sock_recvmsg(sock, &msg, size,
 390                            !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT);
 391}
 392
 393
 394/*
 395 *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
 396 *      is readable by the user process.
 397 */
 398
 399static ssize_t sock_write(struct file *file, const char *ubuf,
 400                          size_t size, loff_t *ppos)
 401{
 402        struct socket *sock;
 403        struct msghdr msg;
 404        struct iovec iov;
 405        
 406        if (ppos != &file->f_pos)
 407                return -ESPIPE;
 408        if(size==0)             /* Match SYS5 behaviour */
 409                return 0;
 410
 411        sock = socki_lookup(file->f_dentry->d_inode); 
 412
 413        msg.msg_name=NULL;
 414        msg.msg_namelen=0;
 415        msg.msg_iov=&iov;
 416        msg.msg_iovlen=1;
 417        msg.msg_control=NULL;
 418        msg.msg_controllen=0;
 419        msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 420        iov.iov_base=(void *)ubuf;
 421        iov.iov_len=size;
 422        
 423        return sock_sendmsg(sock, &msg, size);
 424}
 425
 426int sock_readv_writev(int type, struct inode * inode, struct file * file,
 427                      const struct iovec * iov, long count, long size)
 428{
 429        struct msghdr msg;
 430        struct socket *sock;
 431
 432        sock = socki_lookup(inode);
 433
 434        msg.msg_name = NULL;
 435        msg.msg_namelen = 0;
 436        msg.msg_control = NULL;
 437        msg.msg_controllen = 0;
 438        msg.msg_iov = (struct iovec *) iov;
 439        msg.msg_iovlen = count;
 440        msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 441
 442        /* read() does a VERIFY_WRITE */
 443        if (type == VERIFY_WRITE)
 444                return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 445        return sock_sendmsg(sock, &msg, size);
 446}
 447
 448
 449/*
 450 *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 451 *      with it - that's up to the protocol still.
 452 */
 453
 454int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 455           unsigned long arg)
 456{
 457        struct socket *sock = socki_lookup(inode);
 458        return sock->ops->ioctl(sock, cmd, arg);
 459}
 460
 461
 462static unsigned int sock_poll(struct file *file, poll_table * wait)
 463{
 464        struct socket *sock;
 465
 466        sock = socki_lookup(file->f_dentry->d_inode);
 467
 468        /*
 469         *      We can't return errors to poll, so it's either yes or no. 
 470         */
 471
 472        return sock->ops->poll(file, sock, wait);
 473}
 474
 475
 476int sock_close(struct inode *inode, struct file *filp)
 477{
 478        /*
 479         *      It was possible the inode is NULL we were 
 480         *      closing an unfinished socket. 
 481         */
 482
 483        if (!inode)
 484        {
 485                printk(KERN_DEBUG "sock_close: NULL inode\n");
 486                return 0;
 487        }
 488        sock_fasync(-1, filp, 0);
 489        sock_release(socki_lookup(inode));
 490        return 0;
 491}
 492
 493/*
 494 *      Update the socket async list
 495 */
 496
 497static int sock_fasync(int fd, struct file *filp, int on)
 498{
 499        struct fasync_struct *fa, *fna=NULL, **prev;
 500        struct socket *sock;
 501        
 502        if (on)
 503        {
 504                fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 505                if(fna==NULL)
 506                        return -ENOMEM;
 507        }
 508
 509        sock = socki_lookup(filp->f_dentry->d_inode);
 510        
 511        prev=&(sock->fasync_list);
 512
 513        lock_sock(sock->sk); 
 514        
 515        for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 516                if (fa->fa_file==filp)
 517                        break;
 518        
 519        if(on)
 520        {
 521                if(fa!=NULL)
 522                {
 523                        fa->fa_fd=fd;
 524                        synchronize_irq();
 525                        kfree_s(fna,sizeof(struct fasync_struct));
 526                        release_sock(sock->sk); 
 527                        return 0;
 528                }
 529                fna->fa_file=filp;
 530                fna->fa_fd=fd;
 531                fna->magic=FASYNC_MAGIC;
 532                fna->fa_next=sock->fasync_list;
 533                sock->fasync_list=fna;
 534        }
 535        else
 536        {
 537                if (fa!=NULL)
 538                {
 539                        *prev=fa->fa_next;
 540                        synchronize_irq();
 541                        kfree_s(fa,sizeof(struct fasync_struct));
 542                }
 543        }
 544
 545        release_sock(sock->sk); 
 546        return 0;
 547}
 548
 549int sock_wake_async(struct socket *sock, int how)
 550{
 551        if (!sock || !sock->fasync_list)
 552                return -1;
 553        switch (how)
 554        {
 555        case 1:
 556                if (sock->flags & SO_WAITDATA)
 557                        break;
 558                goto call_kill;
 559        case 2:
 560                if (!(sock->flags & SO_NOSPACE))
 561                        break;
 562                sock->flags &= ~SO_NOSPACE;
 563                /* fall through */
 564        case 0:
 565        call_kill:
 566                kill_fasync(sock->fasync_list, SIGIO);
 567                break;
 568        }
 569        return 0;
 570}
 571
 572
 573int sock_create(int family, int type, int protocol, struct socket **res)
 574{
 575        int i;
 576        struct socket *sock;
 577
 578        /*
 579         *      Check protocol is in range
 580         */
 581        if(family<0||family>=NPROTO)
 582                return -EINVAL;
 583                
 584#if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 585        /* Attempt to load a protocol module if the find failed. 
 586         * 
 587         * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
 588         * requested real, full-featured networking support upon configuration.
 589         * Otherwise module support will break!
 590         */
 591        if (net_families[family]==NULL)
 592        {
 593                char module_name[30];
 594                sprintf(module_name,"net-pf-%d",family);
 595                request_module(module_name);
 596        }
 597#endif
 598
 599        if (net_families[family]==NULL)
 600                return -EINVAL;
 601
 602/*
 603 *      Check that this is a type that we know how to manipulate and
 604 *      the protocol makes sense here. The family can still reject the
 605 *      protocol later.
 606 */
 607 
 608        if ((type != SOCK_STREAM && type != SOCK_DGRAM &&
 609             type != SOCK_SEQPACKET && type != SOCK_RAW && type != SOCK_RDM &&
 610#ifdef CONFIG_XTP
 611                type != SOCK_WEB  &&
 612#endif
 613             type != SOCK_PACKET) || protocol < 0)
 614                        return -EINVAL;
 615
 616/*
 617 *      Allocate the socket and allow the family to set things up. if
 618 *      the protocol is 0, the family is instructed to select an appropriate
 619 *      default.
 620 */
 621
 622        if (!(sock = sock_alloc())) 
 623        {
 624                printk(KERN_WARNING "socket: no more sockets\n");
 625                return -ENFILE;         /* Not exactly a match, but its the
 626                                           closest posix thing */
 627        }
 628
 629        sock->type   = type;
 630
 631        if ((i = net_families[family]->create(sock, protocol)) < 0) 
 632        {
 633                sock_release(sock);
 634                return i;
 635        }
 636
 637        *res = sock;
 638        return 0;
 639}
 640
 641asmlinkage int sys_socket(int family, int type, int protocol)
 642{
 643        int retval;
 644        struct socket *sock;
 645
 646        lock_kernel();
 647
 648        retval = sock_create(family, type, protocol, &sock);
 649        if (retval < 0)
 650                goto out;
 651
 652        retval = get_fd(sock->inode);
 653        if (retval < 0)
 654                goto out_release;
 655        sock->file = fcheck(retval);
 656
 657out:
 658        unlock_kernel();
 659        return retval;
 660
 661out_release:
 662        sock_release(sock);
 663        goto out;
 664}
 665
 666/*
 667 *      Create a pair of connected sockets.
 668 */
 669
 670asmlinkage int sys_socketpair(int family, int type, int protocol, int usockvec[2])
 671{
 672        struct socket *sock1, *sock2;
 673        int fd1, fd2, err;
 674
 675        lock_kernel();
 676
 677        /*
 678         * Obtain the first socket and check if the underlying protocol
 679         * supports the socketpair call.
 680         */
 681
 682        err = sys_socket(family, type, protocol);
 683        if (err < 0)
 684                goto out;
 685        fd1 = err;
 686
 687        /*
 688         * Now grab another socket
 689         */
 690        err = -EINVAL;
 691        fd2 = sys_socket(family, type, protocol);
 692        if (fd2 < 0) 
 693                goto out_close1;
 694
 695        /*
 696         * Get the sockets for the two fd's
 697         */
 698        sock1 = sockfd_lookup(fd1, &err);
 699        if (!sock1)
 700                goto out_close2;
 701        sock2 = sockfd_lookup(fd2, &err);
 702        if (!sock2)
 703                goto out_put1;
 704
 705        /* try to connect the two sockets together */ 
 706        err = sock1->ops->socketpair(sock1, sock2);
 707        if (err < 0) 
 708                goto out_put2;
 709
 710        err = put_user(fd1, &usockvec[0]); 
 711        if (err) 
 712                goto out_put2;
 713        err = put_user(fd2, &usockvec[1]);
 714
 715out_put2:
 716        sockfd_put(sock2);
 717out_put1:
 718        sockfd_put(sock1);
 719
 720        if (err) {
 721        out_close2:
 722                sys_close(fd2);
 723        out_close1:
 724                sys_close(fd1);
 725        }
 726out:
 727        unlock_kernel();
 728        return err;
 729}
 730
 731
 732/*
 733 *      Bind a name to a socket. Nothing much to do here since it's
 734 *      the protocol's responsibility to handle the local address.
 735 *
 736 *      We move the socket address to kernel space before we call
 737 *      the protocol layer (having also checked the address is ok).
 738 */
 739
 740asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
 741{
 742        struct socket *sock;
 743        char address[MAX_SOCK_ADDR];
 744        int err;
 745
 746        lock_kernel();
 747        if((sock = sockfd_lookup(fd,&err))!=NULL)
 748        {
 749                if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
 750                        err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
 751                sockfd_put(sock);
 752        }                       
 753        unlock_kernel();
 754        return err;
 755}
 756
 757
 758/*
 759 *      Perform a listen. Basically, we allow the protocol to do anything
 760 *      necessary for a listen, and if that works, we mark the socket as
 761 *      ready for listening.
 762 */
 763
 764asmlinkage int sys_listen(int fd, int backlog)
 765{
 766        struct socket *sock;
 767        int err;
 768        
 769        lock_kernel();
 770        if((sock = sockfd_lookup(fd, &err))!=NULL)
 771        {
 772                err=sock->ops->listen(sock, backlog);
 773                sockfd_put(sock);
 774        }
 775        unlock_kernel();
 776        return err;
 777}
 778
 779
 780/*
 781 *      For accept, we attempt to create a new socket, set up the link
 782 *      with the client, wake up the client, then return the new
 783 *      connected fd. We collect the address of the connector in kernel
 784 *      space and move it to user at the very end. This is unclean because
 785 *      we open the socket then return an error.
 786 *
 787 *      1003.1g adds the ability to recvmsg() to query connection pending
 788 *      status to recvmsg. We need to add that support in a way thats
 789 *      clean when we restucture accept also.
 790 */
 791
 792asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
 793{
 794        struct inode *inode;
 795        struct socket *sock, *newsock;
 796        int err, len;
 797        char address[MAX_SOCK_ADDR];
 798
 799        lock_kernel();
 800        sock = sockfd_lookup(fd, &err);
 801        if (!sock)
 802                goto out;
 803
 804restart:
 805        err = -EMFILE;
 806        if (!(newsock = sock_alloc())) 
 807                goto out_put;
 808
 809        inode = newsock->inode;
 810        newsock->type = sock->type;
 811
 812        err = sock->ops->dup(newsock, sock);
 813        if (err < 0) 
 814                goto out_release;
 815
 816        err = newsock->ops->accept(sock, newsock, sock->file->f_flags);
 817        if (err < 0)
 818                goto out_release;
 819        newsock = socki_lookup(inode);
 820
 821        if ((err = get_fd(inode)) < 0) 
 822                goto out_release;
 823        newsock->file = fcheck(err);
 824
 825        if (upeer_sockaddr)
 826        {
 827                /* Handle the race where the accept works and we
 828                   then getname after it has closed again */
 829                if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 1)<0)
 830                {
 831                        sys_close(err);
 832                        goto restart;
 833                }
 834                /* N.B. Should check for errors here */
 835                move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
 836        }
 837
 838out_put:
 839        sockfd_put(sock);
 840out:
 841        unlock_kernel();
 842        return err;
 843
 844out_release:
 845        sock_release(newsock);
 846        goto out_put;
 847}
 848
 849
 850/*
 851 *      Attempt to connect to a socket with the server address.  The address
 852 *      is in user space so we verify it is OK and move it to kernel space.
 853 *
 854 *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 855 *      break bindings
 856 *
 857 *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 858 *      other SEQPACKET protocols that take time to connect() as it doesn't
 859 *      include the -EINPROGRESS status for such sockets.
 860 */
 861
 862asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
 863{
 864        struct socket *sock;
 865        char address[MAX_SOCK_ADDR];
 866        int err;
 867
 868        lock_kernel();
 869        sock = sockfd_lookup(fd, &err);
 870        if (!sock)
 871                goto out;
 872        err = move_addr_to_kernel(uservaddr, addrlen, address);
 873        if (err < 0)
 874                goto out_put;
 875        err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
 876                                 sock->file->f_flags);
 877out_put:
 878        sockfd_put(sock);
 879out:
 880        unlock_kernel();
 881        return err;
 882}
 883
 884/*
 885 *      Get the local address ('name') of a socket object. Move the obtained
 886 *      name to user space.
 887 */
 888
 889asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 890{
 891        struct socket *sock;
 892        char address[MAX_SOCK_ADDR];
 893        int len, err;
 894        
 895        lock_kernel();
 896        sock = sockfd_lookup(fd, &err);
 897        if (!sock)
 898                goto out;
 899        err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
 900        if (err)
 901                goto out_put;
 902        err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
 903
 904out_put:
 905        sockfd_put(sock);
 906out:
 907        unlock_kernel();
 908        return err;
 909}
 910
 911/*
 912 *      Get the remote address ('name') of a socket object. Move the obtained
 913 *      name to user space.
 914 */
 915
 916asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
 917{
 918        struct socket *sock;
 919        char address[MAX_SOCK_ADDR];
 920        int len, err;
 921
 922        lock_kernel();
 923        if ((sock = sockfd_lookup(fd, &err))!=NULL)
 924        {
 925                err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
 926                if (!err)
 927                        err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
 928                sockfd_put(sock);
 929        }
 930        unlock_kernel();
 931        return err;
 932}
 933
 934/*
 935 *      Send a datagram to a given address. We move the address into kernel
 936 *      space and check the user space data area is readable before invoking
 937 *      the protocol.
 938 */
 939
 940asmlinkage int sys_sendto(int fd, void * buff, size_t len, unsigned flags,
 941           struct sockaddr *addr, int addr_len)
 942{
 943        struct socket *sock;
 944        char address[MAX_SOCK_ADDR];
 945        int err;
 946        struct msghdr msg;
 947        struct iovec iov;
 948        
 949        lock_kernel();
 950        sock = sockfd_lookup(fd, &err);
 951        if (!sock)
 952                goto out;
 953        iov.iov_base=buff;
 954        iov.iov_len=len;
 955        msg.msg_name=NULL;
 956        msg.msg_iov=&iov;
 957        msg.msg_iovlen=1;
 958        msg.msg_control=NULL;
 959        msg.msg_controllen=0;
 960        msg.msg_namelen=addr_len;
 961        if(addr)
 962        {
 963                err = move_addr_to_kernel(addr, addr_len, address);
 964                if (err < 0)
 965                        goto out_put;
 966                msg.msg_name=address;
 967        }
 968        if (sock->file->f_flags & O_NONBLOCK)
 969                flags |= MSG_DONTWAIT;
 970        msg.msg_flags = flags;
 971        err = sock_sendmsg(sock, &msg, len);
 972
 973out_put:                
 974        sockfd_put(sock);
 975out:
 976        unlock_kernel();
 977        return err;
 978}
 979
 980/*
 981 *      Send a datagram down a socket. 
 982 */
 983
 984asmlinkage int sys_send(int fd, void * buff, size_t len, unsigned flags)
 985{
 986        return sys_sendto(fd, buff, len, flags, NULL, 0);
 987}
 988
 989/*
 990 *      Receive a frame from the socket and optionally record the address of the 
 991 *      sender. We verify the buffers are writable and if needed move the
 992 *      sender address from kernel to user space.
 993 */
 994
 995asmlinkage int sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
 996             struct sockaddr *addr, int *addr_len)
 997{
 998        struct socket *sock;
 999        struct iovec iov;
1000        struct msghdr msg;
1001        char address[MAX_SOCK_ADDR];
1002        int err,err2;
1003
1004        lock_kernel();
1005        sock = sockfd_lookup(fd, &err);
1006        if (!sock)
1007                goto out;
1008
1009        msg.msg_control=NULL;
1010        msg.msg_controllen=0;
1011        msg.msg_iovlen=1;
1012        msg.msg_iov=&iov;
1013        iov.iov_len=size;
1014        iov.iov_base=ubuf;
1015        msg.msg_name=address;
1016        msg.msg_namelen=MAX_SOCK_ADDR;
1017        if (sock->file->f_flags & O_NONBLOCK)
1018                flags |= MSG_DONTWAIT;
1019        err=sock_recvmsg(sock, &msg, size, flags);
1020
1021        if(err >= 0 && addr != NULL)
1022        {
1023                err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1024                if(err2<0)
1025                        err=err2;
1026        }
1027        sockfd_put(sock);                       
1028out:
1029        unlock_kernel();
1030        return err;
1031}
1032
1033/*
1034 *      Receive a datagram from a socket. 
1035 */
1036
1037asmlinkage int sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1038{
1039        return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1040}
1041
1042/*
1043 *      Set a socket option. Because we don't know the option lengths we have
1044 *      to pass the user mode parameter for the protocols to sort out.
1045 */
1046
1047asmlinkage int sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1048{
1049        int err;
1050        struct socket *sock;
1051        
1052        lock_kernel();
1053        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1054        {
1055                if (level == SOL_SOCKET)
1056                        err=sock_setsockopt(sock,level,optname,optval,optlen);
1057                else
1058                        err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1059                sockfd_put(sock);
1060        }
1061        unlock_kernel();
1062        return err;
1063}
1064
1065/*
1066 *      Get a socket option. Because we don't know the option lengths we have
1067 *      to pass a user mode parameter for the protocols to sort out.
1068 */
1069
1070asmlinkage int sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1071{
1072        int err;
1073        struct socket *sock;
1074
1075        lock_kernel();
1076        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1077        {
1078                if (level == SOL_SOCKET)
1079                        err=sock_getsockopt(sock,level,optname,optval,optlen);
1080                else
1081                        err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1082                sockfd_put(sock);
1083        }
1084        unlock_kernel();
1085        return err;
1086}
1087
1088
1089/*
1090 *      Shutdown a socket.
1091 */
1092
1093asmlinkage int sys_shutdown(int fd, int how)
1094{
1095        int err;
1096        struct socket *sock;
1097
1098        lock_kernel();
1099        if ((sock = sockfd_lookup(fd, &err))!=NULL)
1100        {
1101                err=sock->ops->shutdown(sock, how);
1102                sockfd_put(sock);
1103        }
1104        unlock_kernel();
1105        return err;
1106}
1107
1108/*
1109 *      BSD sendmsg interface
1110 */
1111
1112asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1113{
1114        struct socket *sock;
1115        char address[MAX_SOCK_ADDR];
1116        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1117        unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1118        unsigned char *ctl_buf = ctl;
1119        struct msghdr msg_sys;
1120        int err, ctl_len, iov_size, total_len;
1121        
1122        lock_kernel();
1123
1124        err = -EFAULT;
1125        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1126                goto out; 
1127
1128        sock = sockfd_lookup(fd, &err);
1129        if (!sock) 
1130                goto out;
1131
1132        /* do not move before msg_sys is valid */
1133        err = -EINVAL;
1134        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1135                goto out_put;
1136
1137        /* Check whether to allocate the iovec area*/
1138        err = -ENOMEM;
1139        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1140        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1141                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1142                if (!iov)
1143                        goto out_put;
1144        }
1145
1146        /* This will also move the address data into kernel space */
1147        err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1148        if (err < 0) 
1149                goto out_freeiov;
1150        total_len = err;
1151
1152        err = -ENOBUFS;
1153
1154        /* msg_controllen must fit to int */
1155        if (msg_sys.msg_controllen > INT_MAX)
1156                goto out_freeiov;
1157        ctl_len = msg_sys.msg_controllen; 
1158        if (ctl_len) 
1159        {
1160                if (ctl_len > sizeof(ctl))
1161                {
1162                        /* Suggested by the Advanced Sockets API for IPv6 draft:
1163                         * Limit the msg_controllen size by the SO_SNDBUF size.
1164                         */
1165                        /* Note - when this code becomes multithreaded on
1166                         * SMP machines you have a race to fix here.
1167                         */
1168                        err = -ENOBUFS;
1169                        ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1170                        if (ctl_buf == NULL) 
1171                                goto out_freeiov;
1172                }
1173                err = -EFAULT;
1174                if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1175                        goto out_freectl;
1176                msg_sys.msg_control = ctl_buf;
1177        }
1178        msg_sys.msg_flags = flags;
1179
1180        if (sock->file->f_flags & O_NONBLOCK)
1181                msg_sys.msg_flags |= MSG_DONTWAIT;
1182        err = sock_sendmsg(sock, &msg_sys, total_len);
1183
1184out_freectl:
1185        if (ctl_buf != ctl)    
1186                sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1187out_freeiov:
1188        if (iov != iovstack)
1189                sock_kfree_s(sock->sk, iov, iov_size);
1190out_put:
1191        sockfd_put(sock);
1192out:       
1193        unlock_kernel();
1194        return err;
1195}
1196
1197/*
1198 *      BSD recvmsg interface
1199 */
1200
1201asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1202{
1203        struct socket *sock;
1204        struct iovec iovstack[UIO_FASTIOV];
1205        struct iovec *iov=iovstack;
1206        struct msghdr msg_sys;
1207        unsigned long cmsg_ptr;
1208        int err, iov_size, total_len, len;
1209
1210        /* kernel mode address */
1211        char addr[MAX_SOCK_ADDR];
1212
1213        /* user mode address pointers */
1214        struct sockaddr *uaddr;
1215        int *uaddr_len;
1216        
1217        lock_kernel();
1218        err=-EFAULT;
1219        if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1220                goto out;
1221
1222        sock = sockfd_lookup(fd, &err);
1223        if (!sock)
1224                goto out;
1225
1226        err = -EINVAL;
1227        if (msg_sys.msg_iovlen > UIO_MAXIOV)
1228                goto out_put;
1229        
1230        /* Check whether to allocate the iovec area*/
1231        err = -ENOMEM;
1232        iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1233        if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1234                iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1235                if (!iov)
1236                        goto out_put;
1237        }
1238
1239        /*
1240         *      Save the user-mode address (verify_iovec will change the
1241         *      kernel msghdr to use the kernel address space)
1242         */
1243         
1244        uaddr = msg_sys.msg_name;
1245        uaddr_len = &msg->msg_namelen;
1246        err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1247        if (err < 0)
1248                goto out_freeiov;
1249        total_len=err;
1250
1251        cmsg_ptr = (unsigned long)msg_sys.msg_control;
1252        msg_sys.msg_flags = 0;
1253        
1254        if (sock->file->f_flags & O_NONBLOCK)
1255                flags |= MSG_DONTWAIT;
1256        err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1257        if (err < 0)
1258                goto out_freeiov;
1259        len = err;
1260
1261        if (uaddr != NULL) {
1262                err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1263                if (err < 0)
1264                        goto out_freeiov;
1265        }
1266        err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1267        if (err)
1268                goto out_freeiov;
1269        err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1270                                                         &msg->msg_controllen);
1271        if (err)
1272                goto out_freeiov;
1273        err = len;
1274
1275out_freeiov:
1276        if (iov != iovstack)
1277                sock_kfree_s(sock->sk, iov, iov_size);
1278out_put:
1279        sockfd_put(sock);
1280out:
1281        unlock_kernel();
1282        return err;
1283}
1284
1285
1286/*
1287 *      Perform a file control on a socket file descriptor.
1288 *
1289 *      Doesn't aquire a fd lock, because no network fcntl
1290 *      function sleeps currently.
1291 */
1292
1293int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1294{
1295        struct socket *sock;
1296
1297        sock = socki_lookup (filp->f_dentry->d_inode);
1298        if (sock && sock->ops)
1299                return sock->ops->fcntl(sock, cmd, arg);
1300        return(-EINVAL);
1301}
1302
/*
 * Argument list sizes for sys_socketcall, indexed by call number.
 * Each entry is the number of bytes of unsigned long arguments that
 * sys_socketcall() copies in from user space for that call. Entry 0
 * is unused (call numbers start at 1).
 *
 * NOTE(review): the per-row names assume the usual SYS_* numbering
 * (SYS_SOCKET == 1 ... SYS_RECVMSG == 17) - confirm against the header
 * that defines them.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18]={
	AL(0),				/* 0: unused */
	AL(3), AL(3), AL(3),		/* socket, bind, connect */
	AL(2), AL(3),			/* listen, accept */
	AL(3), AL(3),			/* getsockname, getpeername */
	AL(4), AL(4), AL(4),		/* socketpair, send, recv */
	AL(6), AL(6),			/* sendto, recvfrom */
	AL(2),				/* shutdown */
	AL(5), AL(5),			/* setsockopt, getsockopt */
	AL(3), AL(3)			/* sendmsg, recvmsg */
};
#undef AL
1309
1310/*
1311 *      System call vectors. 
1312 *
1313 *      Argument checking cleaned up. Saved 20% in size.
1314 *  This function doesn't need to set the kernel lock because
1315 *  it is set by the callees. 
1316 */
1317
1318asmlinkage int sys_socketcall(int call, unsigned long *args)
1319{
1320        unsigned long a[6];
1321        unsigned long a0,a1;
1322        int err;
1323
1324        if(call<1||call>SYS_RECVMSG)
1325                return -EINVAL;
1326
1327        /* copy_from_user should be SMP safe. */
1328        if (copy_from_user(a, args, nargs[call]))
1329                return -EFAULT;
1330                
1331        a0=a[0];
1332        a1=a[1];
1333        
1334        switch(call) 
1335        {
1336                case SYS_SOCKET:
1337                        err = sys_socket(a0,a1,a[2]);
1338                        break;
1339                case SYS_BIND:
1340                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1341                        break;
1342                case SYS_CONNECT:
1343                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1344                        break;
1345                case SYS_LISTEN:
1346                        err = sys_listen(a0,a1);
1347                        break;
1348                case SYS_ACCEPT:
1349                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1350                        break;
1351                case SYS_GETSOCKNAME:
1352                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1353                        break;
1354                case SYS_GETPEERNAME:
1355                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1356                        break;
1357                case SYS_SOCKETPAIR:
1358                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1359                        break;
1360                case SYS_SEND:
1361                        err = sys_send(a0, (void *)a1, a[2], a[3]);
1362                        break;
1363                case SYS_SENDTO:
1364                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
1365                                         (struct sockaddr *)a[4], a[5]);
1366                        break;
1367                case SYS_RECV:
1368                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
1369                        break;
1370                case SYS_RECVFROM:
1371                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1372                                           (struct sockaddr *)a[4], (int *)a[5]);
1373                        break;
1374                case SYS_SHUTDOWN:
1375                        err = sys_shutdown(a0,a1);
1376                        break;
1377                case SYS_SETSOCKOPT:
1378                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1379                        break;
1380                case SYS_GETSOCKOPT:
1381                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1382                        break;
1383                case SYS_SENDMSG:
1384                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1385                        break;
1386                case SYS_RECVMSG:
1387                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1388                        break;
1389                default:
1390                        err = -EINVAL;
1391                        break;
1392        }
1393        return err;
1394}
1395
1396/*
1397 *      This function is called by a protocol handler that wants to
1398 *      advertise its address family, and have it linked into the
1399 *      SOCKET module.
1400 */
1401
1402int sock_register(struct net_proto_family *ops)
1403{
1404        if (ops->family >= NPROTO) {
1405                printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1406                return -ENOBUFS;
1407        }
1408        net_families[ops->family]=ops;
1409        return 0;
1410}
1411
1412/*
1413 *      This function is called by a protocol handler that wants to
1414 *      remove its address family, and have it unlinked from the
1415 *      SOCKET module.
1416 */
1417
1418int sock_unregister(int family)
1419{
1420        if (family < 0 || family >= NPROTO)
1421                return -1;
1422
1423        net_families[family]=NULL;
1424        return 0;
1425}
1426
1427void __init proto_init(void)
1428{
1429        extern struct net_proto protocols[];    /* Network protocols */
1430        struct net_proto *pro;
1431
1432        /* Kick all configured protocols. */
1433        pro = protocols;
1434        while (pro->name != NULL) 
1435        {
1436                (*pro->init_func)(pro);
1437                pro++;
1438        }
1439        /* We're all done... */
1440}
1441
1442extern void sk_init(void);
1443#ifdef CONFIG_WAN_ROUTER
1444extern void wanrouter_init(void);
1445#endif
1446
1447void __init sock_init(void)
1448{
1449        int i;
1450
1451        printk(KERN_INFO "Linux NET4.0 for Linux 2.2\n");
1452        printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1453
1454        /*
1455         *      Initialize all address (protocol) families. 
1456         */
1457         
1458        for (i = 0; i < NPROTO; i++) 
1459                net_families[i] = NULL;
1460
1461        /*
1462         *      Initialize sock SLAB cache.
1463         */
1464         
1465        sk_init();
1466
1467#ifdef SLAB_SKB
1468        /*
1469         *      Initialize skbuff SLAB cache 
1470         */
1471        skb_init();
1472#endif
1473
1474
1475        /*
1476         *      Wan router layer. 
1477         */
1478
1479#ifdef CONFIG_WAN_ROUTER         
1480        wanrouter_init();
1481#endif
1482
1483        /*
1484         *      Attach the firewall module if configured
1485         */
1486         
1487#ifdef CONFIG_FIREWALL   
1488        fwchain_init();
1489#endif
1490
1491        /*
1492         *      Initialize the protocols module. 
1493         */
1494
1495        proto_init();
1496
1497        /*
1498         *      The netlink device handler may be needed early.
1499         */
1500
1501#ifdef  CONFIG_RTNETLINK
1502        rtnetlink_init();
1503#endif
1504#ifdef CONFIG_NETLINK_DEV
1505        init_netlink();
1506#endif
1507}
1508
1509int socket_get_info(char *buffer, char **start, off_t offset, int length)
1510{
1511        int len = sprintf(buffer, "sockets: used %d\n", sockets_in_use);
1512        if (offset >= len)
1513        {
1514                *start = buffer;
1515                return 0;
1516        }
1517        *start = buffer + offset;
1518        len -= offset;
1519        if (len > length)
1520                len = length;
1521        return len;
1522}
1523
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.