linux-bk/net/unix/af_unix.c
<<
>>
Prefs
   1/*
   2 * NET4:        Implementation of BSD Unix domain sockets.
   3 *
   4 * Authors:     Alan Cox, <alan.cox@linux.org>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
  12 *
  13 * Fixes:
  14 *              Linus Torvalds  :       Assorted bug cures.
  15 *              Niibe Yutaka    :       async I/O support.
  16 *              Carsten Paeth   :       PF_UNIX check, address fixes.
  17 *              Alan Cox        :       Limit size of allocated blocks.
  18 *              Alan Cox        :       Fixed the stupid socketpair bug.
  19 *              Alan Cox        :       BSD compatibility fine tuning.
  20 *              Alan Cox        :       Fixed a bug in connect when interrupted.
  21 *              Alan Cox        :       Sorted out a proper draft version of
  22 *                                      file descriptor passing hacked up from
  23 *                                      Mike Shaver's work.
  24 *              Marty Leisner   :       Fixes to fd passing
  25 *              Nick Nevin      :       recvmsg bugfix.
  26 *              Alan Cox        :       Started proper garbage collector
  27 *              Heiko EiBfeldt  :       Missing verify_area check
  28 *              Alan Cox        :       Started POSIXisms
  29 *              Andreas Schwab  :       Replace inode by dentry for proper
  30 *                                      reference counting
  31 *              Kirk Petersen   :       Made this a module
  32 *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  33 *                                      Lots of bug fixes.
   34 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  35 *                                      by above two patches.
  36 *           Andrea Arcangeli   :       If possible we block in connect(2)
  37 *                                      if the max backlog of the listen socket
   38 *                                      has been reached. This won't break
  39 *                                      old apps and it will avoid huge amount
  40 *                                      of socks hashed (this for unix_gc()
  41 *                                      performances reasons).
  42 *                                      Security fix that limits the max
  43 *                                      number of socks to 2*max_files and
  44 *                                      the number of skb queueable in the
  45 *                                      dgram receiver.
  46 *              Artur Skawina   :       Hash function optimizations
  47 *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  48 *            Malcolm Beattie   :       Set peercred for socketpair
  49 *           Michal Ostrowski   :       Module initialization cleanup.
  50 *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  51 *                                      the core infrastructure is doing that
  52 *                                      for all net proto families now (2.5.69+)
  53 *
  54 *
  55 * Known differences from reference BSD that was tested:
  56 *
  57 *      [TO FIX]
  58 *      ECONNREFUSED is not returned from one end of a connected() socket to the
  59 *              other the moment one end closes.
  60 *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  61 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  62 *      [NOT TO FIX]
  63 *      accept() returns a path name even if the connecting socket has closed
  64 *              in the meantime (BSD loses the path and gives up).
  65 *      accept() returns 0 length path for an unbound connector. BSD returns 16
  66 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  67 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  68 *      BSD af_unix apparently has connect forgetting to block properly.
  69 *              (need to check this with the POSIX spec in detail)
  70 *
  71 * Differences from 2.0.0-11-... (ANK)
  72 *      Bug fixes and improvements.
  73 *              - client shutdown killed server socket.
  74 *              - removed all useless cli/sti pairs.
  75 *
  76 *      Semantic changes/extensions.
  77 *              - generic control message passing.
  78 *              - SCM_CREDENTIALS control message.
  79 *              - "Abstract" (not FS based) socket bindings.
  80 *                Abstract names are sequences of bytes (not zero terminated)
  81 *                started by 0, so that this name space does not intersect
  82 *                with BSD names.
  83 */
  84
  85#include <linux/module.h>
  86#include <linux/config.h>
  87#include <linux/kernel.h>
  88#include <linux/major.h>
  89#include <linux/signal.h>
  90#include <linux/sched.h>
  91#include <linux/errno.h>
  92#include <linux/string.h>
  93#include <linux/stat.h>
  94#include <linux/dcache.h>
  95#include <linux/namei.h>
  96#include <linux/socket.h>
  97#include <linux/un.h>
  98#include <linux/fcntl.h>
  99#include <linux/termios.h>
 100#include <linux/sockios.h>
 101#include <linux/net.h>
 102#include <linux/in.h>
 103#include <linux/fs.h>
 104#include <linux/slab.h>
 105#include <asm/uaccess.h>
 106#include <linux/skbuff.h>
 107#include <linux/netdevice.h>
 108#include <net/sock.h>
 109#include <linux/tcp.h>
 110#include <net/af_unix.h>
 111#include <linux/proc_fs.h>
 112#include <linux/seq_file.h>
 113#include <net/scm.h>
 114#include <linux/init.h>
 115#include <linux/poll.h>
 116#include <linux/smp_lock.h>
 117#include <linux/rtnetlink.h>
 118#include <linux/mount.h>
 119#include <net/checksum.h>
 120#include <linux/security.h>
 121
 122int sysctl_unix_max_dgram_qlen = 10;
 123
 124static kmem_cache_t *unix_sk_cachep;
 125
 126struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 127DEFINE_RWLOCK(unix_table_lock);
 128static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 129
 130#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
 131
 132#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 133
 134/*
 135 *  SMP locking strategy:
 136 *    hash table is protected with rwlock unix_table_lock
 137 *    each socket state is protected by separate rwlock.
 138 */
 139
 140static inline unsigned unix_hash_fold(unsigned hash)
 141{
 142        hash ^= hash>>16;
 143        hash ^= hash>>8;
 144        return hash&(UNIX_HASH_SIZE-1);
 145}
 146
 147#define unix_peer(sk) (unix_sk(sk)->peer)
 148
 149static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 150{
 151        return unix_peer(osk) == sk;
 152}
 153
 154static inline int unix_may_send(struct sock *sk, struct sock *osk)
 155{
 156        return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
 157}
 158
 159static struct sock *unix_peer_get(struct sock *s)
 160{
 161        struct sock *peer;
 162
 163        unix_state_rlock(s);
 164        peer = unix_peer(s);
 165        if (peer)
 166                sock_hold(peer);
 167        unix_state_runlock(s);
 168        return peer;
 169}
 170
 171static inline void unix_release_addr(struct unix_address *addr)
 172{
 173        if (atomic_dec_and_test(&addr->refcnt))
 174                kfree(addr);
 175}
 176
 177/*
 178 *      Check unix socket name:
 179 *              - should be not zero length.
 180 *              - if started by not zero, should be NULL terminated (FS object)
 181 *              - if started by zero, it is abstract name.
 182 */
 183 
 184static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 185{
 186        if (len <= sizeof(short) || len > sizeof(*sunaddr))
 187                return -EINVAL;
 188        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 189                return -EINVAL;
 190        if (sunaddr->sun_path[0]) {
 191                ((char *)sunaddr)[len]=0;
 192                len = strlen(sunaddr->sun_path)+1+sizeof(short);
 193                return len;
 194        }
 195
 196        *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
 197        return len;
 198}
 199
 200static void __unix_remove_socket(struct sock *sk)
 201{
 202        sk_del_node_init(sk);
 203}
 204
 205static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 206{
 207        BUG_TRAP(sk_unhashed(sk));
 208        sk_add_node(sk, list);
 209}
 210
 211static inline void unix_remove_socket(struct sock *sk)
 212{
 213        write_lock(&unix_table_lock);
 214        __unix_remove_socket(sk);
 215        write_unlock(&unix_table_lock);
 216}
 217
 218static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 219{
 220        write_lock(&unix_table_lock);
 221        __unix_insert_socket(list, sk);
 222        write_unlock(&unix_table_lock);
 223}
 224
 225static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
 226                                              int len, int type, unsigned hash)
 227{
 228        struct sock *s;
 229        struct hlist_node *node;
 230
 231        sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 232                struct unix_sock *u = unix_sk(s);
 233
 234                if (u->addr->len == len &&
 235                    !memcmp(u->addr->name, sunname, len))
 236                        goto found;
 237        }
 238        s = NULL;
 239found:
 240        return s;
 241}
 242
 243static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
 244                                                   int len, int type,
 245                                                   unsigned hash)
 246{
 247        struct sock *s;
 248
 249        read_lock(&unix_table_lock);
 250        s = __unix_find_socket_byname(sunname, len, type, hash);
 251        if (s)
 252                sock_hold(s);
 253        read_unlock(&unix_table_lock);
 254        return s;
 255}
 256
 257static struct sock *unix_find_socket_byinode(struct inode *i)
 258{
 259        struct sock *s;
 260        struct hlist_node *node;
 261
 262        read_lock(&unix_table_lock);
 263        sk_for_each(s, node,
 264                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 265                struct dentry *dentry = unix_sk(s)->dentry;
 266
 267                if(dentry && dentry->d_inode == i)
 268                {
 269                        sock_hold(s);
 270                        goto found;
 271                }
 272        }
 273        s = NULL;
 274found:
 275        read_unlock(&unix_table_lock);
 276        return s;
 277}
 278
 279static inline int unix_writable(struct sock *sk)
 280{
 281        return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 282}
 283
 284static void unix_write_space(struct sock *sk)
 285{
 286        read_lock(&sk->sk_callback_lock);
 287        if (unix_writable(sk)) {
 288                if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 289                        wake_up_interruptible(sk->sk_sleep);
 290                sk_wake_async(sk, 2, POLL_OUT);
 291        }
 292        read_unlock(&sk->sk_callback_lock);
 293}
 294
 295/* When dgram socket disconnects (or changes its peer), we clear its receive
 296 * queue of packets arrived from previous peer. First, it allows to do
 297 * flow control based only on wmem_alloc; second, sk connected to peer
 298 * may receive messages only from that peer. */
 299static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 300{
 301        if (skb_queue_len(&sk->sk_receive_queue)) {
 302                skb_queue_purge(&sk->sk_receive_queue);
 303                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 304
 305                /* If one link of bidirectional dgram pipe is disconnected,
 306                 * we signal error. Messages are lost. Do not make this,
 307                 * when peer was not connected to us.
 308                 */
 309                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 310                        other->sk_err = ECONNRESET;
 311                        other->sk_error_report(other);
 312                }
 313        }
 314}
 315
 316static void unix_sock_destructor(struct sock *sk)
 317{
 318        struct unix_sock *u = unix_sk(sk);
 319
 320        skb_queue_purge(&sk->sk_receive_queue);
 321
 322        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
 323        BUG_TRAP(sk_unhashed(sk));
 324        BUG_TRAP(!sk->sk_socket);
 325        if (!sock_flag(sk, SOCK_DEAD)) {
 326                printk("Attempt to release alive unix socket: %p\n", sk);
 327                return;
 328        }
 329
 330        if (u->addr)
 331                unix_release_addr(u->addr);
 332
 333        atomic_dec(&unix_nr_socks);
 334#ifdef UNIX_REFCNT_DEBUG
 335        printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
 336#endif
 337}
 338
 339static int unix_release_sock (struct sock *sk, int embrion)
 340{
 341        struct unix_sock *u = unix_sk(sk);
 342        struct dentry *dentry;
 343        struct vfsmount *mnt;
 344        struct sock *skpair;
 345        struct sk_buff *skb;
 346        int state;
 347
 348        unix_remove_socket(sk);
 349
 350        /* Clear state */
 351        unix_state_wlock(sk);
 352        sock_orphan(sk);
 353        sk->sk_shutdown = SHUTDOWN_MASK;
 354        dentry       = u->dentry;
 355        u->dentry    = NULL;
 356        mnt          = u->mnt;
 357        u->mnt       = NULL;
 358        state = sk->sk_state;
 359        sk->sk_state = TCP_CLOSE;
 360        unix_state_wunlock(sk);
 361
 362        wake_up_interruptible_all(&u->peer_wait);
 363
 364        skpair=unix_peer(sk);
 365
 366        if (skpair!=NULL) {
 367                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 368                        unix_state_wlock(skpair);
 369                        /* No more writes */
 370                        skpair->sk_shutdown = SHUTDOWN_MASK;
 371                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 372                                skpair->sk_err = ECONNRESET;
 373                        unix_state_wunlock(skpair);
 374                        skpair->sk_state_change(skpair);
 375                        read_lock(&skpair->sk_callback_lock);
 376                        sk_wake_async(skpair,1,POLL_HUP);
 377                        read_unlock(&skpair->sk_callback_lock);
 378                }
 379                sock_put(skpair); /* It may now die */
 380                unix_peer(sk) = NULL;
 381        }
 382
 383        /* Try to flush out this socket. Throw out buffers at least */
 384
 385        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 386                if (state==TCP_LISTEN)
 387                        unix_release_sock(skb->sk, 1);
 388                /* passed fds are erased in the kfree_skb hook        */
 389                kfree_skb(skb);
 390        }
 391
 392        if (dentry) {
 393                dput(dentry);
 394                mntput(mnt);
 395        }
 396
 397        sock_put(sk);
 398
 399        /* ---- Socket is dead now and most probably destroyed ---- */
 400
 401        /*
 402         * Fixme: BSD difference: In BSD all sockets connected to use get
 403         *        ECONNRESET and we die on the spot. In Linux we behave
 404         *        like files and pipes do and wait for the last
 405         *        dereference.
 406         *
 407         * Can't we simply set sock->err?
 408         *
 409         *        What the above comment does talk about? --ANK(980817)
 410         */
 411
 412        if (atomic_read(&unix_tot_inflight))
 413                unix_gc();              /* Garbage collect fds */       
 414
 415        return 0;
 416}
 417
 418static int unix_listen(struct socket *sock, int backlog)
 419{
 420        int err;
 421        struct sock *sk = sock->sk;
 422        struct unix_sock *u = unix_sk(sk);
 423
 424        err = -EOPNOTSUPP;
 425        if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
 426                goto out;                       /* Only stream/seqpacket sockets accept */
 427        err = -EINVAL;
 428        if (!u->addr)
 429                goto out;                       /* No listens on an unbound socket */
 430        unix_state_wlock(sk);
 431        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 432                goto out_unlock;
 433        if (backlog > sk->sk_max_ack_backlog)
 434                wake_up_interruptible_all(&u->peer_wait);
 435        sk->sk_max_ack_backlog  = backlog;
 436        sk->sk_state            = TCP_LISTEN;
 437        /* set credentials so connect can copy them */
 438        sk->sk_peercred.pid     = current->tgid;
 439        sk->sk_peercred.uid     = current->euid;
 440        sk->sk_peercred.gid     = current->egid;
 441        err = 0;
 442
 443out_unlock:
 444        unix_state_wunlock(sk);
 445out:
 446        return err;
 447}
 448
 449static int unix_release(struct socket *);
 450static int unix_bind(struct socket *, struct sockaddr *, int);
 451static int unix_stream_connect(struct socket *, struct sockaddr *,
 452                               int addr_len, int flags);
 453static int unix_socketpair(struct socket *, struct socket *);
 454static int unix_accept(struct socket *, struct socket *, int);
 455static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 456static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 457static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 458static int unix_shutdown(struct socket *, int);
 459static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 460                               struct msghdr *, size_t);
 461static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 462                               struct msghdr *, size_t, int);
 463static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 464                              struct msghdr *, size_t);
 465static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 466                              struct msghdr *, size_t, int);
 467static int unix_dgram_connect(struct socket *, struct sockaddr *,
 468                              int, int);
 469static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 470                                  struct msghdr *, size_t);
 471
 472static struct proto_ops unix_stream_ops = {
 473        .family =       PF_UNIX,
 474        .owner =        THIS_MODULE,
 475        .release =      unix_release,
 476        .bind =         unix_bind,
 477        .connect =      unix_stream_connect,
 478        .socketpair =   unix_socketpair,
 479        .accept =       unix_accept,
 480        .getname =      unix_getname,
 481        .poll =         unix_poll,
 482        .ioctl =        unix_ioctl,
 483        .listen =       unix_listen,
 484        .shutdown =     unix_shutdown,
 485        .setsockopt =   sock_no_setsockopt,
 486        .getsockopt =   sock_no_getsockopt,
 487        .sendmsg =      unix_stream_sendmsg,
 488        .recvmsg =      unix_stream_recvmsg,
 489        .mmap =         sock_no_mmap,
 490        .sendpage =     sock_no_sendpage,
 491};
 492
 493static struct proto_ops unix_dgram_ops = {
 494        .family =       PF_UNIX,
 495        .owner =        THIS_MODULE,
 496        .release =      unix_release,
 497        .bind =         unix_bind,
 498        .connect =      unix_dgram_connect,
 499        .socketpair =   unix_socketpair,
 500        .accept =       sock_no_accept,
 501        .getname =      unix_getname,
 502        .poll =         datagram_poll,
 503        .ioctl =        unix_ioctl,
 504        .listen =       sock_no_listen,
 505        .shutdown =     unix_shutdown,
 506        .setsockopt =   sock_no_setsockopt,
 507        .getsockopt =   sock_no_getsockopt,
 508        .sendmsg =      unix_dgram_sendmsg,
 509        .recvmsg =      unix_dgram_recvmsg,
 510        .mmap =         sock_no_mmap,
 511        .sendpage =     sock_no_sendpage,
 512};
 513
 514static struct proto_ops unix_seqpacket_ops = {
 515        .family =       PF_UNIX,
 516        .owner =        THIS_MODULE,
 517        .release =      unix_release,
 518        .bind =         unix_bind,
 519        .connect =      unix_stream_connect,
 520        .socketpair =   unix_socketpair,
 521        .accept =       unix_accept,
 522        .getname =      unix_getname,
 523        .poll =         datagram_poll,
 524        .ioctl =        unix_ioctl,
 525        .listen =       unix_listen,
 526        .shutdown =     unix_shutdown,
 527        .setsockopt =   sock_no_setsockopt,
 528        .getsockopt =   sock_no_getsockopt,
 529        .sendmsg =      unix_seqpacket_sendmsg,
 530        .recvmsg =      unix_dgram_recvmsg,
 531        .mmap =         sock_no_mmap,
 532        .sendpage =     sock_no_sendpage,
 533};
 534
 535static struct sock * unix_create1(struct socket *sock)
 536{
 537        struct sock *sk = NULL;
 538        struct unix_sock *u;
 539
 540        if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
 541                goto out;
 542
 543        sk = sk_alloc(PF_UNIX, GFP_KERNEL, sizeof(struct unix_sock),
 544                      unix_sk_cachep);
 545        if (!sk)
 546                goto out;
 547
 548        atomic_inc(&unix_nr_socks);
 549
 550        sock_init_data(sock,sk);
 551        sk_set_owner(sk, THIS_MODULE);
 552
 553        sk->sk_write_space      = unix_write_space;
 554        sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
 555        sk->sk_destruct         = unix_sock_destructor;
 556        u         = unix_sk(sk);
 557        u->dentry = NULL;
 558        u->mnt    = NULL;
 559        rwlock_init(&u->lock);
 560        atomic_set(&u->inflight, sock ? 0 : -1);
 561        init_MUTEX(&u->readsem); /* single task reading lock */
 562        init_waitqueue_head(&u->peer_wait);
 563        unix_insert_socket(unix_sockets_unbound, sk);
 564out:
 565        return sk;
 566}
 567
 568static int unix_create(struct socket *sock, int protocol)
 569{
 570        if (protocol && protocol != PF_UNIX)
 571                return -EPROTONOSUPPORT;
 572
 573        sock->state = SS_UNCONNECTED;
 574
 575        switch (sock->type) {
 576        case SOCK_STREAM:
 577                sock->ops = &unix_stream_ops;
 578                break;
 579                /*
 580                 *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 581                 *      nothing uses it.
 582                 */
 583        case SOCK_RAW:
 584                sock->type=SOCK_DGRAM;
 585        case SOCK_DGRAM:
 586                sock->ops = &unix_dgram_ops;
 587                break;
 588        case SOCK_SEQPACKET:
 589                sock->ops = &unix_seqpacket_ops;
 590                break;
 591        default:
 592                return -ESOCKTNOSUPPORT;
 593        }
 594
 595        return unix_create1(sock) ? 0 : -ENOMEM;
 596}
 597
 598static int unix_release(struct socket *sock)
 599{
 600        struct sock *sk = sock->sk;
 601
 602        if (!sk)
 603                return 0;
 604
 605        sock->sk = NULL;
 606
 607        return unix_release_sock (sk, 0);
 608}
 609
 610static int unix_autobind(struct socket *sock)
 611{
 612        struct sock *sk = sock->sk;
 613        struct unix_sock *u = unix_sk(sk);
 614        static u32 ordernum = 1;
 615        struct unix_address * addr;
 616        int err;
 617
 618        down(&u->readsem);
 619
 620        err = 0;
 621        if (u->addr)
 622                goto out;
 623
 624        err = -ENOMEM;
 625        addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 626        if (!addr)
 627                goto out;
 628
 629        memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
 630        addr->name->sun_family = AF_UNIX;
 631        atomic_set(&addr->refcnt, 1);
 632
 633retry:
 634        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 635        addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
 636
 637        write_lock(&unix_table_lock);
 638        ordernum = (ordernum+1)&0xFFFFF;
 639
 640        if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
 641                                      addr->hash)) {
 642                write_unlock(&unix_table_lock);
 643                /* Sanity yield. It is unusual case, but yet... */
 644                if (!(ordernum&0xFF))
 645                        yield();
 646                goto retry;
 647        }
 648        addr->hash ^= sk->sk_type;
 649
 650        __unix_remove_socket(sk);
 651        u->addr = addr;
 652        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 653        write_unlock(&unix_table_lock);
 654        err = 0;
 655
 656out:    up(&u->readsem);
 657        return err;
 658}
 659
 660static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
 661                                    int type, unsigned hash, int *error)
 662{
 663        struct sock *u;
 664        struct nameidata nd;
 665        int err = 0;
 666        
 667        if (sunname->sun_path[0]) {
 668                err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
 669                if (err)
 670                        goto fail;
 671                err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
 672                if (err)
 673                        goto put_fail;
 674
 675                err = -ECONNREFUSED;
 676                if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
 677                        goto put_fail;
 678                u=unix_find_socket_byinode(nd.dentry->d_inode);
 679                if (!u)
 680                        goto put_fail;
 681
 682                if (u->sk_type == type)
 683                        touch_atime(nd.mnt, nd.dentry);
 684
 685                path_release(&nd);
 686
 687                err=-EPROTOTYPE;
 688                if (u->sk_type != type) {
 689                        sock_put(u);
 690                        goto fail;
 691                }
 692        } else {
 693                err = -ECONNREFUSED;
 694                u=unix_find_socket_byname(sunname, len, type, hash);
 695                if (u) {
 696                        struct dentry *dentry;
 697                        dentry = unix_sk(u)->dentry;
 698                        if (dentry)
 699                                touch_atime(unix_sk(u)->mnt, dentry);
 700                } else
 701                        goto fail;
 702        }
 703        return u;
 704
 705put_fail:
 706        path_release(&nd);
 707fail:
 708        *error=err;
 709        return NULL;
 710}
 711
 712
 713static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 714{
 715        struct sock *sk = sock->sk;
 716        struct unix_sock *u = unix_sk(sk);
 717        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 718        struct dentry * dentry = NULL;
 719        struct nameidata nd;
 720        int err;
 721        unsigned hash;
 722        struct unix_address *addr;
 723        struct hlist_head *list;
 724
 725        err = -EINVAL;
 726        if (sunaddr->sun_family != AF_UNIX)
 727                goto out;
 728
 729        if (addr_len==sizeof(short)) {
 730                err = unix_autobind(sock);
 731                goto out;
 732        }
 733
 734        err = unix_mkname(sunaddr, addr_len, &hash);
 735        if (err < 0)
 736                goto out;
 737        addr_len = err;
 738
 739        down(&u->readsem);
 740
 741        err = -EINVAL;
 742        if (u->addr)
 743                goto out_up;
 744
 745        err = -ENOMEM;
 746        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 747        if (!addr)
 748                goto out_up;
 749
 750        memcpy(addr->name, sunaddr, addr_len);
 751        addr->len = addr_len;
 752        addr->hash = hash ^ sk->sk_type;
 753        atomic_set(&addr->refcnt, 1);
 754
 755        if (sunaddr->sun_path[0]) {
 756                unsigned int mode;
 757                err = 0;
 758                /*
 759                 * Get the parent directory, calculate the hash for last
 760                 * component.
 761                 */
 762                err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 763                if (err)
 764                        goto out_mknod_parent;
 765                /*
 766                 * Yucky last component or no last component at all?
 767                 * (foo/., foo/.., /////)
 768                 */
 769                err = -EEXIST;
 770                if (nd.last_type != LAST_NORM)
 771                        goto out_mknod;
 772                /*
 773                 * Lock the directory.
 774                 */
 775                down(&nd.dentry->d_inode->i_sem);
 776                /*
 777                 * Do the final lookup.
 778                 */
 779                dentry = lookup_hash(&nd.last, nd.dentry);
 780                err = PTR_ERR(dentry);
 781                if (IS_ERR(dentry))
 782                        goto out_mknod_unlock;
 783                err = -ENOENT;
 784                /*
 785                 * Special case - lookup gave negative, but... we had foo/bar/
 786                 * From the vfs_mknod() POV we just have a negative dentry -
 787                 * all is fine. Let's be bastards - you had / on the end, you've
 788                 * been asking for (non-existent) directory. -ENOENT for you.
 789                 */
 790                if (nd.last.name[nd.last.len] && !dentry->d_inode)
 791                        goto out_mknod_dput;
 792                /*
 793                 * All right, let's create it.
 794                 */
 795                mode = S_IFSOCK |
 796                       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
 797                err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
 798                if (err)
 799                        goto out_mknod_dput;
 800                up(&nd.dentry->d_inode->i_sem);
 801                dput(nd.dentry);
 802                nd.dentry = dentry;
 803
 804                addr->hash = UNIX_HASH_SIZE;
 805        }
 806
 807        write_lock(&unix_table_lock);
 808
 809        if (!sunaddr->sun_path[0]) {
 810                err = -EADDRINUSE;
 811                if (__unix_find_socket_byname(sunaddr, addr_len,
 812                                              sk->sk_type, hash)) {
 813                        unix_release_addr(addr);
 814                        goto out_unlock;
 815                }
 816
 817                list = &unix_socket_table[addr->hash];
 818        } else {
 819                list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 820                u->dentry = nd.dentry;
 821                u->mnt    = nd.mnt;
 822        }
 823
 824        err = 0;
 825        __unix_remove_socket(sk);
 826        u->addr = addr;
 827        __unix_insert_socket(list, sk);
 828
 829out_unlock:
 830        write_unlock(&unix_table_lock);
 831out_up:
 832        up(&u->readsem);
 833out:
 834        return err;
 835
 836out_mknod_dput:
 837        dput(dentry);
 838out_mknod_unlock:
 839        up(&nd.dentry->d_inode->i_sem);
 840out_mknod:
 841        path_release(&nd);
 842out_mknod_parent:
 843        if (err==-EEXIST)
 844                err=-EADDRINUSE;
 845        unix_release_addr(addr);
 846        goto out_up;
 847}
 848
 /*
  *      Set, replace or clear the default peer of a socket.
  *
  *      For any family other than AF_UNSPEC the address goes through
  *      unix_mkname() and unix_find_other(); the peer found is installed
  *      only if unix_may_send() and security_unix_may_send() both agree.
  *      For AF_UNSPEC the current peer, if any, is dropped.
  *
  *      Returns 0 on success or a negative errno.
  */
 849static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 850                              int alen, int flags)
 851{
 852        struct sock *sk = sock->sk;
 853        struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
 854        struct sock *other;
 855        unsigned hash;
 856        int err;
 857
 858        if (addr->sa_family != AF_UNSPEC) {
                    /* A non-negative return from unix_mkname() replaces alen. */
 859                err = unix_mkname(sunaddr, alen, &hash);
 860                if (err < 0)
 861                        goto out;
 862                alen = err;
 863
                    /* Credential-passing socket without an address: autobind it first. */
 864                if (sock->passcred && !unix_sk(sk)->addr &&
 865                    (err = unix_autobind(sock)) != 0)
 866                        goto out;
 867
                    /* On failure unix_find_other() reports the reason through err. */
 868                other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
 869                if (!other)
 870                        goto out;
 871
 872                unix_state_wlock(sk);
 873
 874                err = -EPERM;
 875                if (!unix_may_send(sk, other))
 876                        goto out_unlock;
 877
 878                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 879                if (err)
 880                        goto out_unlock;
 881
 882        } else {
 883                /*
 884                 *      1003.1g breaking connected state with AF_UNSPEC
 885                 */
 886                other = NULL;
 887                unix_state_wlock(sk);
 888        }
 889
 890        /*
 891         * If it was connected, reconnect.
             * The new peer is stored under the state lock; the old peer is
             * notified (only if it really changed) and released after unlocking.
 892         */
 893        if (unix_peer(sk)) {
 894                struct sock *old_peer = unix_peer(sk);
 895                unix_peer(sk)=other;
 896                unix_state_wunlock(sk);
 897
 898                if (other != old_peer)
 899                        unix_dgram_disconnected(sk, old_peer);
 900                sock_put(old_peer);
 901        } else {
 902                unix_peer(sk)=other;
 903                unix_state_wunlock(sk);
 904        }
 905        return 0;
 906
            /* Reached only with sk write-locked and a non-NULL other. */
 907out_unlock:
 908        unix_state_wunlock(sk);
 909        sock_put(other);
 910out:
 911        return err;
 912}
 913
 /*
  *      Sleep on other's peer_wait queue for at most timeo, but only while
  *      other is alive, not shut down for receive and its receive queue is
  *      longer than sk_max_ack_backlog.
  *
  *      Releases the state read lock on other (unix_state_runlock() below),
  *      so the caller must hold it on entry.
  *
  *      Returns the value handed back by schedule_timeout(), or timeo
  *      unchanged if no sleep was needed.
  */
 914static long unix_wait_for_peer(struct sock *other, long timeo)
 915{
 916        struct unix_sock *u = unix_sk(other);
 917        int sched;
 918        DEFINE_WAIT(wait);
 919
            /*
             * Get on the wait queue before testing the condition and before
             * dropping the lock - presumably so that a wakeup arriving in
             * between is not missed.
             */
 920        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 921
 922        sched = !sock_flag(other, SOCK_DEAD) &&
 923                !(other->sk_shutdown & RCV_SHUTDOWN) &&
 924                (skb_queue_len(&other->sk_receive_queue) >
 925                 other->sk_max_ack_backlog);
 926
 927        unix_state_runlock(other);
 928
 929        if (sched)
 930                timeo = schedule_timeout(timeo);
 931
 932        finish_wait(&u->peer_wait, &wait);
 933        return timeo;
 934}
 935
 /*
  *      Connect sk to the listening socket named by uaddr.
  *
  *      A new sock (newsk) and a one byte skb are allocated up front.  Once
  *      the listener has been found and both sockets are locked, newsk is
  *      set up as the established peer of sk, inherits the listener's
  *      address/dentry, and the skb is queued on the listener's receive
  *      queue (presumably sock_wmalloc() ties the skb to newsk, which is how
  *      unix_accept() gets at it through skb->sk - confirm).
  *
  *      Returns 0 on success or a negative errno.
  */
 936static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 937                               int addr_len, int flags)
 938{
 939        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 940        struct sock *sk = sock->sk;
 941        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
 942        struct sock *newsk = NULL;
 943        struct sock *other = NULL;
 944        struct sk_buff *skb = NULL;
 945        unsigned hash;
 946        int st;
 947        int err;
 948        long timeo;
 949
 950        err = unix_mkname(sunaddr, addr_len, &hash);
 951        if (err < 0)
 952                goto out;
 953        addr_len = err;
 954
            /* Credential-passing socket without an address: autobind it first. */
 955        if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
 956                goto out;
 957
 958        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
 959
 960        /* First of all allocate resources.
 961           If we will make it after state is locked,
 962           we will have to recheck all again in any case.
 963         */
 964
 965        err = -ENOMEM;
 966
 967        /* create new sock for complete connection */
 968        newsk = unix_create1(NULL);
 969        if (newsk == NULL)
 970                goto out;
 971
 972        /* Allocate skb for sending to listening sock */
 973        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
 974        if (skb == NULL)
 975                goto out;
 976
 977restart:
 978        /*  Find listening sock. */
 979        other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
 980        if (!other)
 981                goto out;
 982
 983        /* Latch state of peer */
 984        unix_state_rlock(other);
 985
 986        /* Apparently VFS overslept socket death. Retry. */
 987        if (sock_flag(other, SOCK_DEAD)) {
 988                unix_state_runlock(other);
 989                sock_put(other);
 990                goto restart;
 991        }
 992
 993        err = -ECONNREFUSED;
 994        if (other->sk_state != TCP_LISTEN)
 995                goto out_unlock;
 996
            /* Listener backlog is full: fail with -EAGAIN or wait and retry. */
 997        if (skb_queue_len(&other->sk_receive_queue) >
 998            other->sk_max_ack_backlog) {
 999                err = -EAGAIN;
1000                if (!timeo)
1001                        goto out_unlock;
1002
                    /* unix_wait_for_peer() releases the read lock on other. */
1003                timeo = unix_wait_for_peer(other, timeo);
1004
1005                err = sock_intr_errno(timeo);
1006                if (signal_pending(current))
1007                        goto out;
1008                sock_put(other);
1009                goto restart;
1010        }
1011
1012        /* Latch our state.
1013
1014           It is tricky place. We need to grab write lock and cannot
1015           drop lock on peer. It is dangerous because deadlock is
1016           possible. Connect to self case and simultaneous
1017           attempt to connect are eliminated by checking socket
1018           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1019           check this before attempt to grab lock.
1020
1021           Well, and we have to recheck the state after socket locked.
1022         */
1023        st = sk->sk_state;
1024
1025        switch (st) {
1026        case TCP_CLOSE:
1027                /* This is ok... continue with connect */
1028                break;
1029        case TCP_ESTABLISHED:
1030                /* Socket is already connected */
1031                err = -EISCONN;
1032                goto out_unlock;
1033        default:
1034                err = -EINVAL;
1035                goto out_unlock;
1036        }
1037
1038        unix_state_wlock(sk);
1039
            /* State changed while we were taking the lock: start over. */
1040        if (sk->sk_state != st) {
1041                unix_state_wunlock(sk);
1042                unix_state_runlock(other);
1043                sock_put(other);
1044                goto restart;
1045        }
1046
1047        err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1048        if (err) {
1049                unix_state_wunlock(sk);
1050                goto out_unlock;
1051        }
1052
1053        /* The way is open! Quickly set all the necessary fields... */
1054
1055        sock_hold(sk);
1056        unix_peer(newsk)        = sk;
1057        newsk->sk_state         = TCP_ESTABLISHED;
1058        newsk->sk_type          = sk->sk_type;
1059        newsk->sk_peercred.pid  = current->tgid;
1060        newsk->sk_peercred.uid  = current->euid;
1061        newsk->sk_peercred.gid  = current->egid;
1062        newu = unix_sk(newsk);
1063        newsk->sk_sleep         = &newu->peer_wait;
1064        otheru = unix_sk(other);
1065
1066        /* copy address information from listening to new sock*/
1067        if (otheru->addr) {
1068                atomic_inc(&otheru->addr->refcnt);
1069                newu->addr = otheru->addr;
1070        }
1071        if (otheru->dentry) {
1072                newu->dentry    = dget(otheru->dentry);
1073                newu->mnt       = mntget(otheru->mnt);
1074        }
1075
1076        /* Set credentials */
1077        sk->sk_peercred = other->sk_peercred;
1078
1079        sock_hold(newsk);
1080        unix_peer(sk)   = newsk;
1081        sock->state     = SS_CONNECTED;
1082        sk->sk_state    = TCP_ESTABLISHED;
1083
1084        unix_state_wunlock(sk);
1085
1086        /* take ten and send info to listening sock */
1087        spin_lock(&other->sk_receive_queue.lock);
1088        __skb_queue_tail(&other->sk_receive_queue, skb);
1089        /* Undo artificially decreased inflight after embryo
1090         * is installed to listening socket. */
1091        atomic_inc(&newu->inflight);
1092        spin_unlock(&other->sk_receive_queue.lock);
1093        unix_state_runlock(other);
1094        other->sk_data_ready(other, 0);
1095        sock_put(other);
1096        return 0;
1097
            /* Entered with the read lock on other still held. */
1098out_unlock:
1099        if (other)
1100                unix_state_runlock(other);
1101
            /* Release whatever was set up so far; each pointer is NULL if unused. */
1102out:
1103        if (skb)
1104                kfree_skb(skb);
1105        if (newsk)
1106                unix_release_sock(newsk, 0);
1107        if (other)
1108                sock_put(other);
1109        return err;
1110}
1111
1112static int unix_socketpair(struct socket *socka, struct socket *sockb)
1113{
1114        struct sock *ska=socka->sk, *skb = sockb->sk;
1115
1116        /* Join our sockets back to back */
1117        sock_hold(ska);
1118        sock_hold(skb);
1119        unix_peer(ska)=skb;
1120        unix_peer(skb)=ska;
1121        ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1122        ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1123        ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1124
1125        if (ska->sk_type != SOCK_DGRAM) {
1126                ska->sk_state = TCP_ESTABLISHED;
1127                skb->sk_state = TCP_ESTABLISHED;
1128                socka->state  = SS_CONNECTED;
1129                sockb->state  = SS_CONNECTED;
1130        }
1131        return 0;
1132}
1133
1134static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1135{
1136        struct sock *sk = sock->sk;
1137        struct sock *tsk;
1138        struct sk_buff *skb;
1139        int err;
1140
1141        err = -EOPNOTSUPP;
1142        if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1143                goto out;
1144
1145        err = -EINVAL;
1146        if (sk->sk_state != TCP_LISTEN)
1147                goto out;
1148
1149        /* If socket state is TCP_LISTEN it cannot change (for now...),
1150         * so that no locks are necessary.
1151         */
1152
1153        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1154        if (!skb) {
1155                /* This means receive shutdown. */
1156                if (err == 0)
1157                        err = -EINVAL;
1158                goto out;
1159        }
1160
1161        tsk = skb->sk;
1162        skb_free_datagram(sk, skb);
1163        wake_up_interruptible(&unix_sk(sk)->peer_wait);
1164
1165        /* attach accepted sock to socket */
1166        unix_state_wlock(tsk);
1167        newsock->state = SS_CONNECTED;
1168        sock_graft(tsk, newsock);
1169        unix_state_wunlock(tsk);
1170        return 0;
1171
1172out:
1173        return err;
1174}
1175
1176
1177static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1178{
1179        struct sock *sk = sock->sk;
1180        struct unix_sock *u;
1181        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1182        int err = 0;
1183
1184        if (peer) {
1185                sk = unix_peer_get(sk);
1186
1187                err = -ENOTCONN;
1188                if (!sk)
1189                        goto out;
1190                err = 0;
1191        } else {
1192                sock_hold(sk);
1193        }
1194
1195        u = unix_sk(sk);
1196        unix_state_rlock(sk);
1197        if (!u->addr) {
1198                sunaddr->sun_family = AF_UNIX;
1199                sunaddr->sun_path[0] = 0;
1200                *uaddr_len = sizeof(short);
1201        } else {
1202                struct unix_address *addr = u->addr;
1203
1204                *uaddr_len = addr->len;
1205                memcpy(sunaddr, addr->name, *uaddr_len);
1206        }
1207        unix_state_runlock(sk);
1208        sock_put(sk);
1209out:
1210        return err;
1211}
1212
1213static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1214{
1215        int i;
1216
1217        scm->fp = UNIXCB(skb).fp;
1218        skb->destructor = sock_wfree;
1219        UNIXCB(skb).fp = NULL;
1220
1221        for (i=scm->fp->count-1; i>=0; i--)
1222                unix_notinflight(scm->fp->fp[i]);
1223}
1224
1225static void unix_destruct_fds(struct sk_buff *skb)
1226{
1227        struct scm_cookie scm;
1228        memset(&scm, 0, sizeof(scm));
1229        unix_detach_fds(&scm, skb);
1230
1231        /* Alas, it calls VFS */
1232        /* So fscking what? fput() had been SMP-safe since the last Summer */
1233        scm_destroy(&scm);
1234        sock_wfree(skb);
1235}
1236
1237static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1238{
1239        int i;
1240        for (i=scm->fp->count-1; i>=0; i--)
1241                unix_inflight(scm->fp->fp[i]);
1242        UNIXCB(skb).fp = scm->fp;
1243        skb->destructor = unix_destruct_fds;
1244        scm->fp = NULL;
1245}
1246
1247/*
1248 *      Send AF_UNIX data.
1249 */
1250
/*
 *      Send one datagram, either to the address in msg->msg_name or, when
 *      no name is given, to the connected peer.  Also the back end of
 *      unix_seqpacket_sendmsg().
 *
 *      The whole message is copied into a single skb together with the
 *      sender's credentials and any passed files, then queued on the
 *      receiver.  If the receiver's queue is over its backlog limit (and
 *      the receiver is not connected back to us) the call waits or fails
 *      with -EAGAIN.
 *
 *      Returns len on success or a negative errno.
 */
1251static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1252                              struct msghdr *msg, size_t len)
1253{
1254        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1255        struct sock *sk = sock->sk;
1256        struct unix_sock *u = unix_sk(sk);
1257        struct sockaddr_un *sunaddr=msg->msg_name;
1258        struct sock *other = NULL;
1259        int namelen = 0; /* fake GCC */
1260        int err;
1261        unsigned hash;
1262        struct sk_buff *skb;
1263        long timeo;
1264        struct scm_cookie tmp_scm;
1265
            /* Use the on-stack cookie when the iocb did not bring one. */
1266        if (NULL == siocb->scm)
1267                siocb->scm = &tmp_scm;
1268        err = scm_send(sock, msg, siocb->scm);
1269        if (err < 0)
1270                return err;
1271
1272        err = -EOPNOTSUPP;
1273        if (msg->msg_flags&MSG_OOB)
1274                goto out;
1275
1276        if (msg->msg_namelen) {
1277                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1278                if (err < 0)
1279                        goto out;
1280                namelen = err;
1281        } else {
                    /* No destination given: fall back to the connected peer. */
1282                sunaddr = NULL;
1283                err = -ENOTCONN;
1284                other = unix_peer_get(sk);
1285                if (!other)
1286                        goto out;
1287        }
1288
            /* Credential-passing socket without an address: autobind it first. */
1289        if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
1290                goto out;
1291
1292        err = -EMSGSIZE;
1293        if (len > sk->sk_sndbuf - 32)
1294                goto out;
1295
1296        skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1297        if (skb==NULL)
1298                goto out;
1299
1300        memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1301        if (siocb->scm->fp)
1302                unix_attach_fds(siocb->scm, skb);
1303
1304        skb->h.raw = skb->data;
1305        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1306        if (err)
1307                goto out_free;
1308
1309        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1310
            /*
             * Re-entered after waiting for queue space (other still set) or
             * after the receiver turned out to be dead (other == NULL, so an
             * addressed send looks the name up again).
             */
1311restart:
1312        if (!other) {
1313                err = -ECONNRESET;
1314                if (sunaddr == NULL)
1315                        goto out_free;
1316
1317                other = unix_find_other(sunaddr, namelen, sk->sk_type,
1318                                        hash, &err);
1319                if (other==NULL)
1320                        goto out_free;
1321        }
1322
1323        unix_state_rlock(other);
1324        err = -EPERM;
1325        if (!unix_may_send(sk, other))
1326                goto out_unlock;
1327
1328        if (sock_flag(other, SOCK_DEAD)) {
1329                /*
1330                 *      Check with 1003.1g - what should
1331                 *      datagram error
1332                 */
                    /* Give up the reference we took for this send attempt. */
1333                unix_state_runlock(other);
1334                sock_put(other);
1335
1336                err = 0;
1337                unix_state_wlock(sk);
                    /*
                     * If the dead socket is also our connected peer, break the
                     * association, drop the peer reference and report
                     * -ECONNREFUSED; otherwise retry the lookup.
                     */
1338                if (unix_peer(sk) == other) {
1339                        unix_peer(sk)=NULL;
1340                        unix_state_wunlock(sk);
1341
1342                        unix_dgram_disconnected(sk, other);
1343                        sock_put(other);
1344                        err = -ECONNREFUSED;
1345                } else {
1346                        unix_state_wunlock(sk);
1347                }
1348
1349                other = NULL;
1350                if (err)
1351                        goto out_free;
1352                goto restart;
1353        }
1354
1355        err = -EPIPE;
1356        if (other->sk_shutdown & RCV_SHUTDOWN)
1357                goto out_unlock;
1358
1359        if (sk->sk_type != SOCK_SEQPACKET) {
1360                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1361                if (err)
1362                        goto out_unlock;
1363        }
1364
            /* Backlog limit applies unless the receiver is connected back to us. */
1365        if (unix_peer(other) != sk &&
1366            (skb_queue_len(&other->sk_receive_queue) >
1367             other->sk_max_ack_backlog)) {
1368                if (!timeo) {
1369                        err = -EAGAIN;
1370                        goto out_unlock;
1371                }
1372
                    /* unix_wait_for_peer() releases the read lock on other. */
1373                timeo = unix_wait_for_peer(other, timeo);
1374
1375                err = sock_intr_errno(timeo);
1376                if (signal_pending(current))
1377                        goto out_free;
1378
1379                goto restart;
1380        }
1381
1382        skb_queue_tail(&other->sk_receive_queue, skb);
1383        unix_state_runlock(other);
1384        other->sk_data_ready(other, len);
1385        sock_put(other);
1386        scm_destroy(siocb->scm);
1387        return len;
1388
            /* Entered with the read lock on other held. */
1389out_unlock:
1390        unix_state_runlock(other);
            /* Entered with an skb allocated but not queued. */
1391out_free:
1392        kfree_skb(skb);
1393out:
1394        if (other)
1395                sock_put(other);
1396        scm_destroy(siocb->scm);
1397        return err;
1398}
1399
1400                
/*
 *      Send data on a connected stream socket.
 *
 *      The payload is cut into skbs of at most sk_sndbuf/2 - 64 bytes (and
 *      at most SKB_MAX_ALLOC); each one carries the sender's credentials
 *      and is queued directly on the peer's receive queue.  Passed files
 *      travel with the first skb only, because unix_attach_fds() clears
 *      scm->fp.
 *
 *      A destination address is rejected (-EISCONN when established,
 *      -EOPNOTSUPP otherwise).  Returns the number of bytes queued if that
 *      is non-zero, else a negative errno; SIGPIPE is raised on a broken
 *      pipe only if nothing was sent and MSG_NOSIGNAL is not set.
 */
1401static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1402                               struct msghdr *msg, size_t len)
1403{
1404        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1405        struct sock *sk = sock->sk;
1406        struct sock *other = NULL;
1407        struct sockaddr_un *sunaddr=msg->msg_name;
1408        int err,size;
1409        struct sk_buff *skb;
1410        int sent=0;
1411        struct scm_cookie tmp_scm;
1412
            /* Use the on-stack cookie when the iocb did not bring one. */
1413        if (NULL == siocb->scm)
1414                siocb->scm = &tmp_scm;
1415        err = scm_send(sock, msg, siocb->scm);
1416        if (err < 0)
1417                return err;
1418
1419        err = -EOPNOTSUPP;
1420        if (msg->msg_flags&MSG_OOB)
1421                goto out_err;
1422
1423        if (msg->msg_namelen) {
1424                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1425                goto out_err;
1426        } else {
1427                sunaddr = NULL;
1428                err = -ENOTCONN;
1429                other = unix_peer_get(sk);
1430                if (!other)
1431                        goto out_err;
1432        }
1433
1434        if (sk->sk_shutdown & SEND_SHUTDOWN)
1435                goto pipe_err;
1436
1437        while(sent < len)
1438        {
1439                /*
1440                 *      Optimisation for the fact that under 0.01% of X messages typically
1441                 *      need breaking up.
1442                 */
1443
1444                size=len-sent;
1445
1446                /* Keep two messages in the pipe so it schedules better */
1447                if (size > sk->sk_sndbuf / 2 - 64)
1448                        size = sk->sk_sndbuf / 2 - 64;
1449
1450                if (size > SKB_MAX_ALLOC)
1451                        size = SKB_MAX_ALLOC;
1452                        
1453                /*
1454                 *      Grab a buffer
1455                 */
1456                 
1457                skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1458
1459                if (skb==NULL)
1460                        goto out_err;
1461
1462                /*
1463                 *      If you pass two values to the sock_alloc_send_skb
1464                 *      it tries to grab the large buffer with GFP_NOFS
1465                 *      (which can fail easily), and if it fails grab the
1466                 *      fallback size buffer which is under a page and will
1467                 *      succeed. [Alan]
1468                 */
1469                size = min_t(int, size, skb_tailroom(skb));
1470
1471                memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1472                if (siocb->scm->fp)
1473                        unix_attach_fds(siocb->scm, skb);
1474
1475                if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1476                        kfree_skb(skb);
1477                        goto out_err;
1478                }
1479
1480                unix_state_rlock(other);
1481
                    /* Peer gone or no longer reading: treat as a broken pipe. */
1482                if (sock_flag(other, SOCK_DEAD) ||
1483                    (other->sk_shutdown & RCV_SHUTDOWN))
1484                        goto pipe_err_free;
1485
1486                skb_queue_tail(&other->sk_receive_queue, skb);
1487                unix_state_runlock(other);
1488                other->sk_data_ready(other, size);
1489                sent+=size;
1490        }
1491        sock_put(other);
1492
1493        scm_destroy(siocb->scm);
1494        siocb->scm = NULL;
1495
1496        return sent;
1497
            /* Entered with the read lock on other held and skb not queued. */
1498pipe_err_free:
1499        unix_state_runlock(other);
1500        kfree_skb(skb);
1501pipe_err:
1502        if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1503                send_sig(SIGPIPE,current,0);
1504        err = -EPIPE;
1505out_err:
1506        if (other)
1507                sock_put(other);
1508        scm_destroy(siocb->scm);
1509        siocb->scm = NULL;
            /* A partial send is reported as success; err only if nothing went out. */
1510        return sent ? : err;
1511}
1512
1513static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1514                                  struct msghdr *msg, size_t len)
1515{
1516        int err;
1517        struct sock *sk = sock->sk;
1518        
1519        err = sock_error(sk);
1520        if (err)
1521                return err;
1522
1523        if (sk->sk_state != TCP_ESTABLISHED)
1524                return -ENOTCONN;
1525
1526        if (msg->msg_namelen)
1527                msg->msg_namelen = 0;
1528
1529        return unix_dgram_sendmsg(kiocb, sock, msg, len);
1530}
1531                                                                                            
1532static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1533{
1534        struct unix_sock *u = unix_sk(sk);
1535
1536        msg->msg_namelen = 0;
1537        if (u->addr) {
1538                msg->msg_namelen = u->addr->len;
1539                memcpy(msg->msg_name, u->addr->name, u->addr->len);
1540        }
1541}
1542
1543static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1544                              struct msghdr *msg, size_t size,
1545                              int flags)
1546{
1547        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1548        struct scm_cookie tmp_scm;
1549        struct sock *sk = sock->sk;
1550        struct unix_sock *u = unix_sk(sk);
1551        int noblock = flags & MSG_DONTWAIT;
1552        struct sk_buff *skb;
1553        int err;
1554
1555        err = -EOPNOTSUPP;
1556        if (flags&MSG_OOB)
1557                goto out;
1558
1559        msg->msg_namelen = 0;
1560
1561        down(&u->readsem);
1562
1563        skb = skb_recv_datagram(sk, flags, noblock, &err);
1564        if (!skb)
1565                goto out_unlock;
1566
1567        wake_up_interruptible(&u->peer_wait);
1568
1569        if (msg->msg_name)
1570                unix_copy_addr(msg, skb->sk);
1571
1572        if (size > skb->len)
1573                size = skb->len;
1574        else if (size < skb->len)
1575                msg->msg_flags |= MSG_TRUNC;
1576
1577        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1578        if (err)
1579                goto out_free;
1580
1581        if (!siocb->scm) {
1582                siocb->scm = &tmp_scm;
1583                memset(&tmp_scm, 0, sizeof(tmp_scm));
1584        }
1585        siocb->scm->creds = *UNIXCREDS(skb);
1586
1587        if (!(flags & MSG_PEEK))
1588        {
1589                if (UNIXCB(skb).fp)
1590                        unix_detach_fds(siocb->scm, skb);
1591        }
1592        else 
1593        {
1594                /* It is questionable: on PEEK we could:
1595                   - do not return fds - good, but too simple 8)
1596                   - return fds, and do not return them on read (old strategy,
1597                     apparently wrong)
1598                   - clone fds (I chose it for now, it is the most universal
1599                     solution)
1600                
1601                   POSIX 1003.1g does not actually define this clearly
1602                   at all. POSIX 1003.1g doesn't define a lot of things
1603                   clearly however!                  
1604                   
1605                */
1606                if (UNIXCB(skb).fp)
1607                        siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1608        }
1609        err = size;
1610
1611        scm_recv(sock, msg, siocb->scm, flags);
1612
1613out_free:
1614        skb_free_datagram(sk,skb);
1615out_unlock:
1616        up(&u->readsem);
1617out:
1618        return err;
1619}
1620
1621/*
1622 *      Sleep until data has arrived, re-checking the wake-up conditions to avoid races.
1623 */
1624 
/*
 *      Block until the receive queue is non-empty, an error is pending,
 *      the socket is shut down for receive, a signal is pending or the
 *      timeout has run out.  Returns the remaining timeout.
 *
 *      Takes and releases the state read lock on sk itself; the lock is
 *      dropped across schedule_timeout().
 */
1625static long unix_stream_data_wait(struct sock * sk, long timeo)
1626{
1627        DEFINE_WAIT(wait);
1628
1629        unix_state_rlock(sk);
1630
1631        for (;;) {
                    /* Queue up before testing, then re-test after every wakeup. */
1632                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1633
1634                if (skb_queue_len(&sk->sk_receive_queue) ||
1635                    sk->sk_err ||
1636                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1637                    signal_pending(current) ||
1638                    !timeo)
1639                        break;
1640
                    /* SOCK_ASYNC_WAITDATA is set only while we are actually asleep. */
1641                set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1642                unix_state_runlock(sk);
1643                timeo = schedule_timeout(timeo);
1644                unix_state_rlock(sk);
1645                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1646        }
1647
1648        finish_wait(sk->sk_sleep, &wait);
1649        unix_state_runlock(sk);
1650        return timeo;
1651}
1652
1653
1654
/*
 *      Receive data from a connected stream socket.
 *
 *      skbs are taken off the receive queue one at a time under u->readsem
 *      and copied out until size bytes were delivered, the queue is empty
 *      with at least the low-water target copied, the next skb carries
 *      different credentials, or passed files were picked up.  A partly
 *      consumed skb is put back at the head of the queue.  With MSG_PEEK
 *      only the first skb is looked at and it is always put back.
 *
 *      Returns the number of bytes copied if that is non-zero, else err
 *      (-EINVAL when not established, -EOPNOTSUPP for MSG_OOB, -EAGAIN when
 *      no data and no time left, a pending socket error, or the
 *      sock_intr_errno() value on a signal).
 */
1655static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1656                               struct msghdr *msg, size_t size,
1657                               int flags)
1658{
1659        struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1660        struct scm_cookie tmp_scm;
1661        struct sock *sk = sock->sk;
1662        struct unix_sock *u = unix_sk(sk);
1663        struct sockaddr_un *sunaddr=msg->msg_name;
1664        int copied = 0;
1665        int check_creds = 0;
1666        int target;
1667        int err = 0;
1668        long timeo;
1669
1670        err = -EINVAL;
1671        if (sk->sk_state != TCP_ESTABLISHED)
1672                goto out;
1673
1674        err = -EOPNOTSUPP;
1675        if (flags&MSG_OOB)
1676                goto out;
1677
1678        target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1679        timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1680
1681        msg->msg_namelen = 0;
1682
1683        /* Lock the socket to prevent queue disordering
1684         * while sleeps in memcpy_tomsg
1685         */
1686
            /* Use the on-stack cookie when the iocb did not bring one. */
1687        if (!siocb->scm) {
1688                siocb->scm = &tmp_scm;
1689                memset(&tmp_scm, 0, sizeof(tmp_scm));
1690        }
1691
1692        down(&u->readsem);
1693
1694        do
1695        {
1696                int chunk;
1697                struct sk_buff *skb;
1698
1699                skb = skb_dequeue(&sk->sk_receive_queue);
1700                if (skb==NULL)
1701                {
1702                        if (copied >= target)
1703                                break;
1704
1705                        /*
1706                         *      POSIX 1003.1g mandates this order.
1707                         */
1708                         
1709                        if ((err = sock_error(sk)) != 0)
1710                                break;
1711                        if (sk->sk_shutdown & RCV_SHUTDOWN)
1712                                break;
1713                        err = -EAGAIN;
1714                        if (!timeo)
1715                                break;
                            /* Do not hold readsem while sleeping. */
1716                        up(&u->readsem);
1717
1718                        timeo = unix_stream_data_wait(sk, timeo);
1719
                            /*
                             * readsem is not held here, so leave through "out";
                             * note that this path does not call scm_recv().
                             */
1720                        if (signal_pending(current)) {
1721                                err = sock_intr_errno(timeo);
1722                                goto out;
1723                        }
1724                        down(&u->readsem);
1725                        continue;
1726                }
1727
1728                if (check_creds) {
1729                        /* Never glue messages from different writers */
1730                        if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1731                                skb_queue_head(&sk->sk_receive_queue, skb);
1732                                break;
1733                        }
1734                } else {
1735                        /* Copy credentials */
1736                        siocb->scm->creds = *UNIXCREDS(skb);
1737                        check_creds = 1;
1738                }
1739
1740                /* Copy address just once */
1741                if (sunaddr)
1742                {
1743                        unix_copy_addr(msg, skb->sk);
1744                        sunaddr = NULL;
1745                }
1746
1747                chunk = min_t(unsigned int, skb->len, size);
                    /* Copy failed: requeue the skb; report -EFAULT only if nothing was copied yet. */
1748                if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1749                        skb_queue_head(&sk->sk_receive_queue, skb);
1750                        if (copied == 0)
1751                                copied = -EFAULT;
1752                        break;
1753                }
1754                copied += chunk;
1755                size -= chunk;
1756
1757                /* Mark read part of skb as used */
1758                if (!(flags & MSG_PEEK))
1759                {
1760                        skb_pull(skb, chunk);
1761
1762                        if (UNIXCB(skb).fp)
1763                                unix_detach_fds(siocb->scm, skb);
1764
1765                        /* put the skb back if we didn't use it up.. */
1766                        if (skb->len)
1767                        {
1768                                skb_queue_head(&sk->sk_receive_queue, skb);
1769                                break;
1770                        }
1771
1772                        kfree_skb(skb);
1773
                            /* Stop once files were picked up, so they are delivered with this read. */
1774                        if (siocb->scm->fp)
1775                                break;
1776                }
1777                else
1778                {
1779                        /* It is questionable, see note in unix_dgram_recvmsg.
1780                         */
1781                        if (UNIXCB(skb).fp)
1782                                siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1783
1784                        /* put message back and return */
1785                        skb_queue_head(&sk->sk_receive_queue, skb);
1786                        break;
1787                }
1788        } while (size);
1789
1790        up(&u->readsem);
1791        scm_recv(sock, msg, siocb->scm, flags);
1792out:
            /* copied may also hold -EFAULT from the failed-copy path above. */
1793        return copied ? : err;
1794}
1795
1796static int unix_shutdown(struct socket *sock, int mode)
1797{
1798        struct sock *sk = sock->sk;
1799        struct sock *other;
1800
1801        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1802
1803        if (mode) {
1804                unix_state_wlock(sk);
1805                sk->sk_shutdown |= mode;
1806                other=unix_peer(sk);
1807                if (other)
1808                        sock_hold(other);
1809                unix_state_wunlock(sk);
1810                sk->sk_state_change(sk);
1811
1812                if (other &&
1813                        (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1814
1815                        int peer_mode = 0;
1816
1817                        if (mode&RCV_SHUTDOWN)
1818                                peer_mode |= SEND_SHUTDOWN;
1819                        if (mode&SEND_SHUTDOWN)
1820                                peer_mode |= RCV_SHUTDOWN;
1821                        unix_state_wlock(other);
1822                        other->sk_shutdown |= peer_mode;
1823                        unix_state_wunlock(other);
1824                        other->sk_state_change(other);
1825                        read_lock(&other->sk_callback_lock);
1826                        if (peer_mode == SHUTDOWN_MASK)
1827                                sk_wake_async(other,1,POLL_HUP);
1828                        else if (peer_mode & RCV_SHUTDOWN)
1829                                sk_wake_async(other,1,POLL_IN);
1830                        read_unlock(&other->sk_callback_lock);
1831                }
1832                if (other)
1833                        sock_put(other);
1834        }
1835        return 0;
1836}
1837
1838static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1839{
1840        struct sock *sk = sock->sk;
1841        long amount=0;
1842        int err;
1843
1844        switch(cmd)
1845        {
1846                case SIOCOUTQ:
1847                        amount = atomic_read(&sk->sk_wmem_alloc);
1848                        err = put_user(amount, (int __user *)arg);
1849                        break;
1850                case SIOCINQ:
1851                {
1852                        struct sk_buff *skb;
1853
1854                        if (sk->sk_state == TCP_LISTEN) {
1855                                err = -EINVAL;
1856                                break;
1857                        }
1858
1859                        spin_lock(&sk->sk_receive_queue.lock);
1860                        if (sk->sk_type == SOCK_STREAM ||
1861                            sk->sk_type == SOCK_SEQPACKET) {
1862                                skb_queue_walk(&sk->sk_receive_queue, skb)
1863                                        amount += skb->len;
1864                        } else {
1865                                skb = skb_peek(&sk->sk_receive_queue);
1866                                if (skb)
1867                                        amount=skb->len;
1868                        }
1869                        spin_unlock(&sk->sk_receive_queue.lock);
1870                        err = put_user(amount, (int __user *)arg);
1871                        break;
1872                }
1873
1874                default:
1875                        err = dev_ioctl(cmd, (void __user *)arg);
1876                        break;
1877        }
1878        return err;
1879}
1880
1881static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1882{
1883        struct sock *sk = sock->sk;
1884        unsigned int mask;
1885
1886        poll_wait(file, sk->sk_sleep, wait);
1887        mask = 0;
1888
1889        /* exceptional events? */
1890        if (sk->sk_err)
1891                mask |= POLLERR;
1892        if (sk->sk_shutdown == SHUTDOWN_MASK)
1893                mask |= POLLHUP;
1894
1895        /* readable? */
1896        if (!skb_queue_empty(&sk->sk_receive_queue) ||
1897            (sk->sk_shutdown & RCV_SHUTDOWN))
1898                mask |= POLLIN | POLLRDNORM;
1899
1900        /* Connection-based need to check for termination and startup */
1901        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1902                mask |= POLLHUP;
1903
1904        /*
1905         * we set writable also when the other side has shut down the
1906         * connection. This prevents stuck sockets.
1907         */
1908        if (unix_writable(sk))
1909                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1910
1911        return mask;
1912}
1913
1914
1915#ifdef CONFIG_PROC_FS
1916static struct sock *unix_seq_idx(int *iter, loff_t pos)
1917{
1918        loff_t off = 0;
1919        struct sock *s;
1920
1921        for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1922                if (off == pos) 
1923                        return s;
1924                ++off;
1925        }
1926        return NULL;
1927}
1928
1929
1930static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1931{
1932        read_lock(&unix_table_lock);
1933        return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1934}
1935
1936static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1937{
1938        ++*pos;
1939
1940        if (v == (void *)1) 
1941                return first_unix_socket(seq->private);
1942        return next_unix_socket(seq->private, v);
1943}
1944
1945static void unix_seq_stop(struct seq_file *seq, void *v)
1946{
1947        read_unlock(&unix_table_lock);
1948}
1949
1950static int unix_seq_show(struct seq_file *seq, void *v)
1951{
1952        
1953        if (v == (void *)1)
1954                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1955                         "Inode Path\n");
1956        else {
1957                struct sock *s = v;
1958                struct unix_sock *u = unix_sk(s);
1959                unix_state_rlock(s);
1960
1961                seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1962                        s,
1963                        atomic_read(&s->sk_refcnt),
1964                        0,
1965                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1966                        s->sk_type,
1967                        s->sk_socket ?
1968                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1969                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1970                        sock_i_ino(s));
1971
1972                if (u->addr) {
1973                        int i, len;
1974                        seq_putc(seq, ' ');
1975
1976                        i = 0;
1977                        len = u->addr->len - sizeof(short);
1978                        if (!UNIX_ABSTRACT(s))
1979                                len--;
1980                        else {
1981                                seq_putc(seq, '@');
1982                                i++;
1983                        }
1984                        for ( ; i < len; i++)
1985                                seq_putc(seq, u->addr->name->sun_path[i]);
1986                }
1987                unix_state_runlock(s);
1988                seq_putc(seq, '\n');
1989        }
1990
1991        return 0;
1992}
1993
1994static struct seq_operations unix_seq_ops = {
1995        .start  = unix_seq_start,
1996        .next   = unix_seq_next,
1997        .stop   = unix_seq_stop,
1998        .show   = unix_seq_show,
1999};
2000
2001
2002static int unix_seq_open(struct inode *inode, struct file *file)
2003{
2004        struct seq_file *seq;
2005        int rc = -ENOMEM;
2006        int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2007
2008        if (!iter)
2009                goto out;
2010
2011        rc = seq_open(file, &unix_seq_ops);
2012        if (rc)
2013                goto out_kfree;
2014
2015        seq          = file->private_data;
2016        seq->private = iter;
2017        *iter = 0;
2018out:
2019        return rc;
2020out_kfree:
2021        kfree(iter);
2022        goto out;
2023}
2024
2025static struct file_operations unix_seq_fops = {
2026        .owner          = THIS_MODULE,
2027        .open           = unix_seq_open,
2028        .read           = seq_read,
2029        .llseek         = seq_lseek,
2030        .release        = seq_release_private,
2031};
2032
2033#endif
2034
2035static struct net_proto_family unix_family_ops = {
2036        .family = PF_UNIX,
2037        .create = unix_create,
2038        .owner  = THIS_MODULE,
2039};
2040
/*
 * sysctl hooks: empty inline stubs when CONFIG_SYSCTL is not set,
 * otherwise provided by another translation unit.
 */
#ifndef CONFIG_SYSCTL
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#else
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#endif
2048
2049static int __init af_unix_init(void)
2050{
2051        struct sk_buff *dummy_skb;
2052
2053        if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2054                printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2055                return -1;
2056        }
2057        /* allocate our sock slab cache */
2058        unix_sk_cachep = kmem_cache_create("unix_sock",
2059                                           sizeof(struct unix_sock), 0,
2060                                           SLAB_HWCACHE_ALIGN, NULL, NULL);
2061        if (!unix_sk_cachep)
2062                printk(KERN_CRIT
2063                        "af_unix_init: Cannot create unix_sock SLAB cache!\n");
2064
2065        sock_register(&unix_family_ops);
2066#ifdef CONFIG_PROC_FS
2067        proc_net_fops_create("unix", 0, &unix_seq_fops);
2068#endif
2069        unix_sysctl_register();
2070        return 0;
2071}
2072
2073static void __exit af_unix_exit(void)
2074{
2075        sock_unregister(PF_UNIX);
2076        unix_sysctl_unregister();
2077        proc_net_remove("unix");
2078        kmem_cache_destroy(unix_sk_cachep);
2079}
2080
2081module_init(af_unix_init);
2082module_exit(af_unix_exit);
2083
2084MODULE_LICENSE("GPL");
2085MODULE_ALIAS_NETPROTO(PF_UNIX);
2086
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */