linux-old/net/ipv4/af_inet.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              PF_INET protocol family socket handler.
   7 *
   8 * Version:     $Id: af_inet.c,v 1.86 1999/03/25 00:38:15 davem Exp $
   9 *
  10 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Florian La Roche, <flla@stud.uni-sb.de>
  13 *              Alan Cox, <A.Cox@swansea.ac.uk>
  14 *
  15 * Changes (see also sock.c)
  16 *
  17 *              A.N.Kuznetsov   :       Socket death error in accept().
  18 *              John Richardson :       Fix non blocking error in connect()
  19 *                                      so sockets that fail to connect
  20 *                                      don't return -EINPROGRESS.
  21 *              Alan Cox        :       Asynchronous I/O support
  22 *              Alan Cox        :       Keep correct socket pointer on sock structures
  23 *                                      when accept() ed
  24 *              Alan Cox        :       Semantics of SO_LINGER aren't state moved
  25 *                                      to close when you look carefully. With
  26 *                                      this fixed and the accept bug fixed 
  27 *                                      some RPC stuff seems happier.
  28 *              Niibe Yutaka    :       4.4BSD style write async I/O
  29 *              Alan Cox, 
  30 *              Tony Gale       :       Fixed reuse semantics.
  31 *              Alan Cox        :       bind() shouldn't abort existing but dead
  32 *                                      sockets. Stops FTP netin:.. I hope.
  33 *              Alan Cox        :       bind() works correctly for RAW sockets. Note
  34 *                                      that FreeBSD at least was broken in this respect
  35 *                                      so be careful with compatibility tests...
  36 *              Alan Cox        :       routing cache support
  37 *              Alan Cox        :       memzero the socket structure for compactness.
  38 *              Matt Day        :       nonblock connect error handler
  39 *              Alan Cox        :       Allow large numbers of pending sockets
  40 *                                      (eg for big web sites), but only if
  41 *                                      specifically application requested.
  42 *              Alan Cox        :       New buffering throughout IP. Used dumbly.
  43 *              Alan Cox        :       New buffering now used smartly.
  44 *              Alan Cox        :       BSD rather than common sense interpretation of
  45 *                                      listen.
  46 *              Germano Caronni :       Assorted small races.
  47 *              Alan Cox        :       sendmsg/recvmsg basic support.
  48 *              Alan Cox        :       Only sendmsg/recvmsg now supported.
  49 *              Alan Cox        :       Locked down bind (see security list).
  50 *              Alan Cox        :       Loosened bind a little.
  51 *              Mike McLagan    :       ADD/DEL DLCI Ioctls
  52 *      Willy Konynenberg       :       Transparent proxying support.
  53 *              David S. Miller :       New socket lookup architecture.
  54 *                                      Some other random speedups.
  55 *              Cyrus Durgin    :       Cleaned up file for kmod hacks.
  56 *              Andi Kleen      :       Fix inet_stream_connect TCP race.
  57 *
  58 *              This program is free software; you can redistribute it and/or
  59 *              modify it under the terms of the GNU General Public License
  60 *              as published by the Free Software Foundation; either version
  61 *              2 of the License, or (at your option) any later version.
  62 */
  63
  64#include <linux/config.h>
  65#include <linux/errno.h>
  66#include <linux/types.h>
  67#include <linux/socket.h>
  68#include <linux/in.h>
  69#include <linux/kernel.h>
  70#include <linux/major.h>
  71#include <linux/sched.h>
  72#include <linux/timer.h>
  73#include <linux/string.h>
  74#include <linux/sockios.h>
  75#include <linux/net.h>
  76#include <linux/fcntl.h>
  77#include <linux/mm.h>
  78#include <linux/interrupt.h>
  79#include <linux/proc_fs.h>
  80#include <linux/stat.h>
  81#include <linux/init.h>
  82#include <linux/poll.h>
  83
  84#include <asm/uaccess.h>
  85#include <asm/system.h>
  86
  87#include <linux/inet.h>
  88#include <linux/netdevice.h>
  89#include <net/ip.h>
  90#include <net/protocol.h>
  91#include <net/arp.h>
  92#include <net/rarp.h>
  93#include <net/route.h>
  94#include <net/tcp.h>
  95#include <net/udp.h>
  96#include <linux/skbuff.h>
  97#include <net/sock.h>
  98#include <net/raw.h>
  99#include <net/icmp.h>
 100#include <net/ipip.h>
 101#include <net/inet_common.h>
 102#include <linux/ip_fw.h>
 103#ifdef CONFIG_IP_MROUTE
 104#include <linux/mroute.h>
 105#endif
 106#ifdef CONFIG_IP_MASQUERADE
 107#include <net/ip_masq.h>
 108#endif
 109#ifdef CONFIG_BRIDGE
 110#include <net/br.h>
 111#endif
 112#ifdef CONFIG_KMOD
 113#include <linux/kmod.h>
 114#endif
 115#ifdef CONFIG_NET_RADIO
 116#include <linux/wireless.h>
 117#endif  /* CONFIG_NET_RADIO */
 118
 119#define min(a,b)        ((a)<(b)?(a):(b))
 120
 121struct linux_mib net_statistics;
 122
 123extern int raw_get_info(char *, char **, off_t, int, int);
 124extern int snmp_get_info(char *, char **, off_t, int, int);
 125extern int netstat_get_info(char *, char **, off_t, int, int);
 126extern int afinet_get_info(char *, char **, off_t, int, int);
 127extern int tcp_get_info(char *, char **, off_t, int, int);
 128extern int udp_get_info(char *, char **, off_t, int, int);
 129extern void ip_mc_drop_socket(struct sock *sk);
 130
 131#ifdef CONFIG_DLCI
 132extern int dlci_ioctl(unsigned int, void*);
 133#endif
 134
 135#ifdef CONFIG_DLCI_MODULE
 136int (*dlci_ioctl_hook)(unsigned int, void *) = NULL;
 137#endif
 138
 139int (*rarp_ioctl_hook)(unsigned int,void*) = NULL;
 140
 141/*
 142 *      Destroy an AF_INET socket
 143 */
 144 
 145static __inline__ void kill_sk_queues(struct sock *sk)
 146{
 147        struct sk_buff *skb;
 148
 149        /* First the read buffer. */
 150        while((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
 151                /* This will take care of closing sockets that were
 152                 * listening and didn't accept everything.
 153                 */
 154                if (skb->sk != NULL && skb->sk != sk)
 155                        skb->sk->prot->close(skb->sk, 0);
 156                kfree_skb(skb);
 157        }
 158
 159        /* Next, the error queue. */
 160        while((skb = skb_dequeue(&sk->error_queue)) != NULL)
 161                kfree_skb(skb);
 162
 163        /* Now the backlog. */
 164        while((skb=skb_dequeue(&sk->back_log)) != NULL)
 165                kfree_skb(skb);
 166}
 167
 168static __inline__ void kill_sk_now(struct sock *sk)
 169{
 170        /* No longer exists. */
 171        del_from_prot_sklist(sk);
 172
 173        /* Remove from protocol hash chains. */
 174        sk->prot->unhash(sk);
 175
 176        if(sk->opt)
 177                kfree(sk->opt);
 178        dst_release(sk->dst_cache);
 179        sk_free(sk);
 180}
 181
 182static __inline__ void kill_sk_later(struct sock *sk)
 183{
 184        /* this should never happen. */
 185        /* actually it can if an ack has just been sent. */
 186        /* 
 187         * It's more normal than that...
 188         * It can happen because a skb is still in the device queues
 189         * [PR]
 190         */
 191                  
 192        NETDEBUG(printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
 193                        atomic_read(&sk->rmem_alloc),
 194                        atomic_read(&sk->wmem_alloc)));
 195
 196        sk->destroy = 1;
 197        sk->ack_backlog = 0;
 198        release_sock(sk);
 199        net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
 200}
 201
 202void destroy_sock(struct sock *sk)
 203{
 204        lock_sock(sk);                  /* just to be safe. */
 205
 206        /* Now we can no longer get new packets or once the
 207         * timers are killed, send them.
 208         */
 209        net_delete_timer(sk);
 210
 211        if (sk->prot->destroy)
 212                sk->prot->destroy(sk);
 213
 214        kill_sk_queues(sk);
 215
 216        /* Now if it has a half accepted/ closed socket. */
 217        if (sk->pair) {
 218                sk->pair->prot->close(sk->pair, 0);
 219                sk->pair = NULL;
 220        }
 221
 222        /* Now if everything is gone we can free the socket
 223         * structure, otherwise we need to keep it around until
 224         * everything is gone.
 225         */
 226        if (atomic_read(&sk->rmem_alloc) == 0 && atomic_read(&sk->wmem_alloc) == 0)
 227                kill_sk_now(sk);
 228        else
 229                kill_sk_later(sk);
 230}
 231
 232/*
 233 *      The routines beyond this point handle the behaviour of an AF_INET
 234 *      socket object. Mostly it punts to the subprotocols of IP to do
 235 *      the work.
 236 */
 237 
 238
 239/*
 240 *      Set socket options on an inet socket.
 241 */
 242 
 243int inet_setsockopt(struct socket *sock, int level, int optname,
 244                    char *optval, int optlen)
 245{
 246        struct sock *sk=sock->sk;
 247        if (sk->prot->setsockopt==NULL)
 248                return(-EOPNOTSUPP);
 249        return sk->prot->setsockopt(sk,level,optname,optval,optlen);
 250}
 251
 252/*
 253 *      Get a socket option on an AF_INET socket.
 254 *
 255 *      FIX: POSIX 1003.1g is very ambiguous here. It states that
 256 *      asynchronous errors should be reported by getsockopt. We assume
 257 *      this means if you specify SO_ERROR (otherwise whats the point of it).
 258 */
 259
 260int inet_getsockopt(struct socket *sock, int level, int optname,
 261                    char *optval, int *optlen)
 262{
 263        struct sock *sk=sock->sk;
 264        if (sk->prot->getsockopt==NULL)
 265                return(-EOPNOTSUPP);
 266        return sk->prot->getsockopt(sk,level,optname,optval,optlen);
 267}
 268
 269/*
 270 *      Automatically bind an unbound socket.
 271 */
 272
 273static int inet_autobind(struct sock *sk)
 274{
 275        /* We may need to bind the socket. */
 276        if (sk->num == 0) {
 277                sk->num = sk->prot->good_socknum();
 278                if (sk->num == 0) 
 279                        return(-EAGAIN);
 280                sk->sport = htons(sk->num);
 281                sk->prot->hash(sk);
 282                add_to_prot_sklist(sk);
 283        }
 284        return 0;
 285}
 286
 287/*
 288 *      Move a socket into listening state.
 289 */
 290 
 291int inet_listen(struct socket *sock, int backlog)
 292{
 293        struct sock *sk = sock->sk;
 294
 295        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
 296                return(-EINVAL);
 297
 298        if (inet_autobind(sk) != 0)
 299                return -EAGAIN;
 300
 301        /* We might as well re use these. */ 
 302        if ((unsigned) backlog == 0)    /* BSDism */
 303                backlog = 1;
 304        if ((unsigned) backlog > SOMAXCONN)
 305                backlog = SOMAXCONN;
 306        sk->max_ack_backlog = backlog;
 307        if (sk->state != TCP_LISTEN) {
 308                sk->ack_backlog = 0;
 309                sk->state = TCP_LISTEN;
 310                dst_release(xchg(&sk->dst_cache, NULL));
 311                sk->prot->rehash(sk);
 312                add_to_prot_sklist(sk);
 313        }
 314        sk->socket->flags |= SO_ACCEPTCON;
 315        return(0);
 316}
 317
 318/*
 319 *      Create an inet socket.
 320 *
 321 *      FIXME: Gcc would generate much better code if we set the parameters
 322 *      up in in-memory structure order. Gcc68K even more so
 323 */
 324
 325static int inet_create(struct socket *sock, int protocol)
 326{
 327        struct sock *sk;
 328        struct proto *prot;
 329
 330        /* Compatibility */
 331        if (sock->type == SOCK_PACKET) {
 332                static int warned; 
 333                if (net_families[PF_PACKET]==NULL)
 334                {
 335#if defined(CONFIG_KMOD) && defined(CONFIG_PACKET_MODULE)
 336                        char module_name[30];
 337                        sprintf(module_name,"net-pf-%d", PF_PACKET);
 338                        request_module(module_name);
 339                        if (net_families[PF_PACKET] == NULL)
 340#endif
 341                        return -ESOCKTNOSUPPORT;
 342                }
 343                if (!warned++)
 344                        printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
 345                return net_families[PF_PACKET]->create(sock, protocol);
 346        }
 347
 348        sock->state = SS_UNCONNECTED;
 349        sk = sk_alloc(PF_INET, GFP_KERNEL, 1);
 350        if (sk == NULL) 
 351                goto do_oom;
 352
 353        switch (sock->type) {
 354        case SOCK_STREAM:
 355                if (protocol && protocol != IPPROTO_TCP)
 356                        goto free_and_noproto;
 357                protocol = IPPROTO_TCP;
 358                if (ipv4_config.no_pmtu_disc)
 359                        sk->ip_pmtudisc = IP_PMTUDISC_DONT;
 360                else
 361                        sk->ip_pmtudisc = IP_PMTUDISC_WANT;
 362                prot = &tcp_prot;
 363                sock->ops = &inet_stream_ops;
 364                break;
 365        case SOCK_SEQPACKET:
 366                goto free_and_badtype;
 367        case SOCK_DGRAM:
 368                if (protocol && protocol != IPPROTO_UDP)
 369                        goto free_and_noproto;
 370                protocol = IPPROTO_UDP;
 371                sk->no_check = UDP_NO_CHECK;
 372                sk->ip_pmtudisc = IP_PMTUDISC_DONT;
 373                prot=&udp_prot;
 374                sock->ops = &inet_dgram_ops;
 375                break;
 376        case SOCK_RAW:
 377                if (!capable(CAP_NET_RAW))
 378                        goto free_and_badperm;
 379                if (!protocol)
 380                        goto free_and_noproto;
 381                prot = &raw_prot;
 382                sk->reuse = 1;
 383                sk->ip_pmtudisc = IP_PMTUDISC_DONT;
 384                sk->num = protocol;
 385                sock->ops = &inet_dgram_ops;
 386                if (protocol == IPPROTO_RAW)
 387                        sk->ip_hdrincl = 1;
 388                break;
 389        default:
 390                goto free_and_badtype;
 391        }
 392
 393        sock_init_data(sock,sk);
 394        
 395        sk->destruct = NULL;
 396
 397        sk->zapped=0;
 398#ifdef CONFIG_TCP_NAGLE_OFF
 399        sk->nonagle = 1;
 400#endif  
 401        sk->family = PF_INET;
 402        sk->protocol = protocol;
 403
 404        sk->prot = prot;
 405        sk->backlog_rcv = prot->backlog_rcv;
 406
 407        sk->timer.data = (unsigned long)sk;
 408        sk->timer.function = &net_timer;
 409
 410        sk->ip_ttl=ip_statistics.IpDefaultTTL;
 411
 412        sk->ip_mc_loop=1;
 413        sk->ip_mc_ttl=1;
 414        sk->ip_mc_index=0;
 415        sk->ip_mc_list=NULL;
 416        
 417        if (sk->num) {
 418                /* It assumes that any protocol which allows
 419                 * the user to assign a number at socket
 420                 * creation time automatically
 421                 * shares.
 422                 */
 423                sk->sport = htons(sk->num);
 424
 425                /* Add to protocol hash chains. */
 426                sk->prot->hash(sk);
 427                add_to_prot_sklist(sk);
 428        }
 429
 430        if (sk->prot->init) {
 431                int err = sk->prot->init(sk);
 432                if (err != 0) {
 433                        destroy_sock(sk);
 434                        return(err);
 435                }
 436        }
 437        return(0);
 438
 439free_and_badtype:
 440        sk_free(sk);
 441        return -ESOCKTNOSUPPORT;
 442
 443free_and_badperm:
 444        sk_free(sk);
 445        return -EPERM;
 446
 447free_and_noproto:
 448        sk_free(sk);
 449        return -EPROTONOSUPPORT;
 450
 451do_oom:
 452        return -ENOBUFS;
 453}
 454
 455
 456/*
 457 *      The peer socket should always be NULL (or else). When we call this
 458 *      function we are destroying the object and from then on nobody
 459 *      should refer to it.
 460 */
 461 
 462int inet_release(struct socket *sock, struct socket *peersock)
 463{
 464        struct sock *sk = sock->sk;
 465
 466        if (sk) {
 467                long timeout;
 468
 469                /* Begin closedown and wake up sleepers. */
 470                if (sock->state != SS_UNCONNECTED)
 471                        sock->state = SS_DISCONNECTING;
 472                sk->state_change(sk);
 473
 474                /* Applications forget to leave groups before exiting */
 475                ip_mc_drop_socket(sk);
 476
 477                /* If linger is set, we don't return until the close
 478                 * is complete.  Otherwise we return immediately. The
 479                 * actually closing is done the same either way.
 480                 *
 481                 * If the close is due to the process exiting, we never
 482                 * linger..
 483                 */
 484                timeout = 0;
 485                if (sk->linger && !(current->flags & PF_EXITING)) {
 486                        timeout = MAX_SCHEDULE_TIMEOUT;
 487
 488                        /* XXX This makes no sense whatsoever... -DaveM */
 489                        if (!sk->lingertime)
 490                                timeout = HZ*sk->lingertime;
 491                }
 492                sock->sk = NULL;
 493                sk->socket = NULL;
 494                sk->prot->close(sk, timeout);
 495        }
 496        return(0);
 497}
 498
 499static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 500{
 501        struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
 502        struct sock *sk=sock->sk;
 503        unsigned short snum;
 504        int chk_addr_ret;
 505
 506        /* If the socket has its own bind function then use it. (RAW) */
 507        if(sk->prot->bind)
 508                return sk->prot->bind(sk, uaddr, addr_len);
 509                
 510        /* Check these errors (active socket, bad address length, double bind). */
 511        if ((sk->state != TCP_CLOSE)                    ||
 512            (addr_len < sizeof(struct sockaddr_in))     ||
 513            (sk->num != 0))
 514                return -EINVAL;
 515                
 516        snum = ntohs(addr->sin_port);
 517#ifdef CONFIG_IP_MASQUERADE
 518        /* The kernel masquerader needs some ports. */
 519        if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
 520                return -EADDRINUSE;
 521#endif           
 522        if (snum == 0) 
 523                snum = sk->prot->good_socknum();
 524        if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 525                return(-EACCES);
 526        
 527        chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
 528        if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
 529            chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
 530#ifdef CONFIG_IP_TRANSPARENT_PROXY
 531                /* Superuser may bind to any address to allow transparent proxying. */
 532                if(chk_addr_ret != RTN_UNICAST || !capable(CAP_NET_ADMIN))
 533#endif
 534                        return -EADDRNOTAVAIL;  /* Source address MUST be ours! */
 535        }
 536
 537        /*      We keep a pair of addresses. rcv_saddr is the one
 538         *      used by hash lookups, and saddr is used for transmit.
 539         *
 540         *      In the BSD API these are the same except where it
 541         *      would be illegal to use them (multicast/broadcast) in
 542         *      which case the sending device address is used.
 543         */
 544        sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
 545        if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
 546                sk->saddr = 0;  /* Use device */
 547
 548        /* Make sure we are allowed to bind here. */
 549        if(sk->prot->verify_bind(sk, snum))
 550                return -EADDRINUSE;
 551
 552        sk->num = snum;
 553        sk->sport = htons(snum);
 554        sk->daddr = 0;
 555        sk->dport = 0;
 556        sk->prot->rehash(sk);
 557        add_to_prot_sklist(sk);
 558        dst_release(sk->dst_cache);
 559        sk->dst_cache=NULL;
 560        return(0);
 561}
 562
 563int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 564                       int addr_len, int flags)
 565{
 566        struct sock *sk=sock->sk;
 567        int err;
 568
 569        if (inet_autobind(sk) != 0)
 570                return(-EAGAIN);
 571        if (sk->prot->connect == NULL) 
 572                return(-EOPNOTSUPP);
 573        err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
 574        if (err < 0) 
 575                return(err);
 576        return(0);
 577}
 578
 579static void inet_wait_for_connect(struct sock *sk)
 580{
 581        struct wait_queue wait = { current, NULL };
 582
 583        add_wait_queue(sk->sleep, &wait);
 584        current->state = TASK_INTERRUPTIBLE;
 585        while (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
 586                if (signal_pending(current))
 587                        break;
 588                if (sk->err)
 589                        break;
 590                schedule();
 591                current->state = TASK_INTERRUPTIBLE;
 592        }
 593        current->state = TASK_RUNNING;
 594        remove_wait_queue(sk->sleep, &wait);
 595}
 596
 597/*
 598 *      Connect to a remote host. There is regrettably still a little
 599 *      TCP 'magic' in here.
 600 */
 601 
 602int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
 603                        int addr_len, int flags)
 604{
 605        struct sock *sk=sock->sk;
 606        int err;
 607
 608        if(sock->state != SS_UNCONNECTED && sock->state != SS_CONNECTING) {
 609                if(sock->state == SS_CONNECTED)
 610                        return -EISCONN;
 611                return -EINVAL;
 612        }
 613
 614        if(sock->state == SS_CONNECTING) {
 615                /* Note: tcp_connected contains SYN_RECV, which may cause
 616                   bogus results here. -AK */ 
 617                if(tcp_connected(sk->state)) {
 618                        sock->state = SS_CONNECTED;
 619                        return 0;
 620                }
 621                if (sk->zapped || sk->err)
 622                        goto sock_error;
 623                if (flags & O_NONBLOCK)
 624                        return -EALREADY;
 625        } else {
 626                /* We may need to bind the socket. */
 627                if (inet_autobind(sk) != 0)
 628                        return(-EAGAIN);
 629                if (sk->prot->connect == NULL) 
 630                        return(-EOPNOTSUPP);
 631                err = sk->prot->connect(sk, uaddr, addr_len);
 632                /* Note: there is a theoretical race here when an wake up
 633                   occurred before inet_wait_for_connect is entered. In 2.3
 634                   the wait queue setup should be moved before the low level
 635                   connect call. -AK*/
 636                if (err < 0)
 637                        return(err);
 638                sock->state = SS_CONNECTING;
 639        }
 640        
 641        if (sk->state > TCP_FIN_WAIT2 && sock->state == SS_CONNECTING)
 642                goto sock_error;
 643
 644        if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) 
 645                return (-EINPROGRESS);
 646
 647        if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
 648                inet_wait_for_connect(sk);
 649                if (signal_pending(current))
 650                        return -ERESTARTSYS;
 651        }
 652
 653        sock->state = SS_CONNECTED;
 654        if ((sk->state != TCP_ESTABLISHED) && sk->err)
 655                goto sock_error; 
 656        return(0);
 657
 658sock_error:     
 659        /* This is ugly but needed to fix a race in the ICMP error handler */
 660        if (sk->zapped && sk->state != TCP_CLOSE) { 
 661                lock_sock(sk);  
 662                tcp_set_state(sk, TCP_CLOSE);
 663                release_sock(sk); 
 664                sk->zapped = 0;
 665        }
 666        sock->state = SS_UNCONNECTED;
 667        return sock_error(sk);
 668}
 669
 670/*
 671 *      Accept a pending connection. The TCP layer now gives BSD semantics.
 672 */
 673
 674int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 675{
 676        struct sock *sk1 = sock->sk, *sk2;
 677        struct sock *newsk = newsock->sk;
 678        int err = -EINVAL;
 679
 680        if (sock->state != SS_UNCONNECTED || !(sock->flags & SO_ACCEPTCON))
 681                goto do_err;
 682
 683        err = -EOPNOTSUPP;
 684        if (sk1->prot->accept == NULL)
 685                goto do_err;
 686
 687        /* Restore the state if we have been interrupted, and then returned. */
 688        if (sk1->pair != NULL) {
 689                sk2 = sk1->pair;
 690                sk1->pair = NULL;
 691        } else {
 692                if((sk2 = sk1->prot->accept(sk1,flags)) == NULL)
 693                        goto do_sk1_err;
 694        }
 695
 696        /*
 697         *      We've been passed an extra socket.
 698         *      We need to free it up because the tcp module creates
 699         *      its own when it accepts one.
 700         */
 701        sk2->sleep = newsk->sleep;
 702
 703        newsock->sk = sk2;
 704        sk2->socket = newsock;
 705        newsk->socket = NULL;
 706
 707        if (flags & O_NONBLOCK)
 708                goto do_half_success;
 709
 710        if(sk2->state == TCP_ESTABLISHED)
 711                goto do_full_success;
 712        if(sk2->err > 0)
 713                goto do_connect_err;
 714        err = -ECONNABORTED;
 715        if (sk2->state == TCP_CLOSE)
 716                goto do_bad_connection;
 717do_full_success:
 718        destroy_sock(newsk);
 719        newsock->state = SS_CONNECTED;
 720        return 0;
 721
 722do_half_success:
 723        destroy_sock(newsk);
 724        return(0);
 725
 726do_connect_err:
 727        err = sock_error(sk2);
 728do_bad_connection:
 729        sk2->sleep = NULL;
 730        sk2->socket = NULL;
 731        destroy_sock(sk2);
 732        newsock->sk = newsk;
 733        newsk->socket = newsock;
 734        return err;
 735
 736do_sk1_err:
 737        err = sock_error(sk1);
 738do_err:
 739        return err;
 740}
 741
 742
 743/*
 744 *      This does both peername and sockname.
 745 */
 746 
 747static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 748                 int *uaddr_len, int peer)
 749{
 750        struct sock *sk         = sock->sk;
 751        struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
 752  
 753        sin->sin_family = AF_INET;
 754        if (peer) {
 755                if (!tcp_connected(sk->state)) 
 756                        return(-ENOTCONN);
 757                sin->sin_port = sk->dport;
 758                sin->sin_addr.s_addr = sk->daddr;
 759        } else {
 760                __u32 addr = sk->rcv_saddr;
 761                if (!addr)
 762                        addr = sk->saddr;
 763                sin->sin_port = sk->sport;
 764                sin->sin_addr.s_addr = addr;
 765        }
 766        *uaddr_len = sizeof(*sin);
 767        return(0);
 768}
 769
 770
 771
 772int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
 773                 int flags, struct scm_cookie *scm)
 774{
 775        struct sock *sk = sock->sk;
 776        int addr_len = 0;
 777        int err;
 778        
 779        if (sock->flags & SO_ACCEPTCON)
 780                return(-EINVAL);
 781        if (sk->prot->recvmsg == NULL) 
 782                return(-EOPNOTSUPP);
 783        /* We may need to bind the socket. */
 784        if (inet_autobind(sk) != 0)
 785                return(-EAGAIN);
 786        err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
 787                                flags&~MSG_DONTWAIT, &addr_len);
 788        if (err >= 0)
 789                msg->msg_namelen = addr_len;
 790        return err;
 791}
 792
 793
 794int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
 795                 struct scm_cookie *scm)
 796{
 797        struct sock *sk = sock->sk;
 798
 799        if (sk->shutdown & SEND_SHUTDOWN) {
 800                if (!(msg->msg_flags&MSG_NOSIGNAL))
 801                        send_sig(SIGPIPE, current, 1);
 802                return(-EPIPE);
 803        }
 804        if (sk->prot->sendmsg == NULL) 
 805                return(-EOPNOTSUPP);
 806        if(sk->err)
 807                return sock_error(sk);
 808
 809        /* We may need to bind the socket. */
 810        if(inet_autobind(sk) != 0)
 811                return -EAGAIN;
 812
 813        return sk->prot->sendmsg(sk, msg, size);
 814}
 815
 816
 817int inet_shutdown(struct socket *sock, int how)
 818{
 819        struct sock *sk = sock->sk;
 820
 821        /* This should really check to make sure
 822         * the socket is a TCP socket. (WHY AC...)
 823         */
 824        how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
 825                       1->2 bit 2 snds.
 826                       2->3 */
 827        if ((how & ~SHUTDOWN_MASK) || how==0)   /* MAXINT->0 */
 828                return(-EINVAL);
 829        if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
 830                sock->state = SS_CONNECTED;
 831        if (!sk || !tcp_connected(sk->state)) 
 832                return(-ENOTCONN);
 833        sk->shutdown |= how;
 834        if (sk->prot->shutdown)
 835                sk->prot->shutdown(sk, how);
 836        /* Wake up anyone sleeping in poll. */
 837        sk->state_change(sk);
 838        return(0);
 839}
 840
 841
 842unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait)
 843{
 844        struct sock *sk = sock->sk;
 845
 846        if (sk->prot->poll == NULL)
 847                return(0);
 848        return sk->prot->poll(file, sock, wait);
 849}
 850
 851/*
 852 *      ioctl() calls you can issue on an INET socket. Most of these are
 853 *      device configuration and stuff and very rarely used. Some ioctls
 854 *      pass on to the socket itself.
 855 *
 856 *      NOTE: I like the idea of a module for the config stuff. ie ifconfig
 857 *      loads the devconfigure module does its configuring and unloads it.
 858 *      There's a good 20K of config code hanging around the kernel.
 859 */
 860
 861static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 862{
 863        struct sock *sk = sock->sk;
 864        int err;
 865        int pid;
 866
 867        switch(cmd) 
 868        {
 869                case FIOSETOWN:
 870                case SIOCSPGRP:
 871                        err = get_user(pid, (int *) arg);
 872                        if (err)
 873                                return err; 
 874                        if (current->pid != pid && current->pgrp != -pid && 
 875                            !capable(CAP_NET_ADMIN))
 876                                return -EPERM;
 877                        sk->proc = pid;
 878                        return(0);
 879                case FIOGETOWN:
 880                case SIOCGPGRP:
 881                        return put_user(sk->proc, (int *)arg);
 882                case SIOCGSTAMP:
 883                        if(sk->stamp.tv_sec==0)
 884                                return -ENOENT;
 885                        err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval));
 886                        if (err)
 887                                err = -EFAULT;
 888                        return err;
 889                case SIOCADDRT:
 890                case SIOCDELRT:
 891                case SIOCRTMSG:
 892                        return(ip_rt_ioctl(cmd,(void *) arg));
 893                case SIOCDARP:
 894                case SIOCGARP:
 895                case SIOCSARP:
 896                        return(arp_ioctl(cmd,(void *) arg));
 897                case SIOCDRARP:
 898                case SIOCGRARP:
 899                case SIOCSRARP:
 900#ifdef CONFIG_KMOD
 901                        if (rarp_ioctl_hook == NULL)
 902                                request_module("rarp");
 903#endif
 904                        if (rarp_ioctl_hook != NULL)
 905                                return(rarp_ioctl_hook(cmd,(void *) arg));
 906                case SIOCGIFADDR:
 907                case SIOCSIFADDR:
 908                case SIOCGIFBRDADDR:
 909                case SIOCSIFBRDADDR:
 910                case SIOCGIFNETMASK:
 911                case SIOCSIFNETMASK:
 912                case SIOCGIFDSTADDR:
 913                case SIOCSIFDSTADDR:
 914                case SIOCSIFPFLAGS:     
 915                case SIOCGIFPFLAGS:     
 916                case SIOCSIFFLAGS:
 917                        return(devinet_ioctl(cmd,(void *) arg));
 918                case SIOCGIFBR:
 919                case SIOCSIFBR:
 920#ifdef CONFIG_BRIDGE            
 921                        return(br_ioctl(cmd,(void *) arg));
 922#else
 923                        return -ENOPKG;
 924#endif                                          
 925                        
 926                case SIOCADDDLCI:
 927                case SIOCDELDLCI:
 928#ifdef CONFIG_DLCI
 929                        return(dlci_ioctl(cmd, (void *) arg));
 930#endif
 931
 932#ifdef CONFIG_DLCI_MODULE
 933
 934#ifdef CONFIG_KMOD
 935                        if (dlci_ioctl_hook == NULL)
 936                                request_module("dlci");
 937#endif
 938
 939                        if (dlci_ioctl_hook)
 940                                return((*dlci_ioctl_hook)(cmd, (void *) arg));
 941#endif
 942                        return -ENOPKG;
 943
 944                default:
 945                        if ((cmd >= SIOCDEVPRIVATE) &&
 946                            (cmd <= (SIOCDEVPRIVATE + 15)))
 947                                return(dev_ioctl(cmd,(void *) arg));
 948
 949#ifdef CONFIG_NET_RADIO
 950                        if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
 951                                return(dev_ioctl(cmd,(void *) arg));
 952#endif
 953
 954                        if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
 955                                return(dev_ioctl(cmd,(void *) arg));            
 956                        return err;
 957        }
 958        /*NOTREACHED*/
 959        return(0);
 960}
 961
 962struct proto_ops inet_stream_ops = {
 963        PF_INET,
 964
 965        sock_no_dup,
 966        inet_release,
 967        inet_bind,
 968        inet_stream_connect,
 969        sock_no_socketpair,
 970        inet_accept,
 971        inet_getname, 
 972        inet_poll,
 973        inet_ioctl,
 974        inet_listen,
 975        inet_shutdown,
 976        inet_setsockopt,
 977        inet_getsockopt,
 978        sock_no_fcntl,
 979        inet_sendmsg,
 980        inet_recvmsg
 981};
 982
 983struct proto_ops inet_dgram_ops = {
 984        PF_INET,
 985
 986        sock_no_dup,
 987        inet_release,
 988        inet_bind,
 989        inet_dgram_connect,
 990        sock_no_socketpair,
 991        sock_no_accept,
 992        inet_getname, 
 993        datagram_poll,
 994        inet_ioctl,
 995        sock_no_listen,
 996        inet_shutdown,
 997        inet_setsockopt,
 998        inet_getsockopt,
 999        sock_no_fcntl,
1000        inet_sendmsg,
1001        inet_recvmsg
1002};
1003
1004struct net_proto_family inet_family_ops = {
1005        PF_INET,
1006        inet_create
1007};
1008
1009
1010#ifdef CONFIG_PROC_FS
1011#ifdef CONFIG_INET_RARP
1012static struct proc_dir_entry proc_net_rarp = {
1013        PROC_NET_RARP, 4, "rarp",
1014        S_IFREG | S_IRUGO, 1, 0, 0,
1015        0, &proc_net_inode_operations,
1016        rarp_get_info
1017};
1018#endif          /* RARP */
1019static struct proc_dir_entry proc_net_raw = {
1020        PROC_NET_RAW, 3, "raw",
1021        S_IFREG | S_IRUGO, 1, 0, 0,
1022        0, &proc_net_inode_operations,
1023        raw_get_info
1024};
1025static struct proc_dir_entry proc_net_netstat = {
1026        PROC_NET_NETSTAT, 7, "netstat",
1027        S_IFREG | S_IRUGO, 1, 0, 0,
1028        0, &proc_net_inode_operations,
1029        netstat_get_info
1030};
1031static struct proc_dir_entry proc_net_snmp = {
1032        PROC_NET_SNMP, 4, "snmp",
1033        S_IFREG | S_IRUGO, 1, 0, 0,
1034        0, &proc_net_inode_operations,
1035        snmp_get_info
1036};
1037static struct proc_dir_entry proc_net_sockstat = {
1038        PROC_NET_SOCKSTAT, 8, "sockstat",
1039        S_IFREG | S_IRUGO, 1, 0, 0,
1040        0, &proc_net_inode_operations,
1041        afinet_get_info
1042};
1043static struct proc_dir_entry proc_net_tcp = {
1044        PROC_NET_TCP, 3, "tcp",
1045        S_IFREG | S_IRUGO, 1, 0, 0,
1046        0, &proc_net_inode_operations,
1047        tcp_get_info
1048};
1049static struct proc_dir_entry proc_net_udp = {
1050        PROC_NET_UDP, 3, "udp",
1051        S_IFREG | S_IRUGO, 1, 0, 0,
1052        0, &proc_net_inode_operations,
1053        udp_get_info
1054};
1055#endif          /* CONFIG_PROC_FS */
1056
1057extern void tcp_init(void);
1058extern void tcp_v4_init(struct net_proto_family *);
1059
1060
1061/*
1062 *      Called by socket.c on kernel startup.  
1063 */
1064 
1065__initfunc(void inet_proto_init(struct net_proto *pro))
1066{
1067        struct sk_buff *dummy_skb;
1068        struct inet_protocol *p;
1069
1070        printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
1071
1072        if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb))
1073        {
1074                printk(KERN_CRIT "inet_proto_init: panic\n");
1075                return;
1076        }
1077
1078        /*
1079         *      Tell SOCKET that we are alive... 
1080         */
1081   
1082        (void) sock_register(&inet_family_ops);
1083
1084        /*
1085         *      Add all the protocols. 
1086         */
1087
1088        printk(KERN_INFO "IP Protocols: ");
1089        for(p = inet_protocol_base; p != NULL;) 
1090        {
1091                struct inet_protocol *tmp = (struct inet_protocol *) p->next;
1092                inet_add_protocol(p);
1093                printk("%s%s",p->name,tmp?", ":"\n");
1094                p = tmp;
1095        }
1096
1097        /*
1098         *      Set the ARP module up
1099         */
1100
1101        arp_init();
1102
1103        /*
1104         *      Set the IP module up
1105         */
1106
1107        ip_init();
1108
1109        tcp_v4_init(&inet_family_ops);
1110
1111        /* Setup TCP slab cache for open requests. */
1112        tcp_init();
1113
1114
1115        /*
1116         *      Set the ICMP layer up
1117         */
1118
1119        icmp_init(&inet_family_ops);
1120
1121        /* I wish inet_add_protocol had no constructor hook...
1122           I had to move IPIP from net/ipv4/protocol.c :-( --ANK
1123         */
1124#ifdef CONFIG_NET_IPIP
1125        ipip_init();
1126#endif
1127#ifdef CONFIG_NET_IPGRE
1128        ipgre_init();
1129#endif
1130
1131        /*
1132         *      Set the firewalling up
1133         */
1134#if defined(CONFIG_IP_FIREWALL)
1135        ip_fw_init();
1136#endif
1137
1138#ifdef CONFIG_IP_MASQUERADE
1139        ip_masq_init();
1140#endif
1141        
1142        /*
1143         *      Initialise the multicast router
1144         */
1145#if defined(CONFIG_IP_MROUTE)
1146        ip_mr_init();
1147#endif
1148
1149#ifdef CONFIG_INET_RARP
1150        rarp_ioctl_hook = rarp_ioctl;
1151#endif
1152        /*
1153         *      Create all the /proc entries.
1154         */
1155
1156#ifdef CONFIG_PROC_FS
1157#ifdef CONFIG_INET_RARP
1158        proc_net_register(&proc_net_rarp);
1159#endif          /* RARP */
1160        proc_net_register(&proc_net_raw);
1161        proc_net_register(&proc_net_snmp);
1162        proc_net_register(&proc_net_netstat);
1163        proc_net_register(&proc_net_sockstat);
1164        proc_net_register(&proc_net_tcp);
1165        proc_net_register(&proc_net_udp);
1166#endif          /* CONFIG_PROC_FS */
1167}
1168
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.