linux-old/include/net/sock.h History
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Definitions for the AF_INET socket handler.
   7 *
   8 * Version:     @(#)sock.h      1.0.4   05/13/93
   9 *
  10 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  13 *              Florian La Roche <flla@stud.uni-sb.de>
  14 *
  15 * Fixes:
  16 *              Alan Cox        :       Volatiles in skbuff pointers. See
  17 *                                      skbuff comments. May be overdone,
  18 *                                      better to prove they can be removed
  19 *                                      than the reverse.
  20 *              Alan Cox        :       Added a zapped field for tcp to note
  21 *                                      a socket is reset and must stay shut up
  22 *              Alan Cox        :       New fields for options
  23 *      Pauline Middelink       :       identd support
  24 *              Alan Cox        :       Eliminate low level recv/recvfrom
  25 *              David S. Miller :       New socket lookup architecture.
  26 *              Steve Whitehouse:       Default routines for sock_ops
  27 *
  28 *              This program is free software; you can redistribute it and/or
  29 *              modify it under the terms of the GNU General Public License
  30 *              as published by the Free Software Foundation; either version
  31 *              2 of the License, or (at your option) any later version.
  32 */
  33#ifndef _SOCK_H
  34#define _SOCK_H
  35
  36#include <linux/config.h>
  37#include <linux/timer.h>
  38#include <linux/cache.h>
  39#include <linux/in.h>           /* struct sockaddr_in */
  40
  41#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  42#include <linux/in6.h>          /* struct sockaddr_in6 */
  43#include <linux/ipv6.h>         /* dest_cache, inet6_options */
  44#include <linux/icmpv6.h>
  45#include <net/if_inet6.h>       /* struct ipv6_mc_socklist */
  46#endif
  47
  48#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
  49#include <linux/icmp.h>
  50#endif
  51#include <linux/tcp.h>          /* struct tcphdr */
  52#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
  53#include <net/sctp/structs.h>   /* struct sctp_opt */
  54#endif
  55
  56#include <linux/netdevice.h>
  57#include <linux/skbuff.h>       /* struct sk_buff */
  58#include <net/protocol.h>               /* struct inet_protocol */
  59#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
  60#include <net/x25.h>
  61#endif
  62#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
  63#include <linux/if_wanpipe.h>
  64#endif
  65
  66#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  67#include <net/ax25.h>
  68#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
  69#include <net/netrom.h>
  70#endif
  71#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
  72#include <net/rose.h>
  73#endif
  74#endif
  75
  76#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
  77#include <linux/if_pppox.h>
  78#include <linux/ppp_channel.h>   /* struct ppp_channel */
  79#endif
  80
  81#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
  82#if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
  83#include <net/spx.h>
  84#else
  85#include <net/ipx.h>
  86#endif /* CONFIG_SPX */
  87#endif /* CONFIG_IPX */
  88
  89#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
  90#include <linux/atalk.h>
  91#endif
  92
  93#if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
  94#include <net/dn.h>
  95#endif
  96
  97#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
  98#include <net/irda/irda.h>
  99#endif
 100
 101#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
 102struct atm_vcc;
 103#endif
 104
 105#ifdef CONFIG_FILTER
 106#include <linux/filter.h>
 107#endif
 108
 109#include <asm/atomic.h>
 110#include <net/dst.h>
 111
 112
  113/* The AF_UNIX specific socket options.
      *
      * Stored by value inside struct sock as protinfo.af_unix.  Most member
      * types are declared in other headers; notes marked "confirm" below are
      * inferred from names only and should be checked against the AF_UNIX code.
      */
  114struct unix_opt {
  115        struct unix_address     *addr;          /* presumably the bound address -- confirm */
  116        struct dentry *         dentry;
  117        struct vfsmount *       mnt;
  118        struct semaphore        readsem;
  119        struct sock *           other;          /* presumably the peer socket -- confirm */
  120        struct sock **          list;
  121        struct sock *           gc_tree;
  122        atomic_t                inflight;
  123        rwlock_t                lock;           /* what this guards is not visible in this header */
  124        wait_queue_head_t       peer_wait;
  125};
 126
 127
 128/* Once the IPX ncpd patches are in these are going into protinfo. */
 129#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
  130struct ipx_opt {
             /* IPX per-socket state.  Stored by value inside struct sock as
              * protinfo.af_ipx.  The layout depends on CONFIG_IPX_INTERN. */
  131        ipx_address             dest_addr;
  132        ipx_interface           *intrfc;
  133        unsigned short          port;
  134#ifdef CONFIG_IPX_INTERN
  135        unsigned char           node[IPX_NODE_LEN];     /* only present with CONFIG_IPX_INTERN */
  136#endif
  137        unsigned short          type;
  138/* 
  139 * To handle special ncp connection-handling sockets for mars_nwe,
  140 * the connection number must be stored in the socket.
  141 */
  142        unsigned short          ipx_ncp_conn;
  143};
 144#endif
 145
 146#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  147struct ipv6_pinfo {
             /* IPv6 per-socket state.  Stored by value inside struct sock as
              * net_pinfo.af_inet6; that union only exists when CONFIG_IPV6 or
              * CONFIG_IPV6_MODULE is defined. */
  148        struct in6_addr         saddr;
  149        struct in6_addr         rcv_saddr;
  150        struct in6_addr         daddr;
  151        struct in6_addr         *daddr_cache;
  152
  153        __u32                   flow_label;
  154        __u32                   frag_size;
  155        int                     hop_limit;
  156        int                     mcast_hops;
  157        int                     mcast_oif;
  158
  159        /* pktoption flags */
  160        union {
  161                struct {
  162                        __u8    srcrt:2,
  163                                rxinfo:1,
  164                                rxhlim:1,
  165                                hopopts:1,
  166                                dstopts:1,
  167                                authhdr:1,
  168                                rxflow:1;
  169                } bits;
  170                __u8            all;            /* overlays bits: the eight flag bits as one byte */
  171        } rxopt;
  172
  173        /* sockopt flags */
  174        __u8                    mc_loop:1,
  175                                recverr:1,
  176                                sndflow:1,
  177                                pmtudisc:2,
  178                                ipv6only:1;     /* read by __ipv6_only_sock() / ipv6_only_sock() */
  179
  180        struct ipv6_mc_socklist *ipv6_mc_list;
  181        struct ipv6_ac_socklist *ipv6_ac_list;
  182        struct ipv6_fl_socklist *ipv6_fl_list;
  183        __u32                   dst_cookie;
  184
  185        struct ipv6_txoptions   *opt;
  186        struct sk_buff          *pktoptions;
  187};
 188
  189struct raw6_opt {
             /* Raw IPv6 socket state.  A member (tp_raw) of the tp_pinfo union
              * in struct sock when IPv6 is configured. */
  190        __u32                   checksum;       /* perform checksum */
  191        __u32                   offset;         /* checksum offset  */
  192
  193        struct icmp6_filter     filter;         /* filter type is declared in another header */
  194};
 195
  /*
   * IPv6-only tests on a struct sock pointer.
   *
   * With IPv6 configured, __ipv6_only_sock(sk) yields the ipv6only bit of
   * sk->net_pinfo.af_inet6 without checking the address family, while
   * ipv6_only_sock(sk) additionally requires sk->family == PF_INET6.
   * ipv6_only_sock() expands its argument twice, so do not pass an
   * expression with side effects.
   *
   * Without IPv6 both macros are the constant 0 and the argument is not
   * evaluated at all.
   */
  196#define __ipv6_only_sock(sk)    ((sk)->net_pinfo.af_inet6.ipv6only)
  197#define ipv6_only_sock(sk)      ((sk)->family == PF_INET6 && \
  198                                 (sk)->net_pinfo.af_inet6.ipv6only)
  199#else
  200#define __ipv6_only_sock(sk)    0
  201#define ipv6_only_sock(sk)      0
 202#endif /* IPV6 */
 203
 204#if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE)
  205struct raw_opt {
             /* Raw IPv4 socket state.  A member (tp_raw4) of the tp_pinfo union
              * in struct sock when INET is configured. */
  206        struct icmp_filter      filter;         /* filter type is declared in another header */
  207};
 208#endif
 209
 210#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
  211struct inet_opt
  212{
             /* IPv4 per-socket options.  Stored by value inside struct sock as
              * protinfo.af_inet when INET is configured. */
  213        int                     ttl;                    /* TTL setting */
  214        int                     tos;                    /* TOS */
  215        unsigned                cmsg_flags;
  216        struct ip_options       *opt;
  217        unsigned char           hdrincl;                /* Include headers ? */
  218        __u8                    mc_ttl;                 /* Multicasting TTL */
  219        __u8                    mc_loop;                /* Loopback */
  220        unsigned                recverr : 1,            /* single-bit flags sharing one unsigned */
  221                                freebind : 1;
  222        __u16                   id;                     /* ID counter for DF pkts */
  223        __u8                    pmtudisc;
  224        int                     mc_index;               /* Multicast device index */
  225        __u32                   mc_addr;
  226        struct ip_mc_socklist   *mc_list;               /* Group array */
  227};
 228#endif
 229
 230#if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
  231struct pppoe_opt
  232{
             /* PPPoE-specific part of a PPPoX socket; embedded in struct
              * pppox_opt as proto.pppoe. */
  233        struct net_device      *dev;      /* device associated with socket*/
  234        struct pppoe_addr       pa;       /* what this socket is bound to*/
  235        struct sockaddr_pppox   relay;    /* what socket data will be
  236                                             relayed to (PPPoE relaying) */
  237};
 238
  239struct pppox_opt
  240{
             /* Generic PPPoX per-socket state.  struct sock refers to it through
              * the pointer protinfo.pppox rather than embedding it. */
  241        struct ppp_channel      chan;
  242        struct sock             *sk;
  243        struct pppox_opt        *next;    /* for hash table */
  244        union {
  245                struct pppoe_opt pppoe;   /* the only protocol variant defined here */
  246        } proto;
  247};
  /* Member-name shorthands: written after a struct pppox_opt access
   * (e.g. po->pppoe_dev) they expand to the matching proto.pppoe field. */
  248#define pppoe_dev       proto.pppoe.dev
  249#define pppoe_pa        proto.pppoe.pa
  250#define pppoe_relay     proto.pppoe.relay
 251#endif
 252
  253/* This defines a selective acknowledgement block: a pair of 32-bit
      * sequence numbers.  struct tcp_opt keeps arrays of these in
      * duplicate_sack[] and selective_acks[]. */
  254struct tcp_sack_block {
  255        __u32   start_seq;
  256        __u32   end_seq;
  257};
 258
  259enum tcp_congestion_algo {
             /* Congestion-control algorithm identifiers.  TCP_RENO is pinned to 0
              * and the rest follow sequentially (1, 2, 3). */
  260        TCP_RENO=0,
  261        TCP_VEGAS,
  262        TCP_WESTWOOD,
  263        TCP_BIC,
  264};
 265 
  266struct tcp_opt {
             /* Per-connection TCP state.  Stored by value inside struct sock as
              * tp_pinfo.af_tcp.  Notes marked "confirm" are inferred from names
              * only; this header does not show how the fields are used. */
  267        int     tcp_header_len; /* Bytes of tcp header to send          */
  268
  269/*
  270 *      Header prediction flags
  271 *      0x5?10 << 16 + snd_wnd in net byte order
  272 */
  273        __u32   pred_flags;
  274
  275/*
  276 *      RFC793 variables by their proper names. This means you can
  277 *      read the code and the spec side by side (and laugh ...)
  278 *      See RFC793 and RFC1122. The RFC writes these in capitals.
  279 */
  280        __u32   rcv_nxt;        /* What we want to receive next         */
  281        __u32   snd_nxt;        /* Next sequence we send                */
  282
  283        __u32   snd_una;        /* First byte we want an ack for        */
  284        __u32   snd_sml;        /* Last byte of the most recently transmitted small packet */
  285        __u32   rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
  286        __u32   lsndtime;       /* timestamp of last sent data packet (for restart window) */
  287
  288        /* Delayed ACK control data */
  289        struct {
  290                __u8    pending;        /* ACK is pending */
  291                __u8    quick;          /* Scheduled number of quick acks       */
  292                __u8    pingpong;       /* The session is interactive           */
  293                __u8    blocked;        /* Delayed ACK was blocked by socket lock*/
  294                __u32   ato;            /* Predicted tick of soft clock         */
  295                unsigned long timeout;  /* Currently scheduled timeout          */
  296                __u32   lrcvtime;       /* timestamp of last received data packet*/
  297                __u16   last_seg_size;  /* Size of last incoming segment        */
  298                __u16   rcv_mss;        /* MSS used for delayed ACK decisions   */ 
  299        } ack;
  300
  301        /* Data for direct copy to user */
  302        struct {
  303                struct sk_buff_head     prequeue;
  304                struct task_struct      *task;
  305                struct iovec            *iov;
  306                int                     memory;
  307                int                     len;
  308        } ucopy;
  309
  310        __u32   snd_wl1;        /* Sequence for window update           */
  311        __u32   snd_wnd;        /* The window we expect to receive      */
  312        __u32   max_window;     /* Maximal window ever seen from peer   */
  313        __u32   pmtu_cookie;    /* Last pmtu seen by socket             */
  314        __u16   mss_cache;      /* Cached effective mss, not including SACKS */
  315        __u16   mss_clamp;      /* Maximal mss, negotiated at connection setup */
  316        __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
  317        __u8    ca_state;       /* State of fast-retransmit machine     */
  318        __u8    retransmits;    /* Number of unrecovered RTO timeouts.  */
  319
  320        __u8    reordering;     /* Packet reordering metric.            */
  321        __u8    queue_shrunk;   /* Write queue has been shrunk recently.*/
  322        __u8    defer_accept;   /* User waits for some data after accept() */
  323
  324/* RTT measurement */
  325        __u8    backoff;        /* backoff                              */
  326        __u32   srtt;           /* smoothed round trip time << 3        */
  327        __u32   mdev;           /* medium deviation                     */
  328        __u32   mdev_max;       /* maximal mdev for the last rtt period */
  329        __u32   rttvar;         /* smoothed mdev_max                    */
  330        __u32   rtt_seq;        /* sequence number to update rttvar     */
  331        __u32   rto;            /* retransmit timeout                   */
  332
  333        __u32   packets_out;    /* Packets which are "in flight"        */
  334        __u32   left_out;       /* Packets which have left the network  */
  335        __u32   retrans_out;    /* Retransmitted packets out            */
  336
  337
  338/*
  339 *      Slow start and congestion control (see also Nagle, and Karn & Partridge)
  340 */
  341        __u32   snd_ssthresh;   /* Slow start size threshold            */
  342        __u32   snd_cwnd;       /* Sending congestion window            */
  343        __u16   snd_cwnd_cnt;   /* Linear increase counter              */
  344        __u16   snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
  345        __u32   snd_cwnd_used;  /* NOTE(review): undocumented; presumably cwnd-validation state -- confirm */
  346        __u32   snd_cwnd_stamp; /* NOTE(review): undocumented; presumably a timestamp paired with snd_cwnd_used -- confirm */
  347
  348        /* Two commonly used timers in both sender and receiver paths. */
  349        unsigned long           timeout;
  350        struct timer_list       retransmit_timer;       /* Resend (no ack)      */
  351        struct timer_list       delack_timer;           /* Ack delay            */
  352
  353        struct sk_buff_head     out_of_order_queue; /* Out of order segments go here */
  354
  355        struct tcp_func         *af_specific;   /* Operations which are AF_INET{4,6} specific   */
  356        struct sk_buff          *send_head;     /* Front of stuff to transmit                   */
  357        struct page             *sndmsg_page;   /* Cached page for sendmsg                      */
  358        u32                     sndmsg_off;     /* Cached offset for sendmsg                    */
  359
  360        __u32   rcv_wnd;        /* Current receiver window              */
  361        __u32   rcv_wup;        /* rcv_nxt on last window update sent   */
  362        __u32   write_seq;      /* Tail(+1) of data held in tcp send buffer */
  363        __u32   pushed_seq;     /* Last pushed seq, required to talk to windows */
  364        __u32   copied_seq;     /* Head of yet unread data              */
  365/*
  366 *      Options received (usually on last packet, some only on SYN packets).
  367 */
  368        char    tstamp_ok,      /* TIMESTAMP seen on SYN packet         */
  369                wscale_ok,      /* Wscale seen on SYN packet            */
  370                sack_ok;        /* SACK seen on SYN packet              */
  371        char    saw_tstamp;     /* Saw TIMESTAMP on last packet         */
  372        __u8    snd_wscale;     /* Window scaling received from sender  */
  373        __u8    rcv_wscale;     /* Window scaling to send to receiver   */
  374        __u8    nonagle;        /* Disable Nagle algorithm?             */
  375        __u8    keepalive_probes; /* num of allowed keep alive probes   */
  376
  377/*      PAWS/RTTM data  */
  378        __u32   rcv_tsval;      /* Time stamp value                     */
  379        __u32   rcv_tsecr;      /* Time stamp echo reply                */
  380        __u32   ts_recent;      /* Time stamp to echo next              */
  381        long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */
  382
  383/*      SACKs data      */
  384        __u16   user_mss;       /* mss requested by user in ioctl */
  385        __u8    dsack;          /* D-SACK is scheduled                  */
  386        __u8    eff_sacks;      /* Size of SACK array to send with next packet */
  387        struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
  388        struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
  389
  390        __u32   window_clamp;   /* Maximal window to advertise          */
  391        __u32   rcv_ssthresh;   /* Current window clamp                 */
  392        __u8    probes_out;     /* unanswered 0 window probes           */
  393        __u8    num_sacks;      /* Number of SACK blocks                */
  394        __u16   advmss;         /* Advertised MSS                       */
  395
  396        __u8    syn_retries;    /* num of allowed syn retries */
  397        __u8    ecn_flags;      /* ECN status bits.                     */
  398        __u16   prior_ssthresh; /* ssthresh saved at recovery start     */
  399        __u32   lost_out;       /* Lost packets                         */
  400        __u32   sacked_out;     /* SACK'd packets                       */
  401        __u32   fackets_out;    /* FACK'd packets                       */
  402        __u32   high_seq;       /* snd_nxt at onset of congestion       */
  403
  404        __u32   retrans_stamp;  /* Timestamp of the last retransmit,
  405                                 * also used in SYN-SENT to remember stamp of
  406                                 * the first SYN. */
  407        __u32   undo_marker;    /* tracking retrans started here. */
  408        int     undo_retrans;   /* number of undoable retransmissions. */
  409        __u32   urg_seq;        /* Seq of received urgent pointer */
  410        __u16   urg_data;       /* Saved octet of OOB data and control flags */
  411        __u8    pending;        /* Scheduled timer event        */
  412        __u8    urg_mode;       /* In urgent mode               */
  413        __u32   snd_up;         /* Urgent pointer               */
  414
  415        /* The syn_wait_lock is necessary only to avoid tcp_get_info having
  416         * to grab the main lock sock while browsing the listening hash
  417         * (otherwise it's deadlock prone).
  418         * This lock is acquired in read mode only from tcp_get_info() and
  419         * it's acquired in write mode _only_ from code that is actively
  420         * changing the syn_wait_queue. All readers that are holding
  421         * the master sock lock don't need to grab this lock in read mode
  422         * too as the syn_wait_queue writes are always protected from
  423         * the main sock lock.
  424         */
  425        rwlock_t                syn_wait_lock;
  426        struct tcp_listen_opt   *listen_opt;
  427
  428        /* FIFO of established children */
  429        struct open_request     *accept_queue;
  430        struct open_request     *accept_queue_tail;
  431
  432        int                     write_pending;  /* A write to socket waits to start. */
  433
  434        unsigned int            keepalive_time;   /* time before keep alive takes place */
  435        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
  436        int                     linger2;        /* NOTE(review): undocumented; presumably the TCP_LINGER2 setting -- confirm */
  437
  438        __u8                    adv_cong;    /* Using Vegas, Westwood, or BIC (presumably an enum tcp_congestion_algo value -- confirm) */
  439        __u8                    frto_counter; /* Number of new acks after RTO */
  440        __u32                   frto_highmark; /* snd_nxt when RTO occurred */
  441
  442        unsigned long last_synq_overflow; 
  443
  444/* Receiver side RTT estimation */
  445        struct {
  446                __u32   rtt;
  447                __u32   seq;
  448                __u32   time;
  449        } rcv_rtt_est;
  450
  451/* Receiver queue space */
  452        struct {
  453                int     space;
  454                __u32   seq;
  455                __u32   time;
  456        } rcvq_space;
  457
  458/* TCP Westwood structure */
  459        struct {
  460                __u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
  461                __u32    bw_est;           /* bandwidth estimate */
  462                __u32    rtt_win_sx;       /* here starts a new evaluation... */
  463                __u32    bk;
  464                __u32    snd_una;          /* used for evaluating the number of acked bytes */
  465                __u32    cumul_ack;
  466                __u32    accounted;
  467                __u32    rtt;
  468                __u32    rtt_min;          /* minimum observed RTT */
  469        } westwood;
  470
  471/* Vegas variables */
  472        struct {
  473                __u32   beg_snd_nxt;    /* right edge during last RTT */
  474                __u32   beg_snd_una;    /* left edge  during last RTT */
  475                __u32   beg_snd_cwnd;   /* saves the size of the cwnd */
  476                __u8    doing_vegas_now;/* if true, do vegas for this RTT */
  477                __u16   cntRTT;         /* # of RTTs measured within last RTT */
  478                __u32   minRTT;         /* min of RTTs measured within last RTT (in usec) */
  479                __u32   baseRTT;        /* the min of all Vegas RTT measurements seen (in usec) */
  480        } vegas;
  481
  482        /* BI TCP Parameters */
  483        struct {
  484                __u32   cnt;            /* increase cwnd by 1 after this number of ACKs */
  485                __u32   last_max_cwnd;  /* last maximum snd_cwnd */
  486                __u32   last_cwnd;      /* the last snd_cwnd */
  487                __u32   last_stamp;     /* time when updated last_cwnd */
  488        } bictcp;
  489};
 490
 491        
 492/*
 493 * This structure really needs to be cleaned up.
 494 * Most of it is for TCP, and not used by any of
 495 * the other protocols.
 496 */
 497
 498/*
  499 * The idea is to start moving to a newer struct gradually
 500 * 
 501 * IMHO the newer struct should have the following format:
 502 * 
 503 *      struct sock {
 504 *              sockmem [mem, proto, callbacks]
 505 *
 506 *              union or struct {
 507 *                      ax25;
 508 *              } ll_pinfo;
 509 *      
 510 *              union {
 511 *                      ipv4;
 512 *                      ipv6;
 513 *                      ipx;
 514 *                      netrom;
 515 *                      rose;
 516 *                      x25;
 517 *              } net_pinfo;
 518 *
 519 *              union {
 520 *                      tcp;
 521 *                      udp;
 522 *                      spx;
 523 *                      netrom;
 524 *              } tp_pinfo;
 525 *
 526 *      }
 527 *
  528 * The idea failed because the IPv6 transition assumes dual IP/IPv6 sockets.
  529 * So net_pinfo really holds only IPv6 state, and protinfo unifies all the
  530 * other private areas.
 531 */
 532
  533/* Define this to get the sk->debug debugging facility.
      *
      * SOCK_DEBUGGING is defined unconditionally on the next line, so the
      * printing variant of SOCK_DEBUG() is always the one selected unless
      * that define is removed.
      *
      * SOCK_DEBUG(sk, fmt, args...) calls printk() with KERN_DEBUG placed
      * directly in front of the caller's arguments, but only when sk is
      * non-NULL and sk->debug is non-zero.  sk is expanded twice, so it must
      * not have side effects.  "msg..." is the GNU C named-variadic macro
      * syntax.  The disabled variant expands to an empty statement and
      * evaluates none of its arguments.
      */
  534#define SOCK_DEBUGGING
  535#ifdef SOCK_DEBUGGING
  536#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG msg); } while (0)
  537#else
  538#define SOCK_DEBUG(sk, msg...) do { } while (0)
  539#endif
 540
  541/* This is the per-socket lock.  The spinlock provides a synchronization
  542 * between user contexts and software interrupt processing, whereas the
  543 * mini-semaphore synchronizes multiple users amongst themselves.
      *
      * struct sock embeds one of these as its "lock" member, and
      * sock_lock_init() sets up all three fields.
  544 */
  545typedef struct {
  546        spinlock_t              slock;
  547        unsigned int            users;          /* set to 0 by sock_lock_init() */
  548        wait_queue_head_t       wq;
  549} socket_lock_t;
 550
  /*
   * Initialise the socket_lock_t embedded in *__sk as its "lock" member:
   * set up the spinlock, zero the user count and set up the wait queue
   * head.  __sk is a pointer and is expanded three times, so it must not
   * have side effects.
   */
  551#define sock_lock_init(__sk) \
  552do {    spin_lock_init(&((__sk)->lock.slock)); \
  553        (__sk)->lock.users = 0; \
  554        init_waitqueue_head(&((__sk)->lock.wq)); \
  555} while(0)
 556
  557struct sock {
             /* The generic socket structure.  It combines common fields with
              * three protocol-private unions whose membership is chosen by
              * kernel configuration, so the layout differs between builds:
              *   net_pinfo - IPv6 state (present only when IPv6 is configured)
              *   tp_pinfo  - TCP / SCTP / raw / SPX state
              *   protinfo  - everything else, by value or by pointer
              */
  558        /* Socket demultiplex comparisons on incoming packets. */
  559        __u32                   daddr;          /* Foreign IPv4 addr                    */
  560        __u32                   rcv_saddr;      /* Bound local IPv4 addr                */
  561        __u16                   dport;          /* Destination port                     */
  562        unsigned short          num;            /* Local port                           */
  563        int                     bound_dev_if;   /* Bound device index if != 0           */
  564
  565        /* Main hash linkage for various protocol lookup tables. */
  566        struct sock             *next;
  567        struct sock             **pprev;
  568        struct sock             *bind_next;
  569        struct sock             **bind_pprev;
  570
  571        volatile unsigned char  state,          /* Connection state                     */
  572                                zapped;         /* In ax25 & ipx means not linked       */
  573        __u16                   sport;          /* Source port                          */
  574
  575        unsigned short          family;         /* Address family (compared with PF_INET6 by ipv6_only_sock()) */
  576        unsigned char           reuse;          /* SO_REUSEADDR setting                 */
  577        unsigned char           shutdown;
  578        atomic_t                refcnt;         /* Reference count                      */
  579
  580        socket_lock_t           lock;           /* Synchronizer... (set up by sock_lock_init()) */
  581        int                     rcvbuf;         /* Size of receive buffer in bytes      */
  582
  583        wait_queue_head_t       *sleep;         /* Sock wait queue                      */
  584        struct dst_entry        *dst_cache;     /* Destination cache                    */
  585        rwlock_t                dst_lock;
  586        atomic_t                rmem_alloc;     /* Receive queue bytes committed        */
  587        struct sk_buff_head     receive_queue;  /* Incoming packets                     */
  588        atomic_t                wmem_alloc;     /* Transmit queue bytes committed       */
  589        struct sk_buff_head     write_queue;    /* Packet sending queue                 */
  590        atomic_t                omem_alloc;     /* "o" is "option" or "other" */
  591        int                     wmem_queued;    /* Persistent queue size */
  592        int                     forward_alloc;  /* Space allocated forward. */
  593        __u32                   saddr;          /* Sending source                       */
  594        unsigned int            allocation;     /* Allocation mode                      */
  595        int                     sndbuf;         /* Size of send buffer in bytes         */
  596        struct sock             *prev;
  597
  598        /* Not all are volatile, but some are, so we might as well say they all are.
  599         * XXX Make this a flag word -DaveM
  600         */
  601        volatile char           dead,
  602                                done,
  603                                urginline,
  604                                keepopen,
  605                                linger,
  606                                destroy,
  607                                no_check,
  608                                broadcast,
  609                                bsdism;
  610        unsigned char           debug;          /* non-zero enables SOCK_DEBUG() output for this socket */
  611        unsigned char           rcvtstamp;
  612        unsigned char           use_write_queue;
  613        unsigned char           userlocks;
  614        /* Hole of 3 bytes. Try to pack. */
  615        int                     route_caps;
  616        int                     proc;
  617        unsigned long           lingertime;
  618
  619        int                     hashent;
  620        struct sock             *pair;
  621
  622        /* The backlog queue is special, it is always used with
  623         * the per-socket spinlock held and requires low latency
  624         * access.  Therefore we special case its implementation.
  625         */
  626        struct {
  627                struct sk_buff *head;
  628                struct sk_buff *tail;
  629        } backlog;
  630
  631        rwlock_t                callback_lock;
  632
  633        /* Error queue, rarely used. */
  634        struct sk_buff_head     error_queue;
  635
  636        struct proto            *prot;
  637
  638#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  639        union {
  640                struct ipv6_pinfo       af_inet6;       /* read by the ipv6_only_sock() macros */
  641        } net_pinfo;
  642#endif
  643
             /* Transport-level private state; every member is held by value. */
  644        union {
  645                struct tcp_opt          af_tcp;
  646#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
  647                struct sctp_opt         af_sctp;
  648#endif
  649#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
  650                struct raw_opt          tp_raw4;
  651#endif
  652#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  653                struct raw6_opt         tp_raw;
  654#endif /* CONFIG_IPV6 */
  655#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
  656                struct spx_opt          af_spx;
  657#endif /* CONFIG_SPX */
  658
  659        } tp_pinfo;
  660
  661        int                     err, err_soft;  /* Soft holds errors that don't
  662                                                   cause failure but are the cause
  663                                                   of a persistent failure not just
  664                                                   'timed out' */
  665        unsigned short          ack_backlog;
  666        unsigned short          max_ack_backlog;
  667        __u32                   priority;
  668        unsigned short          type;
  669        unsigned char           localroute;     /* Route locally only */
  670        unsigned char           protocol;
  671        struct ucred            peercred;
  672        int                     rcvlowat;
  673        long                    rcvtimeo;
  674        long                    sndtimeo;
  675
  676#ifdef CONFIG_FILTER
  677        /* Socket Filtering Instructions */
  678        struct sk_filter        *filter;
  679#endif /* CONFIG_FILTER */
  680
  681        /* This is where all the private (optional) areas that don't
  682         * overlap will eventually live. 
             *
             * Members are a mix: af_unix, af_inet, af_at, af_ipx and dn are
             * held by value, while the remaining members are pointers to
             * separately declared structures.
  683         */
  684        union {
  685                void *destruct_hook;
  686                struct unix_opt af_unix;
  687#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
  688                struct inet_opt af_inet;
  689#endif
  690#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
  691                struct atalk_sock       af_at;
  692#endif
  693#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
  694                struct ipx_opt          af_ipx;
  695#endif
  696#if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
  697                struct dn_scp           dn;
  698#endif
  699#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
  700                struct packet_opt       *af_packet;
  701#endif
  702#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
  703                x25_cb                  *x25;
  704#endif
  705#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  706                ax25_cb                 *ax25;
  707#endif
  708#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
  709                nr_cb                   *nr;
  710#endif
  711#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
  712                rose_cb                 *rose;
  713#endif
  714#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
  715                struct pppox_opt        *pppox;
  716#endif
  717                struct netlink_opt      *af_netlink;
  718#if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
  719                struct econet_opt       *af_econet;
  720#endif
  721#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
  722                struct atm_vcc          *af_atm;
  723#endif
  724#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
  725                struct irda_sock        *irda;
  726#endif
  727#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
  728               struct wanpipe_opt      *af_wanpipe;
  729#endif
  730        } protinfo;             
  731
  732
  733        /* This part is used for the timeout functions. */
  734        struct timer_list       timer;          /* This is the sock cleanup timer. */
  735        struct timeval          stamp;
  736
  737        /* Identd and reporting IO signals */
  738        struct socket           *socket;
  739
  740        /* RPC layer private data */
  741        void                    *user_data;
  742  
  743        /* Callbacks */
  744        void                    (*state_change)(struct sock *sk);
  745        void                    (*data_ready)(struct sock *sk,int bytes);
  746        void                    (*write_space)(struct sock *sk);
  747        void                    (*error_report)(struct sock *sk);
  748
  749        int                     (*backlog_rcv) (struct sock *sk,
  750                                                struct sk_buff *skb);  
  751        void                    (*destruct)(struct sock *sk);
  752};
 753
 754/* The per-socket spinlock must be held here. */
 755#define sk_add_backlog(__sk, __skb)                     \
 756do {    if((__sk)->backlog.tail == NULL) {              \
 757                (__sk)->backlog.head =                  \
 758                     (__sk)->backlog.tail = (__skb);    \
 759        } else {                                        \
 760                ((__sk)->backlog.tail)->next = (__skb); \
 761                (__sk)->backlog.tail = (__skb);         \
 762        }                                               \
 763        (__skb)->next = NULL;                           \
 764} while(0)
 765
 766/* IP protocol blocks we attach to sockets.
 767 * socket layer -> transport layer interface
 768 * transport -> network interface is defined by struct inet_proto
 769 */
 770struct proto {
 771        void                    (*close)(struct sock *sk, 
 772                                        long timeout);
 773        int                     (*connect)(struct sock *sk,
 774                                        struct sockaddr *uaddr, 
 775                                        int addr_len);
 776        int                     (*disconnect)(struct sock *sk, int flags);
 777
 778        struct sock *           (*accept) (struct sock *sk, int flags, int *err);
 779
 780        int                     (*ioctl)(struct sock *sk, int cmd,
 781                                         unsigned long arg);
 782        int                     (*init)(struct sock *sk);
 783        int                     (*destroy)(struct sock *sk);
 784        void                    (*shutdown)(struct sock *sk, int how);
 785        int                     (*setsockopt)(struct sock *sk, int level, 
 786                                        int optname, char *optval, int optlen);
 787        int                     (*getsockopt)(struct sock *sk, int level, 
 788                                        int optname, char *optval, 
 789                                        int *option);    
 790        int                     (*sendmsg)(struct sock *sk, struct msghdr *msg,
 791                                           int len);
 792        int                     (*recvmsg)(struct sock *sk, struct msghdr *msg,
 793                                        int len, int noblock, int flags, 
 794                                        int *addr_len);
 795        int                     (*bind)(struct sock *sk, 
 796                                        struct sockaddr *uaddr, int addr_len);
 797
 798        int                     (*backlog_rcv) (struct sock *sk, 
 799                                                struct sk_buff *skb);
 800
 801        /* Keeping track of sk's, looking them up, and port selection methods. */
 802        void                    (*hash)(struct sock *sk);
 803        void                    (*unhash)(struct sock *sk);
 804        int                     (*get_port)(struct sock *sk, unsigned short snum);
 805
 806        char                    name[32];
 807
 808        struct {
 809                int inuse;
 810                u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
 811        } stats[NR_CPUS];
 812};
 813
 814/* Called with local bh disabled */
 815static __inline__ void sock_prot_inc_use(struct proto *prot)
 816{
 817        prot->stats[smp_processor_id()].inuse++;
 818}
 819
 820static __inline__ void sock_prot_dec_use(struct proto *prot)
 821{
 822        prot->stats[smp_processor_id()].inuse--;
 823}
 824
 825/* About 10 seconds */
 826#define SOCK_DESTROY_TIME (10*HZ)
 827
 828/* Sockets 0-1023 can't be bound to unless you are superuser */
 829#define PROT_SOCK       1024
 830
 831#define SHUTDOWN_MASK   3
 832#define RCV_SHUTDOWN    1
 833#define SEND_SHUTDOWN   2
 834
 835#define SOCK_SNDBUF_LOCK        1
 836#define SOCK_RCVBUF_LOCK        2
 837#define SOCK_BINDADDR_LOCK      4
 838#define SOCK_BINDPORT_LOCK      8
 839
 840
 841/* Used by processes to "lock" a socket state, so that
 842 * interrupts and bottom half handlers won't change it
 843 * from under us. It essentially blocks any incoming
 844 * packets, so that we won't get any new data or any
 845 * packets that change the state of the socket.
 846 *
 847 * While locked, BH processing will add new packets to
 848 * the backlog queue.  This queue is processed by the
 849 * owner of the socket lock right before it is released.
 850 *
 851 * Since ~2.3.5 it is also exclusive sleep lock serializing
 852 * accesses from user process context.
 853 */
 854extern void __lock_sock(struct sock *sk);
 855extern void __release_sock(struct sock *sk);
 856#define lock_sock(__sk) \
 857do {    spin_lock_bh(&((__sk)->lock.slock)); \
 858        if ((__sk)->lock.users != 0) \
 859                __lock_sock(__sk); \
 860        (__sk)->lock.users = 1; \
 861        spin_unlock_bh(&((__sk)->lock.slock)); \
 862} while(0)
 863
 864#define release_sock(__sk) \
 865do {    spin_lock_bh(&((__sk)->lock.slock)); \
 866        if ((__sk)->backlog.tail != NULL) \
 867                __release_sock(__sk); \
 868        (__sk)->lock.users = 0; \
 869        if (waitqueue_active(&((__sk)->lock.wq))) wake_up(&((__sk)->lock.wq)); \
 870        spin_unlock_bh(&((__sk)->lock.slock)); \
 871} while(0)
 872
 873/* BH context may only use the following locking interface. */
 874#define bh_lock_sock(__sk)      spin_lock(&((__sk)->lock.slock))
 875#define bh_unlock_sock(__sk)    spin_unlock(&((__sk)->lock.slock))
 876
 877extern struct sock *            sk_alloc(int family, int priority, int zero_it);
 878extern void                     sk_free(struct sock *sk);
 879
 880extern struct sk_buff           *sock_wmalloc(struct sock *sk,
 881                                              unsigned long size, int force,
 882                                              int priority);
 883extern struct sk_buff           *sock_rmalloc(struct sock *sk,
 884                                              unsigned long size, int force,
 885                                              int priority);
 886extern void                     sock_wfree(struct sk_buff *skb);
 887extern void                     sock_rfree(struct sk_buff *skb);
 888
 889extern int                      sock_setsockopt(struct socket *sock, int level,
 890                                                int op, char *optval,
 891                                                int optlen);
 892
 893extern int                      sock_getsockopt(struct socket *sock, int level,
 894                                                int op, char *optval, 
 895                                                int *optlen);
 896extern struct sk_buff           *sock_alloc_send_skb(struct sock *sk,
 897                                                     unsigned long size,
 898                                                     int noblock,
 899                                                     int *errcode);
 900extern struct sk_buff           *sock_alloc_send_pskb(struct sock *sk,
 901                                                      unsigned long header_len,
 902                                                      unsigned long data_len,
 903                                                      int noblock,
 904                                                      int *errcode);
 905extern void *sock_kmalloc(struct sock *sk, int size, int priority);
 906extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 907
 908/*
 909 * Functions to fill in entries in struct proto_ops when a protocol
 910 * does not implement a particular function.
 911 */
 912extern int                      sock_no_release(struct socket *);
 913extern int                      sock_no_bind(struct socket *, 
 914                                             struct sockaddr *, int);
 915extern int                      sock_no_connect(struct socket *,
 916                                                struct sockaddr *, int, int);
 917extern int                      sock_no_socketpair(struct socket *,
 918                                                   struct socket *);
 919extern int                      sock_no_accept(struct socket *,
 920                                               struct socket *, int);
 921extern int                      sock_no_getname(struct socket *,
 922                                                struct sockaddr *, int *, int);
 923extern unsigned int             sock_no_poll(struct file *, struct socket *,
 924                                             struct poll_table_struct *);
 925extern int                      sock_no_ioctl(struct socket *, unsigned int,
 926                                              unsigned long);
 927extern int                      sock_no_listen(struct socket *, int);
 928extern int                      sock_no_shutdown(struct socket *, int);
 929extern int                      sock_no_getsockopt(struct socket *, int , int,
 930                                                   char *, int *);
 931extern int                      sock_no_setsockopt(struct socket *, int, int,
 932                                                   char *, int);
 933extern int                      sock_no_fcntl(struct socket *, 
 934                                              unsigned int, unsigned long);
 935extern int                      sock_no_sendmsg(struct socket *,
 936                                                struct msghdr *, int,
 937                                                struct scm_cookie *);
 938extern int                      sock_no_recvmsg(struct socket *,
 939                                                struct msghdr *, int, int,
 940                                                struct scm_cookie *);
 941extern int                      sock_no_mmap(struct file *file,
 942                                             struct socket *sock,
 943                                             struct vm_area_struct *vma);
 944extern ssize_t                  sock_no_sendpage(struct socket *sock,
 945                                                struct page *page,
 946                                                int offset, size_t size, 
 947                                                int flags);
 948
 949/*
 950 *      Default socket callbacks and setup code
 951 */
 952 
 953extern void sock_def_destruct(struct sock *);
 954
 955/* Initialise core socket variables */
 956extern void sock_init_data(struct socket *sock, struct sock *sk);
 957
 958extern void sklist_remove_socket(struct sock **list, struct sock *sk);
 959extern void sklist_insert_socket(struct sock **list, struct sock *sk);
 960extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
 961
 962#ifdef CONFIG_FILTER
 963
 964/**
 965 *      sk_filter - run a packet through a socket filter
 966 *      @sk: sock associated with &sk_buff
 967 *      @skb: buffer to filter
 968 *      @needlock: set to 1 if the sock is not locked by caller.
 969 *
 970 * Run the filter code and then cut skb->data to correct size returned by
 971 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 972 * than pkt_len we keep whole skb->data. This is the socket level
 973 * wrapper to sk_run_filter. It returns 0 if the packet should
 974 * be accepted or -EPERM if the packet should be tossed.
 975 */
 976
 977static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
 978{
 979        int err = 0;
 980
 981        if (sk->filter) {
 982                struct sk_filter *filter;
 983                
 984                if (needlock)
 985                        bh_lock_sock(sk);
 986                
 987                filter = sk->filter;
 988                if (filter) {
 989                        int pkt_len = sk_run_filter(skb, filter->insns,
 990                                                    filter->len);
 991                        if (!pkt_len)
 992                                err = -EPERM;
 993                        else
 994                                skb_trim(skb, pkt_len);
 995                }
 996
 997                if (needlock)
 998                        bh_unlock_sock(sk);
 999        }
1000        return err;
1001}
1002
1003/**
1004 *      sk_filter_release: Release a socket filter
1005 *      @sk: socket
1006 *      @fp: filter to remove
1007 *
1008 *      Remove a filter from a socket and release its resources.
1009 */
1010 
1011static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
1012{
1013        unsigned int size = sk_filter_len(fp);
1014
1015        atomic_sub(size, &sk->omem_alloc);
1016
1017        if (atomic_dec_and_test(&fp->refcnt))
1018                kfree(fp);
1019}
1020
1021static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1022{
1023        atomic_inc(&fp->refcnt);
1024        atomic_add(sk_filter_len(fp), &sk->omem_alloc);
1025}
1026
1027#else
1028
/* Built without CONFIG_FILTER: every packet is accepted untouched. */
static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
{
        const int accept = 0;

        return accept;
}
1033
1034#endif /* CONFIG_FILTER */
1035
/*
 * Socket reference counting postulates.
 *
 * * Each user of socket SHOULD hold a reference count.
 * * Each access point to socket (a hash table bucket, reference from a list,
 *   running timer, skb in flight) MUST hold a reference count.
 * * When reference count hits 0, it means it will never increase back.
 * * When reference count hits 0, it means that no references from
 *   outside exist to this socket and current process on current CPU
 *   is last user and may/should destroy this socket.
 * * sk_free is called from any context: process, BH, IRQ. When
 *   it is called, socket has no references from outside -> sk_free
 *   may release descendant resources allocated by the socket, but
 *   by the time it is called, socket is NOT referenced by any
 *   hash tables, lists etc.
 * * Packets, delivered from outside (from network or from another process)
 *   and enqueued on receive/error queues SHOULD NOT grab reference count,
 *   when they sit in queue. Otherwise, packets will leak into a hole, when
 *   socket is looked up by one cpu and unhashing is made by another CPU.
 *   It is true for udp/raw, netlink (leak to receive and error queues), tcp
 *   (leak to backlog). Packet socket does all the processing inside
 *   BR_NETPROTO_LOCK, so that it does not have this race condition. UNIX
 *   sockets use separate SMP lock, so that they are prone to it too.
 */
1060
1061/* Grab socket reference count. This operation is valid only
1062   when sk is ALREADY grabbed f.e. it is found in hash table
1063   or a list and the lookup is made under lock preventing hash table
1064   modifications.
1065 */
1066
1067static inline void sock_hold(struct sock *sk)
1068{
1069        atomic_inc(&sk->refcnt);
1070}
1071
1072/* Ungrab socket in the context, which assumes that socket refcnt
1073   cannot hit zero, f.e. it is true in context of any socketcall.
1074 */
1075static inline void __sock_put(struct sock *sk)
1076{
1077        atomic_dec(&sk->refcnt);
1078}
1079
1080/* Ungrab socket and destroy it, if it was the last reference. */
1081static inline void sock_put(struct sock *sk)
1082{
1083        if (atomic_dec_and_test(&sk->refcnt))
1084                sk_free(sk);
1085}
1086
1087/* Detach socket from process context.
1088 * Announce socket dead, detach it from wait queue and inode.
1089 * Note that parent inode held reference count on this struct sock,
1090 * we do not release it in this function, because protocol
1091 * probably wants some additional cleanups or even continuing
1092 * to work with this socket (TCP).
1093 */
1094static inline void sock_orphan(struct sock *sk)
1095{
1096        write_lock_bh(&sk->callback_lock);
1097        sk->dead = 1;
1098        sk->socket = NULL;
1099        sk->sleep = NULL;
1100        write_unlock_bh(&sk->callback_lock);
1101}
1102
1103static inline void sock_graft(struct sock *sk, struct socket *parent)
1104{
1105        write_lock_bh(&sk->callback_lock);
1106        sk->sleep = &parent->wait;
1107        parent->sk = sk;
1108        sk->socket = parent;
1109        write_unlock_bh(&sk->callback_lock);
1110}
1111
1112static inline int sock_i_uid(struct sock *sk)
1113{
1114        int uid;
1115
1116        read_lock(&sk->callback_lock);
1117        uid = sk->socket ? sk->socket->inode->i_uid : 0;
1118        read_unlock(&sk->callback_lock);
1119        return uid;
1120}
1121
1122static inline unsigned long sock_i_ino(struct sock *sk)
1123{
1124        unsigned long ino;
1125
1126        read_lock(&sk->callback_lock);
1127        ino = sk->socket ? sk->socket->inode->i_ino : 0;
1128        read_unlock(&sk->callback_lock);
1129        return ino;
1130}
1131
1132static inline struct dst_entry *
1133__sk_dst_get(struct sock *sk)
1134{
1135        return sk->dst_cache;
1136}
1137
1138static inline struct dst_entry *
1139sk_dst_get(struct sock *sk)
1140{
1141        struct dst_entry *dst;
1142
1143        read_lock(&sk->dst_lock);
1144        dst = sk->dst_cache;
1145        if (dst)
1146                dst_hold(dst);
1147        read_unlock(&sk->dst_lock);
1148        return dst;
1149}
1150
1151static inline void
1152__sk_dst_set(struct sock *sk, struct dst_entry *dst)
1153{
1154        struct dst_entry *old_dst;
1155
1156        old_dst = sk->dst_cache;
1157        sk->dst_cache = dst;
1158        dst_release(old_dst);
1159}
1160
1161static inline void
1162sk_dst_set(struct sock *sk, struct dst_entry *dst)
1163{
1164        write_lock(&sk->dst_lock);
1165        __sk_dst_set(sk, dst);
1166        write_unlock(&sk->dst_lock);
1167}
1168
1169static inline void
1170__sk_dst_reset(struct sock *sk)
1171{
1172        struct dst_entry *old_dst;
1173
1174        old_dst = sk->dst_cache;
1175        sk->dst_cache = NULL;
1176        dst_release(old_dst);
1177}
1178
1179static inline void
1180sk_dst_reset(struct sock *sk)
1181{
1182        write_lock(&sk->dst_lock);
1183        __sk_dst_reset(sk);
1184        write_unlock(&sk->dst_lock);
1185}
1186
1187static inline struct dst_entry *
1188__sk_dst_check(struct sock *sk, u32 cookie)
1189{
1190        struct dst_entry *dst = sk->dst_cache;
1191
1192        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1193                sk->dst_cache = NULL;
1194                return NULL;
1195        }
1196
1197        return dst;
1198}
1199
1200static inline struct dst_entry *
1201sk_dst_check(struct sock *sk, u32 cookie)
1202{
1203        struct dst_entry *dst = sk_dst_get(sk);
1204
1205        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1206                sk_dst_reset(sk);
1207                return NULL;
1208        }
1209
1210        return dst;
1211}
1212
1213
1214/*
1215 *      Queue a received datagram if it will fit. Stream and sequenced
1216 *      protocols can't normally use this as they need to fit buffers in
1217 *      and play with them.
1218 *
1219 *      Inlined as it's very short and called for pretty much every
1220 *      packet ever received.
1221 */
1222
1223static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1224{
1225        sock_hold(sk);
1226        skb->sk = sk;
1227        skb->destructor = sock_wfree;
1228        atomic_add(skb->truesize, &sk->wmem_alloc);
1229}
1230
1231static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
1232{
1233        skb->sk = sk;
1234        skb->destructor = sock_rfree;
1235        atomic_add(skb->truesize, &sk->rmem_alloc);
1236}
1237
1238static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1239{
1240        int err = 0;
1241        int skb_len;
1242
1243        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1244           number of warnings when compiling with -W --ANK
1245         */
1246        if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
1247                err = -ENOMEM;
1248                goto out;
1249        }
1250
1251        /* It would be deadlock, if sock_queue_rcv_skb is used
1252           with socket lock! We assume that users of this
1253           function are lock free.
1254        */
1255        err = sk_filter(sk, skb, 1);
1256        if (err)
1257                goto out;
1258
1259        skb->dev = NULL;
1260        skb_set_owner_r(skb, sk);
1261
1262        /* Cache the SKB length before we tack it onto the receive
1263         * queue.  Once it is added it no longer belongs to us and
1264         * may be freed by other threads of control pulling packets
1265         * from the queue.
1266         */
1267        skb_len = skb->len;
1268
1269        skb_queue_tail(&sk->receive_queue, skb);
1270        if (!sk->dead)
1271                sk->data_ready(sk,skb_len);
1272out:
1273        return err;
1274}
1275
1276static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
1277{
1278        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1279           number of warnings when compiling with -W --ANK
1280         */
1281        if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
1282                return -ENOMEM;
1283        skb_set_owner_r(skb, sk);
1284        skb_queue_tail(&sk->error_queue,skb);
1285        if (!sk->dead)
1286                sk->data_ready(sk,skb->len);
1287        return 0;
1288}
1289
1290/*
1291 *      Recover an error report and clear atomically
1292 */
1293 
1294static inline int sock_error(struct sock *sk)
1295{
1296        int err=xchg(&sk->err,0);
1297        return -err;
1298}
1299
1300static inline unsigned long sock_wspace(struct sock *sk)
1301{
1302        int amt = 0;
1303
1304        if (!(sk->shutdown & SEND_SHUTDOWN)) {
1305                amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
1306                if (amt < 0) 
1307                        amt = 0;
1308        }
1309        return amt;
1310}
1311
1312static inline void sk_wake_async(struct sock *sk, int how, int band)
1313{
1314        if (sk->socket && sk->socket->fasync_list)
1315                sock_wake_async(sk->socket, how, band);
1316}
1317
/* Presumably the lower bounds for sk->sndbuf / sk->rcvbuf -- TODO confirm against users. */
#define SOCK_MIN_SNDBUF         2048
#define SOCK_MIN_RCVBUF         256
1320
1321/*
1322 *      Default write policy as shown to user space via poll/select/SIGIO
1323 */
1324static inline int sock_writeable(struct sock *sk) 
1325{
1326        return atomic_read(&sk->wmem_alloc) < (sk->sndbuf / 2);
1327}
1328
1329static inline int gfp_any(void)
1330{
1331        return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
1332}
1333
1334static inline long sock_rcvtimeo(struct sock *sk, int noblock)
1335{
1336        return noblock ? 0 : sk->rcvtimeo;
1337}
1338
1339static inline long sock_sndtimeo(struct sock *sk, int noblock)
1340{
1341        return noblock ? 0 : sk->sndtimeo;
1342}
1343
1344static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
1345{
1346        return (waitall ? len : min_t(int, sk->rcvlowat, len)) ? : 1;
1347}
1348
1349/* Alas, with timeout socket operations are not restartable.
1350 * Compare this to poll().
1351 */
1352static inline int sock_intr_errno(long timeo)
1353{
1354        return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
1355}
1356
1357static __inline__ void
1358sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1359{
1360        if (sk->rcvtstamp)
1361                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp);
1362        else
1363                sk->stamp = skb->stamp;
1364}
1365
/*
 *      Enable debug/info messages: flip the condition below to 0 to
 *      compile every NETDEBUG() statement out.
 */

#if 1
#define NETDEBUG(x)     do { x; } while (0)
#else
#define NETDEBUG(x)     do { } while (0)
#endif
1375
/*
 * Macros for sleeping on a socket. Use them like this:
 *
 * SOCK_SLEEP_PRE(sk)
 * if (condition)
 *      schedule();
 * SOCK_SLEEP_POST(sk)
 *
 * SOCK_SLEEP_PRE opens a block that SOCK_SLEEP_POST closes, so the two
 * must always be used as a pair in the same scope.  The locals `tsk'
 * and `wait' declared by PRE are visible to the code in between.
 */

#define SOCK_SLEEP_PRE(sk) \
        { \
                struct task_struct *tsk = current; \
                DECLARE_WAITQUEUE(wait, tsk); \
                tsk->state = TASK_INTERRUPTIBLE; \
                add_wait_queue((sk)->sleep, &wait); \
                release_sock(sk);

#define SOCK_SLEEP_POST(sk) \
                tsk->state = TASK_RUNNING; \
                remove_wait_queue((sk)->sleep, &wait); \
                lock_sock(sk); \
        }
1396
1397extern __u32 sysctl_wmem_max;
1398extern __u32 sysctl_rmem_max;
1399
1400#endif  /* _SOCK_H */
1401
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.