linux-old/include/net/sock.h
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Definitions for the AF_INET socket handler.
   7 *
   8 * Version:     @(#)sock.h      1.0.4   05/13/93
   9 *
  10 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  13 *              Florian La Roche <flla@stud.uni-sb.de>
  14 *
  15 * Fixes:
  16 *              Alan Cox        :       Volatiles in skbuff pointers. See
  17 *                                      skbuff comments. May be overdone,
  18 *                                      better to prove they can be removed
  19 *                                      than the reverse.
  20 *              Alan Cox        :       Added a zapped field for tcp to note
  21 *                                      a socket is reset and must stay shut up
  22 *              Alan Cox        :       New fields for options
  23 *      Pauline Middelink       :       identd support
  24 *              Alan Cox        :       Eliminate low level recv/recvfrom
  25 *              David S. Miller :       New socket lookup architecture.
  26 *              Steve Whitehouse:       Default routines for sock_ops
  27 *
  28 *              This program is free software; you can redistribute it and/or
  29 *              modify it under the terms of the GNU General Public License
  30 *              as published by the Free Software Foundation; either version
  31 *              2 of the License, or (at your option) any later version.
  32 */
  33#ifndef _SOCK_H
  34#define _SOCK_H
  35
  36#include <linux/config.h>
  37#include <linux/timer.h>
  38#include <linux/cache.h>
  39#include <linux/in.h>           /* struct sockaddr_in */
  40
  41#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
  42#include <linux/in6.h>          /* struct sockaddr_in6 */
  43#include <linux/ipv6.h>         /* dest_cache, inet6_options */
  44#include <linux/icmpv6.h>
  45#include <net/if_inet6.h>       /* struct ipv6_mc_socklist */
  46#endif
  47
  48#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
  49#include <linux/icmp.h>
  50#endif
  51#include <linux/tcp.h>          /* struct tcphdr */
  52#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
  53#include <net/sctp/structs.h>   /* struct sctp_opt */
  54#endif
  55
  56#include <linux/netdevice.h>
  57#include <linux/skbuff.h>       /* struct sk_buff */
  58#include <net/protocol.h>               /* struct inet_protocol */
  59#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
  60#include <net/x25.h>
  61#endif
  62#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
  63#include <linux/if_wanpipe.h>
  64#endif
  65
  66#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
  67#include <net/ax25.h>
  68#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
  69#include <net/netrom.h>
  70#endif
  71#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
  72#include <net/rose.h>
  73#endif
  74#endif
  75
  76#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
  77#include <linux/if_pppox.h>
  78#include <linux/ppp_channel.h>   /* struct ppp_channel */
  79#endif
  80
  81#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
  82#if defined(CONFIG_SPX) || defined(CONFIG_SPX_MODULE)
  83#include <net/spx.h>
  84#else
  85#include <net/ipx.h>
  86#endif /* CONFIG_SPX */
  87#endif /* CONFIG_IPX */
  88
  89#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
  90#include <linux/atalk.h>
  91#endif
  92
  93#if defined(CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
  94#include <net/dn.h>
  95#endif
  96
  97#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
  98#include <net/irda/irda.h>
  99#endif
 100
 101#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
 102struct atm_vcc;
 103#endif
 104
 105#ifdef CONFIG_FILTER
 106#include <linux/filter.h>
 107#endif
 108
 109#include <asm/atomic.h>
 110#include <net/dst.h>
 111
 112
 113/* The AF_UNIX specific socket options */
 114struct unix_opt {
 115        struct unix_address     *addr;
 116        struct dentry *         dentry;
 117        struct vfsmount *       mnt;
 118        struct semaphore        readsem;
 119        struct sock *           other;
 120        struct sock **          list;
 121        struct sock *           gc_tree;
 122        atomic_t                inflight;
 123        rwlock_t                lock;
 124        wait_queue_head_t       peer_wait;
 125};
 126
 127
 128/* Once the IPX ncpd patches are in these are going into protinfo. */
 129#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
 130struct ipx_opt {
 131        ipx_address             dest_addr;
 132        ipx_interface           *intrfc;
 133        unsigned short          port;
 134#ifdef CONFIG_IPX_INTERN
 135        unsigned char           node[IPX_NODE_LEN];
 136#endif
 137        unsigned short          type;
 138/* 
 139 * To handle special ncp connection-handling sockets for mars_nwe,
 140 * the connection number must be stored in the socket.
 141 */
 142        unsigned short          ipx_ncp_conn;
 143};
 144#endif
 145
 146#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 147struct ipv6_pinfo {
 148        struct in6_addr         saddr;
 149        struct in6_addr         rcv_saddr;
 150        struct in6_addr         daddr;
 151        struct in6_addr         *daddr_cache;
 152
 153        __u32                   flow_label;
 154        __u32                   frag_size;
 155        int                     hop_limit;
 156        int                     mcast_hops;
 157        int                     mcast_oif;
 158
 159        /* pktoption flags */
 160        union {
 161                struct {
 162                        __u8    srcrt:2,
 163                                rxinfo:1,
 164                                rxhlim:1,
 165                                hopopts:1,
 166                                dstopts:1,
 167                                authhdr:1,
 168                                rxflow:1;
 169                } bits;
 170                __u8            all;
 171        } rxopt;
 172
 173        /* sockopt flags */
 174        __u8                    mc_loop:1,
 175                                recverr:1,
 176                                sndflow:1,
 177                                pmtudisc:2,
 178                                ipv6only:1;
 179
 180        struct ipv6_mc_socklist *ipv6_mc_list;
 181        struct ipv6_ac_socklist *ipv6_ac_list;
 182        struct ipv6_fl_socklist *ipv6_fl_list;
 183        __u32                   dst_cookie;
 184
 185        struct ipv6_txoptions   *opt;
 186        struct sk_buff          *pktoptions;
 187};
 188
 189struct raw6_opt {
 190        __u32                   checksum;       /* perform checksum */
 191        __u32                   offset;         /* checksum offset  */
 192
 193        struct icmp6_filter     filter;
 194};
 195
 196#define __ipv6_only_sock(sk)    ((sk)->net_pinfo.af_inet6.ipv6only)
 197#define ipv6_only_sock(sk)      ((sk)->family == PF_INET6 && \
 198                                 (sk)->net_pinfo.af_inet6.ipv6only)
 199#else
 200#define __ipv6_only_sock(sk)    0
 201#define ipv6_only_sock(sk)      0
 202#endif /* IPV6 */
 203
 204#if defined(CONFIG_INET) || defined(CONFIG_INET_MODULE)
 205struct raw_opt {
 206        struct icmp_filter      filter;
 207};
 208#endif
 209
 210#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
 211struct inet_opt
 212{
 213        int                     ttl;                    /* TTL setting */
 214        int                     tos;                    /* TOS */
 215        unsigned                cmsg_flags;
 216        struct ip_options       *opt;
 217        unsigned char           hdrincl;                /* Include headers ? */
 218        __u8                    mc_ttl;                 /* Multicasting TTL */
 219        __u8                    mc_loop;                /* Loopback */
 220        unsigned                recverr : 1,
 221                                freebind : 1;
 222        __u16                   id;                     /* ID counter for DF pkts */
 223        __u8                    pmtudisc;
 224        int                     mc_index;               /* Multicast device index */
 225        __u32                   mc_addr;
 226        struct ip_mc_socklist   *mc_list;               /* Group array */
 227};
 228#endif
 229
 230#if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
 231struct pppoe_opt
 232{
 233        struct net_device      *dev;      /* device associated with socket*/
 234        struct pppoe_addr       pa;       /* what this socket is bound to*/
 235        struct sockaddr_pppox   relay;    /* what socket data will be
 236                                             relayed to (PPPoE relaying) */
 237};
 238
 239struct pppox_opt
 240{
 241        struct ppp_channel      chan;
 242        struct sock             *sk;
 243        struct pppox_opt        *next;    /* for hash table */
 244        union {
 245                struct pppoe_opt pppoe;
 246        } proto;
 247};
 248#define pppoe_dev       proto.pppoe.dev
 249#define pppoe_pa        proto.pppoe.pa
 250#define pppoe_relay     proto.pppoe.relay
 251#endif
 252
 253/* This defines a selective acknowledgement block. */
 254struct tcp_sack_block {
 255        __u32   start_seq;
 256        __u32   end_seq;
 257};
 258
 259struct tcp_opt {
 260        int     tcp_header_len; /* Bytes of tcp header to send          */
 261
 262/*
 263 *      Header prediction flags
 264 *      0x5?10 << 16 + snd_wnd in net byte order
 265 */
 266        __u32   pred_flags;
 267
 268/*
 269 *      RFC793 variables by their proper names. This means you can
 270 *      read the code and the spec side by side (and laugh ...)
 271 *      See RFC793 and RFC1122. The RFC writes these in capitals.
 272 */
 273        __u32   rcv_nxt;        /* What we want to receive next         */
 274        __u32   snd_nxt;        /* Next sequence we send                */
 275
 276        __u32   snd_una;        /* First byte we want an ack for        */
 277        __u32   snd_sml;        /* Last byte of the most recently transmitted small packet */
 278        __u32   rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
 279        __u32   lsndtime;       /* timestamp of last sent data packet (for restart window) */
 280
 281        /* Delayed ACK control data */
 282        struct {
 283                __u8    pending;        /* ACK is pending */
 284                __u8    quick;          /* Scheduled number of quick acks       */
 285                __u8    pingpong;       /* The session is interactive           */
 286                __u8    blocked;        /* Delayed ACK was blocked by socket lock*/
 287                __u32   ato;            /* Predicted tick of soft clock         */
 288                unsigned long timeout;  /* Currently scheduled timeout          */
 289                __u32   lrcvtime;       /* timestamp of last received data packet*/
 290                __u16   last_seg_size;  /* Size of last incoming segment        */
 291                __u16   rcv_mss;        /* MSS used for delayed ACK decisions   */ 
 292        } ack;
 293
 294        /* Data for direct copy to user */
 295        struct {
 296                struct sk_buff_head     prequeue;
 297                struct task_struct      *task;
 298                struct iovec            *iov;
 299                int                     memory;
 300                int                     len;
 301        } ucopy;
 302
 303        __u32   snd_wl1;        /* Sequence for window update           */
 304        __u32   snd_wnd;        /* The window we expect to receive      */
 305        __u32   max_window;     /* Maximal window ever seen from peer   */
 306        __u32   pmtu_cookie;    /* Last pmtu seen by socket             */
 307        __u16   mss_cache;      /* Cached effective mss, not including SACKS */
 308        __u16   mss_clamp;      /* Maximal mss, negotiated at connection setup */
 309        __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
 310        __u8    ca_state;       /* State of fast-retransmit machine     */
 311        __u8    retransmits;    /* Number of unrecovered RTO timeouts.  */
 312
 313        __u8    reordering;     /* Packet reordering metric.            */
 314        __u8    queue_shrunk;   /* Write queue has been shrunk recently.*/
 315        __u8    defer_accept;   /* User waits for some data after accept() */
 316
 317/* RTT measurement */
 318        __u8    backoff;        /* backoff                              */
 319        __u32   srtt;           /* smothed round trip time << 3         */
 320        __u32   mdev;           /* medium deviation                     */
 321        __u32   mdev_max;       /* maximal mdev for the last rtt period */
 322        __u32   rttvar;         /* smoothed mdev_max                    */
 323        __u32   rtt_seq;        /* sequence number to update rttvar     */
 324        __u32   rto;            /* retransmit timeout                   */
 325
 326        __u32   packets_out;    /* Packets which are "in flight"        */
 327        __u32   left_out;       /* Packets which leaved network         */
 328        __u32   retrans_out;    /* Retransmitted packets out            */
 329
 330
 331/*
 332 *      Slow start and congestion control (see also Nagle, and Karn & Partridge)
 333 */
 334        __u32   snd_ssthresh;   /* Slow start size threshold            */
 335        __u32   snd_cwnd;       /* Sending congestion window            */
 336        __u16   snd_cwnd_cnt;   /* Linear increase counter              */
 337        __u16   snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 338        __u32   snd_cwnd_used;
 339        __u32   snd_cwnd_stamp;
 340
 341        /* Two commonly used timers in both sender and receiver paths. */
 342        unsigned long           timeout;
 343        struct timer_list       retransmit_timer;       /* Resend (no ack)      */
 344        struct timer_list       delack_timer;           /* Ack delay            */
 345
 346        struct sk_buff_head     out_of_order_queue; /* Out of order segments go here */
 347
 348        struct tcp_func         *af_specific;   /* Operations which are AF_INET{4,6} specific   */
 349        struct sk_buff          *send_head;     /* Front of stuff to transmit                   */
 350        struct page             *sndmsg_page;   /* Cached page for sendmsg                      */
 351        u32                     sndmsg_off;     /* Cached offset for sendmsg                    */
 352
 353        __u32   rcv_wnd;        /* Current receiver window              */
 354        __u32   rcv_wup;        /* rcv_nxt on last window update sent   */
 355        __u32   write_seq;      /* Tail(+1) of data held in tcp send buffer */
 356        __u32   pushed_seq;     /* Last pushed seq, required to talk to windows */
 357        __u32   copied_seq;     /* Head of yet unread data              */
 358/*
 359 *      Options received (usually on last packet, some only on SYN packets).
 360 */
 361        char    tstamp_ok,      /* TIMESTAMP seen on SYN packet         */
 362                wscale_ok,      /* Wscale seen on SYN packet            */
 363                sack_ok;        /* SACK seen on SYN packet              */
 364        char    saw_tstamp;     /* Saw TIMESTAMP on last packet         */
 365        __u8    snd_wscale;     /* Window scaling received from sender  */
 366        __u8    rcv_wscale;     /* Window scaling to send to receiver   */
 367        __u8    nonagle;        /* Disable Nagle algorithm?             */
 368        __u8    keepalive_probes; /* num of allowed keep alive probes   */
 369
 370/*      PAWS/RTTM data  */
 371        __u32   rcv_tsval;      /* Time stamp value                     */
 372        __u32   rcv_tsecr;      /* Time stamp echo reply                */
 373        __u32   ts_recent;      /* Time stamp to echo next              */
 374        long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 375
 376/*      SACKs data      */
 377        __u16   user_mss;       /* mss requested by user in ioctl */
 378        __u8    dsack;          /* D-SACK is scheduled                  */
 379        __u8    eff_sacks;      /* Size of SACK array to send with next packet */
 380        struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
 381        struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
 382
 383        __u32   window_clamp;   /* Maximal window to advertise          */
 384        __u32   rcv_ssthresh;   /* Current window clamp                 */
 385        __u8    probes_out;     /* unanswered 0 window probes           */
 386        __u8    num_sacks;      /* Number of SACK blocks                */
 387        __u16   advmss;         /* Advertised MSS                       */
 388
 389        __u8    syn_retries;    /* num of allowed syn retries */
 390        __u8    ecn_flags;      /* ECN status bits.                     */
 391        __u16   prior_ssthresh; /* ssthresh saved at recovery start     */
 392        __u32   lost_out;       /* Lost packets                         */
 393        __u32   sacked_out;     /* SACK'd packets                       */
 394        __u32   fackets_out;    /* FACK'd packets                       */
 395        __u32   high_seq;       /* snd_nxt at onset of congestion       */
 396
 397        __u32   retrans_stamp;  /* Timestamp of the last retransmit,
 398                                 * also used in SYN-SENT to remember stamp of
 399                                 * the first SYN. */
 400        __u32   undo_marker;    /* tracking retrans started here. */
 401        int     undo_retrans;   /* number of undoable retransmissions. */
 402        __u32   urg_seq;        /* Seq of received urgent pointer */
 403        __u16   urg_data;       /* Saved octet of OOB data and control flags */
 404        __u8    pending;        /* Scheduled timer event        */
 405        __u8    urg_mode;       /* In urgent mode               */
 406        __u32   snd_up;         /* Urgent pointer               */
 407
 408        /* The syn_wait_lock is necessary only to avoid tcp_get_info having
 409         * to grab the main lock sock while browsing the listening hash
 410         * (otherwise it's deadlock prone).
 411         * This lock is acquired in read mode only from tcp_get_info() and
 412         * it's acquired in write mode _only_ from code that is actively
 413         * changing the syn_wait_queue. All readers that are holding
 414         * the master sock lock don't need to grab this lock in read mode
 415         * too as the syn_wait_queue writes are always protected from
 416         * the main sock lock.
 417         */
 418        rwlock_t                syn_wait_lock;
 419        struct tcp_listen_opt   *listen_opt;
 420
 421        /* FIFO of established children */
 422        struct open_request     *accept_queue;
 423        struct open_request     *accept_queue_tail;
 424
 425        int                     write_pending;  /* A write to socket waits to start. */
 426
 427        unsigned int            keepalive_time;   /* time before keep alive takes place */
 428        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
 429        int                     linger2;
 430
 431        int                     frto_counter; /* Number of new acks after RTO */
 432        __u32                   frto_highmark; /* snd_nxt when RTO occurred */
 433
 434        unsigned long last_synq_overflow; 
 435
 436/* Receiver side RTT estimation */
 437        struct {
 438                __u32   rtt;
 439                __u32   seq;
 440                __u32   time;
 441        } rcv_rtt_est;
 442
 443/* Receiver queue space */
 444        struct {
 445                int     space;
 446                __u32   seq;
 447                __u32   time;
 448        } rcvq_space;
 449
 450/* TCP Westwood structure */
 451        struct {
 452                __u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
 453                __u32    bw_est;           /* bandwidth estimate */
 454                __u32    rtt_win_sx;       /* here starts a new evaluation... */
 455                __u32    bk;
 456                __u32    snd_una;          /* used for evaluating the number of acked bytes */
 457                __u32    cumul_ack;
 458                __u32    accounted;
 459                __u32    rtt;
 460                __u32    rtt_min;          /* minimum observed RTT */
 461        } westwood;
 462
 463/* Vegas variables */
 464        struct {
 465                __u32   beg_snd_nxt;    /* right edge during last RTT */
 466                __u32   beg_snd_una;    /* left edge  during last RTT */
 467                __u32   beg_snd_cwnd;   /* saves the size of the cwnd */
 468                __u8    do_vegas;       /* do vegas for this connection */
 469                __u8    doing_vegas_now;/* if true, do vegas for this RTT */
 470                __u16   cntRTT;         /* # of RTTs measured within last RTT */
 471                __u32   minRTT;         /* min of RTTs measured within last RTT (in usec) */
 472                __u32   baseRTT;        /* the min of all Vegas RTT measurements seen (in usec) */
 473        } vegas;
 474
 475        /* BI TCP Parameters */
 476        struct {
 477                __u32   cnt;            /* increase cwnd by 1 after this number of ACKs */
 478                __u32   last_max_cwnd;  /* last maximium snd_cwnd */
 479                __u32   last_cwnd;      /* the last snd_cwnd */
 480                __u32   last_stamp;     /* time when updated last_cwnd */
 481        } bictcp;
 482};
 483
 484        
 485/*
 486 * This structure really needs to be cleaned up.
 487 * Most of it is for TCP, and not used by any of
 488 * the other protocols.
 489 */
 490
 491/*
  492 * The idea is to start moving to a newer struct gradually
 493 * 
 494 * IMHO the newer struct should have the following format:
 495 * 
 496 *      struct sock {
 497 *              sockmem [mem, proto, callbacks]
 498 *
 499 *              union or struct {
 500 *                      ax25;
 501 *              } ll_pinfo;
 502 *      
 503 *              union {
 504 *                      ipv4;
 505 *                      ipv6;
 506 *                      ipx;
 507 *                      netrom;
 508 *                      rose;
 509 *                      x25;
 510 *              } net_pinfo;
 511 *
 512 *              union {
 513 *                      tcp;
 514 *                      udp;
 515 *                      spx;
 516 *                      netrom;
 517 *              } tp_pinfo;
 518 *
 519 *      }
 520 *
  521 * The idea failed because the IPv6 transition assumes dual IP/IPv6 sockets.
  522 * So net_pinfo really holds only the IPv6 state, and protinfo unifies all
  523 * the other private areas.
 524 */
 525
 526/* Define this to get the sk->debug debugging facility. */
 527#define SOCK_DEBUGGING
 528#ifdef SOCK_DEBUGGING
 529#define SOCK_DEBUG(sk, msg...) do { if((sk) && ((sk)->debug)) printk(KERN_DEBUG msg); } while (0)
 530#else
 531#define SOCK_DEBUG(sk, msg...) do { } while (0)
 532#endif
 533
 534/* This is the per-socket lock.  The spinlock provides a synchronization
 535 * between user contexts and software interrupt processing, whereas the
 536 * mini-semaphore synchronizes multiple users amongst themselves.
 537 */
 538typedef struct {
 539        spinlock_t              slock;
 540        unsigned int            users;
 541        wait_queue_head_t       wq;
 542} socket_lock_t;
 543
 544#define sock_lock_init(__sk) \
 545do {    spin_lock_init(&((__sk)->lock.slock)); \
 546        (__sk)->lock.users = 0; \
 547        init_waitqueue_head(&((__sk)->lock.wq)); \
 548} while(0)
 549
 550struct sock {
 551        /* Socket demultiplex comparisons on incoming packets. */
 552        __u32                   daddr;          /* Foreign IPv4 addr                    */
 553        __u32                   rcv_saddr;      /* Bound local IPv4 addr                */
 554        __u16                   dport;          /* Destination port                     */
 555        unsigned short          num;            /* Local port                           */
 556        int                     bound_dev_if;   /* Bound device index if != 0           */
 557
 558        /* Main hash linkage for various protocol lookup tables. */
 559        struct sock             *next;
 560        struct sock             **pprev;
 561        struct sock             *bind_next;
 562        struct sock             **bind_pprev;
 563
 564        volatile unsigned char  state,          /* Connection state                     */
 565                                zapped;         /* In ax25 & ipx means not linked       */
 566        __u16                   sport;          /* Source port                          */
 567
 568        unsigned short          family;         /* Address family                       */
 569        unsigned char           reuse;          /* SO_REUSEADDR setting                 */
 570        unsigned char           shutdown;
 571        atomic_t                refcnt;         /* Reference count                      */
 572
 573        socket_lock_t           lock;           /* Synchronizer...                      */
 574        int                     rcvbuf;         /* Size of receive buffer in bytes      */
 575
 576        wait_queue_head_t       *sleep;         /* Sock wait queue                      */
 577        struct dst_entry        *dst_cache;     /* Destination cache                    */
 578        rwlock_t                dst_lock;
 579        atomic_t                rmem_alloc;     /* Receive queue bytes committed        */
 580        struct sk_buff_head     receive_queue;  /* Incoming packets                     */
 581        atomic_t                wmem_alloc;     /* Transmit queue bytes committed       */
 582        struct sk_buff_head     write_queue;    /* Packet sending queue                 */
 583        atomic_t                omem_alloc;     /* "o" is "option" or "other" */
 584        int                     wmem_queued;    /* Persistent queue size */
 585        int                     forward_alloc;  /* Space allocated forward. */
 586        __u32                   saddr;          /* Sending source                       */
 587        unsigned int            allocation;     /* Allocation mode                      */
 588        int                     sndbuf;         /* Size of send buffer in bytes         */
 589        struct sock             *prev;
 590
 591        /* Not all are volatile, but some are, so we might as well say they all are.
 592         * XXX Make this a flag word -DaveM
 593         */
 594        volatile char           dead,
 595                                done,
 596                                urginline,
 597                                keepopen,
 598                                linger,
 599                                destroy,
 600                                no_check,
 601                                broadcast,
 602                                bsdism;
 603        unsigned char           debug;
 604        unsigned char           rcvtstamp;
 605        unsigned char           use_write_queue;
 606        unsigned char           userlocks;
 607        /* Hole of 3 bytes. Try to pack. */
 608        int                     route_caps;
 609        int                     proc;
 610        unsigned long           lingertime;
 611
 612        int                     hashent;
 613        struct sock             *pair;
 614
 615        /* The backlog queue is special, it is always used with
 616         * the per-socket spinlock held and requires low latency
 617         * access.  Therefore we special case it's implementation.
 618         */
 619        struct {
 620                struct sk_buff *head;
 621                struct sk_buff *tail;
 622        } backlog;
 623
 624        rwlock_t                callback_lock;
 625
 626        /* Error queue, rarely used. */
 627        struct sk_buff_head     error_queue;
 628
 629        struct proto            *prot;
 630
 631#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 632        union {
 633                struct ipv6_pinfo       af_inet6;
 634        } net_pinfo;
 635#endif
 636
 637        union {
 638                struct tcp_opt          af_tcp;
 639#if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
 640                struct sctp_opt         af_sctp;
 641#endif
 642#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
 643                struct raw_opt          tp_raw4;
 644#endif
 645#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 646                struct raw6_opt         tp_raw;
 647#endif /* CONFIG_IPV6 */
 648#if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
 649                struct spx_opt          af_spx;
 650#endif /* CONFIG_SPX */
 651
 652        } tp_pinfo;
 653
 654        int                     err, err_soft;  /* Soft holds errors that don't
 655                                                   cause failure but are the cause
 656                                                   of a persistent failure not just
 657                                                   'timed out' */
 658        unsigned short          ack_backlog;
 659        unsigned short          max_ack_backlog;
 660        __u32                   priority;
 661        unsigned short          type;
 662        unsigned char           localroute;     /* Route locally only */
 663        unsigned char           protocol;
 664        struct ucred            peercred;
 665        int                     rcvlowat;
 666        long                    rcvtimeo;
 667        long                    sndtimeo;
 668
 669#ifdef CONFIG_FILTER
 670        /* Socket Filtering Instructions */
 671        struct sk_filter        *filter;
 672#endif /* CONFIG_FILTER */
 673
 674        /* This is where all the private (optional) areas that don't
 675         * overlap will eventually live. 
 676         */
 677        union {
 678                void *destruct_hook;
 679                struct unix_opt af_unix;
 680#if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
 681                struct inet_opt af_inet;
 682#endif
 683#if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
 684                struct atalk_sock       af_at;
 685#endif
 686#if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
 687                struct ipx_opt          af_ipx;
 688#endif
 689#if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
 690                struct dn_scp           dn;
 691#endif
 692#if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
 693                struct packet_opt       *af_packet;
 694#endif
 695#if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
 696                x25_cb                  *x25;
 697#endif
 698#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
 699                ax25_cb                 *ax25;
 700#endif
 701#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
 702                nr_cb                   *nr;
 703#endif
 704#if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
 705                rose_cb                 *rose;
 706#endif
 707#if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
 708                struct pppox_opt        *pppox;
 709#endif
 710                struct netlink_opt      *af_netlink;
 711#if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
 712                struct econet_opt       *af_econet;
 713#endif
 714#if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
 715                struct atm_vcc          *af_atm;
 716#endif
 717#if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
 718                struct irda_sock        *irda;
 719#endif
 720#if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
 721               struct wanpipe_opt      *af_wanpipe;
 722#endif
 723        } protinfo;             
 724
 725
 726        /* This part is used for the timeout functions. */
 727        struct timer_list       timer;          /* This is the sock cleanup timer. */
 728        struct timeval          stamp;
 729
 730        /* Identd and reporting IO signals */
 731        struct socket           *socket;
 732
 733        /* RPC layer private data */
 734        void                    *user_data;
 735  
 736        /* Callbacks */
 737        void                    (*state_change)(struct sock *sk);
 738        void                    (*data_ready)(struct sock *sk,int bytes);
 739        void                    (*write_space)(struct sock *sk);
 740        void                    (*error_report)(struct sock *sk);
 741
 742        int                     (*backlog_rcv) (struct sock *sk,
 743                                                struct sk_buff *skb);  
 744        void                    (*destruct)(struct sock *sk);
 745};
 746
 747/* The per-socket spinlock must be held here. */
 748#define sk_add_backlog(__sk, __skb)                     \
 749do {    if((__sk)->backlog.tail == NULL) {              \
 750                (__sk)->backlog.head =                  \
 751                     (__sk)->backlog.tail = (__skb);    \
 752        } else {                                        \
 753                ((__sk)->backlog.tail)->next = (__skb); \
 754                (__sk)->backlog.tail = (__skb);         \
 755        }                                               \
 756        (__skb)->next = NULL;                           \
 757} while(0)
 758
/* IP protocol blocks we attach to sockets.
 * socket layer -> transport layer interface
 * transport -> network interface is defined by struct inet_proto
 *
 * Every operation below is a function pointer whose first argument is
 * the struct sock being operated on.
 */
struct proto {
        void                    (*close)(struct sock *sk, 
                                        long timeout);
        int                     (*connect)(struct sock *sk,
                                        struct sockaddr *uaddr, 
                                        int addr_len);
        int                     (*disconnect)(struct sock *sk, int flags);

        /* Returns a struct sock pointer; an error code is passed back
         * through *err.
         */
        struct sock *           (*accept) (struct sock *sk, int flags, int *err);

        int                     (*ioctl)(struct sock *sk, int cmd,
                                         unsigned long arg);
        int                     (*init)(struct sock *sk);
        int                     (*destroy)(struct sock *sk);
        void                    (*shutdown)(struct sock *sk, int how);
        int                     (*setsockopt)(struct sock *sk, int level, 
                                        int optname, char *optval, int optlen);
        int                     (*getsockopt)(struct sock *sk, int level, 
                                        int optname, char *optval, 
                                        int *option);    
        int                     (*sendmsg)(struct sock *sk, struct msghdr *msg,
                                           int len);
        int                     (*recvmsg)(struct sock *sk, struct msghdr *msg,
                                        int len, int noblock, int flags, 
                                        int *addr_len);
        int                     (*bind)(struct sock *sk, 
                                        struct sockaddr *uaddr, int addr_len);

        /* Same signature as the backlog_rcv callback stored in struct sock. */
        int                     (*backlog_rcv) (struct sock *sk, 
                                                struct sk_buff *skb);

        /* Keeping track of sk's, looking them up, and port selection methods. */
        void                    (*hash)(struct sock *sk);
        void                    (*unhash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);

        /* Protocol name, stored inline in a fixed 32-byte buffer. */
        char                    name[32];

        /* One counter slot per CPU index (NR_CPUS entries).  Each slot is
         * padded out to SMP_CACHE_BYTES; sock_prot_inc_use() and
         * sock_prot_dec_use() adjust the slot of the executing CPU.
         */
        struct {
                int inuse;
                u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
        } stats[NR_CPUS];
};
 806
 807/* Called with local bh disabled */
 808static __inline__ void sock_prot_inc_use(struct proto *prot)
 809{
 810        prot->stats[smp_processor_id()].inuse++;
 811}
 812
 813static __inline__ void sock_prot_dec_use(struct proto *prot)
 814{
 815        prot->stats[smp_processor_id()].inuse--;
 816}
 817
 818/* About 10 seconds */
 819#define SOCK_DESTROY_TIME (10*HZ)
 820
 821/* Sockets 0-1023 can't be bound to unless you are superuser */
 822#define PROT_SOCK       1024
 823
 824#define SHUTDOWN_MASK   3
 825#define RCV_SHUTDOWN    1
 826#define SEND_SHUTDOWN   2
 827
 828#define SOCK_SNDBUF_LOCK        1
 829#define SOCK_RCVBUF_LOCK        2
 830#define SOCK_BINDADDR_LOCK      4
 831#define SOCK_BINDPORT_LOCK      8
 832
 833
 834/* Used by processes to "lock" a socket state, so that
 835 * interrupts and bottom half handlers won't change it
 836 * from under us. It essentially blocks any incoming
 837 * packets, so that we won't get any new data or any
 838 * packets that change the state of the socket.
 839 *
 840 * While locked, BH processing will add new packets to
 841 * the backlog queue.  This queue is processed by the
 842 * owner of the socket lock right before it is released.
 843 *
 844 * Since ~2.3.5 it is also exclusive sleep lock serializing
 845 * accesses from user process context.
 846 */
/* Out-of-line helpers used by lock_sock()/release_sock(); both are called
 * with the socket's lock.slock held.
 */
extern void __lock_sock(struct sock *sk);
extern void __release_sock(struct sock *sk);

/*
 * lock_sock - take ownership of a socket.
 *
 * Takes lock.slock with bottom halves disabled.  If lock.users is already
 * non-zero, __lock_sock() is called first (presumably it waits until the
 * current owner is done -- confirm in its definition).  lock.users is then
 * set to 1 and the spinlock is dropped again, so ownership is recorded in
 * lock.users rather than by holding the spinlock.
 */
#define lock_sock(__sk) \
do {    spin_lock_bh(&((__sk)->lock.slock)); \
        if ((__sk)->lock.users != 0) \
                __lock_sock(__sk); \
        (__sk)->lock.users = 1; \
        spin_unlock_bh(&((__sk)->lock.slock)); \
} while(0)
 856
/*
 * release_sock - give up ownership of a socket.
 *
 * Under lock.slock (bottom halves disabled): if the backlog list is not
 * empty, __release_sock() is called before ownership is dropped; then
 * lock.users is cleared and, if anybody is queued on lock.wq, they are
 * woken up.
 */
#define release_sock(__sk) \
do {    spin_lock_bh(&((__sk)->lock.slock)); \
        if ((__sk)->backlog.tail != NULL) \
                __release_sock(__sk); \
        (__sk)->lock.users = 0; \
        if (waitqueue_active(&((__sk)->lock.wq))) wake_up(&((__sk)->lock.wq)); \
        spin_unlock_bh(&((__sk)->lock.slock)); \
} while(0)
 865
/* BH context may only use the following locking interface.
 * Both macros act on lock.slock only; they neither read nor modify
 * lock.users.
 */
#define bh_lock_sock(__sk)      spin_lock(&((__sk)->lock.slock))
#define bh_unlock_sock(__sk)    spin_unlock(&((__sk)->lock.slock))
 869
/* Allocation and release of struct sock objects (defined outside this
 * header).
 */
extern struct sock *            sk_alloc(int family, int priority, int zero_it);
extern void                     sk_free(struct sock *sk);

/* skb allocation on behalf of a socket.
 * NOTE(review): the w/r variants presumably account against the write and
 * read side respectively -- confirm in the definitions.
 */
extern struct sk_buff           *sock_wmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
extern struct sk_buff           *sock_rmalloc(struct sock *sk,
                                              unsigned long size, int force,
                                              int priority);
/* Installed as skb->destructor by skb_set_owner_w() / skb_set_owner_r(). */
extern void                     sock_wfree(struct sk_buff *skb);
extern void                     sock_rfree(struct sk_buff *skb);

/* Socket option handlers taking a struct socket. */
extern int                      sock_setsockopt(struct socket *sock, int level,
                                                int op, char *optval,
                                                int optlen);

extern int                      sock_getsockopt(struct socket *sock, int level,
                                                int op, char *optval, 
                                                int *optlen);
/* Send-side skb allocation; an error code is reported through *errcode. */
extern struct sk_buff           *sock_alloc_send_skb(struct sock *sk,
                                                     unsigned long size,
                                                     int noblock,
                                                     int *errcode);
extern struct sk_buff           *sock_alloc_send_pskb(struct sock *sk,
                                                      unsigned long header_len,
                                                      unsigned long data_len,
                                                      int noblock,
                                                      int *errcode);
/* Memory helpers tied to a socket; sock_kfree_s() is given the size of
 * the region again.
 */
extern void *sock_kmalloc(struct sock *sk, int size, int priority);
extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 900
/*
 * Functions to fill in entries in struct proto_ops when a protocol
 * does not implement a particular function.
 *
 * Only the prototypes live here; each one is meant to be stored directly
 * in the struct proto_ops slot of the same name.
 */
extern int                      sock_no_release(struct socket *);
extern int                      sock_no_bind(struct socket *, 
                                             struct sockaddr *, int);
extern int                      sock_no_connect(struct socket *,
                                                struct sockaddr *, int, int);
extern int                      sock_no_socketpair(struct socket *,
                                                   struct socket *);
extern int                      sock_no_accept(struct socket *,
                                               struct socket *, int);
extern int                      sock_no_getname(struct socket *,
                                                struct sockaddr *, int *, int);
extern unsigned int             sock_no_poll(struct file *, struct socket *,
                                             struct poll_table_struct *);
extern int                      sock_no_ioctl(struct socket *, unsigned int,
                                              unsigned long);
extern int                      sock_no_listen(struct socket *, int);
extern int                      sock_no_shutdown(struct socket *, int);
extern int                      sock_no_getsockopt(struct socket *, int , int,
                                                   char *, int *);
extern int                      sock_no_setsockopt(struct socket *, int, int,
                                                   char *, int);
extern int                      sock_no_fcntl(struct socket *, 
                                              unsigned int, unsigned long);
extern int                      sock_no_sendmsg(struct socket *,
                                                struct msghdr *, int,
                                                struct scm_cookie *);
extern int                      sock_no_recvmsg(struct socket *,
                                                struct msghdr *, int, int,
                                                struct scm_cookie *);
extern int                      sock_no_mmap(struct file *file,
                                             struct socket *sock,
                                             struct vm_area_struct *vma);
extern ssize_t                  sock_no_sendpage(struct socket *sock,
                                                struct page *page,
                                                int offset, size_t size, 
                                                int flags);
 941
/*
 *      Default socket callbacks and setup code
 */
 
/* Same signature as the destruct callback in struct sock. */
extern void sock_def_destruct(struct sock *);

/* Initialise core socket variables */
extern void sock_init_data(struct socket *sock, struct sock *sk);

/* Helpers operating on a caller-supplied list of sockets; the list is
 * passed by address (struct sock **).
 */
extern void sklist_remove_socket(struct sock **list, struct sock *sk);
extern void sklist_insert_socket(struct sock **list, struct sock *sk);
extern void sklist_destroy_socket(struct sock **list, struct sock *sk);
 954
 955#ifdef CONFIG_FILTER
 956
 957/**
 958 *      sk_filter - run a packet through a socket filter
 959 *      @sk: sock associated with &sk_buff
 960 *      @skb: buffer to filter
 961 *      @needlock: set to 1 if the sock is not locked by caller.
 962 *
 963 * Run the filter code and then cut skb->data to correct size returned by
 964 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 965 * than pkt_len we keep whole skb->data. This is the socket level
 966 * wrapper to sk_run_filter. It returns 0 if the packet should
 967 * be accepted or -EPERM if the packet should be tossed.
 968 */
 969
 970static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
 971{
 972        int err = 0;
 973
 974        if (sk->filter) {
 975                struct sk_filter *filter;
 976                
 977                if (needlock)
 978                        bh_lock_sock(sk);
 979                
 980                filter = sk->filter;
 981                if (filter) {
 982                        int pkt_len = sk_run_filter(skb, filter->insns,
 983                                                    filter->len);
 984                        if (!pkt_len)
 985                                err = -EPERM;
 986                        else
 987                                skb_trim(skb, pkt_len);
 988                }
 989
 990                if (needlock)
 991                        bh_unlock_sock(sk);
 992        }
 993        return err;
 994}
 995
 996/**
 997 *      sk_filter_release: Release a socket filter
 998 *      @sk: socket
 999 *      @fp: filter to remove
1000 *
1001 *      Remove a filter from a socket and release its resources.
1002 */
1003 
1004static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp)
1005{
1006        unsigned int size = sk_filter_len(fp);
1007
1008        atomic_sub(size, &sk->omem_alloc);
1009
1010        if (atomic_dec_and_test(&fp->refcnt))
1011                kfree(fp);
1012}
1013
1014static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1015{
1016        atomic_inc(&fp->refcnt);
1017        atomic_add(sk_filter_len(fp), &sk->omem_alloc);
1018}
1019
1020#else
1021
/* Stub used when CONFIG_FILTER is not set: no arguments are examined and
 * every packet is accepted (0 is returned unconditionally).
 */
static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
{
        return 0;
}
1026
1027#endif /* CONFIG_FILTER */
1028
1029/*
1030 * Socket reference counting postulates.
1031 *
1032 * * Each user of socket SHOULD hold a reference count.
 * * Each access point to socket (a hash table bucket, reference from a list,
 *   running timer, skb in flight) MUST hold a reference count.
1035 * * When reference count hits 0, it means it will never increase back.
1036 * * When reference count hits 0, it means that no references from
1037 *   outside exist to this socket and current process on current CPU
1038 *   is last user and may/should destroy this socket.
1039 * * sk_free is called from any context: process, BH, IRQ. When
1040 *   it is called, socket has no references from outside -> sk_free
1041 *   may release descendant resources allocated by the socket, but
1042 *   to the time when it is called, socket is NOT referenced by any
1043 *   hash tables, lists etc.
 * * Packets, delivered from outside (from network or from another process)
 *   and enqueued on receive/error queues SHOULD NOT grab reference count,
 *   when they sit in queue. Otherwise, packets will leak to hole, when
 *   socket is looked up by one CPU and unhashing is made by another CPU.
 *   It is true for udp/raw, netlink (leak to receive and error queues), tcp
 *   (leak to backlog). Packet socket does all the processing inside
 *   BR_NETPROTO_LOCK, so that it does not have this race condition. UNIX
 *   sockets use separate SMP lock, so that they are prone too.
1052 */
1053
/* Grab socket reference count. This operation is valid only
   when sk is ALREADY grabbed, e.g. it is found in a hash table
   or a list and the lookup is made under a lock preventing hash table
   modifications.

   Implemented as a plain atomic increment of sk->refcnt.
 */

static inline void sock_hold(struct sock *sk)
{
        atomic_inc(&sk->refcnt);
}
1064
/* Ungrab socket in a context which assumes that the socket refcnt
   cannot hit zero, e.g. it is true in the context of any socketcall.

   Only decrements sk->refcnt; the result is not tested and the socket is
   never freed from here.
 */
static inline void __sock_put(struct sock *sk)
{
        atomic_dec(&sk->refcnt);
}
1072
1073/* Ungrab socket and destroy it, if it was the last reference. */
1074static inline void sock_put(struct sock *sk)
1075{
1076        if (atomic_dec_and_test(&sk->refcnt))
1077                sk_free(sk);
1078}
1079
/* Detach socket from process context.
 * Announce socket dead, detach it from wait queue and inode.
 * Note that parent inode held reference count on this struct sock,
 * we do not release it in this function, because protocol
 * probably wants some additional cleanups or even continuing
 * to work with this socket (TCP).
 *
 * All three fields are updated inside one callback_lock write section
 * (bottom halves disabled), so readers holding callback_lock see either
 * the old or the new state, never a mix.
 */
static inline void sock_orphan(struct sock *sk)
{
        write_lock_bh(&sk->callback_lock);
        sk->dead = 1;           /* announce the socket dead */
        sk->socket = NULL;      /* no struct socket behind it any more */
        sk->sleep = NULL;       /* and no wait queue to sleep on */
        write_unlock_bh(&sk->callback_lock);
}
1095
/* Attach @sk to the struct socket @parent: sk->sleep is pointed at the
 * parent's wait queue and the two objects are linked to each other
 * (parent->sk and sk->socket).  Done under the callback_lock write lock
 * with bottom halves disabled.
 */
static inline void sock_graft(struct sock *sk, struct socket *parent)
{
        write_lock_bh(&sk->callback_lock);
        sk->sleep = &parent->wait;
        parent->sk = sk;
        sk->socket = parent;
        write_unlock_bh(&sk->callback_lock);
}
1104
1105static inline int sock_i_uid(struct sock *sk)
1106{
1107        int uid;
1108
1109        read_lock(&sk->callback_lock);
1110        uid = sk->socket ? sk->socket->inode->i_uid : 0;
1111        read_unlock(&sk->callback_lock);
1112        return uid;
1113}
1114
1115static inline unsigned long sock_i_ino(struct sock *sk)
1116{
1117        unsigned long ino;
1118
1119        read_lock(&sk->callback_lock);
1120        ino = sk->socket ? sk->socket->inode->i_ino : 0;
1121        read_unlock(&sk->callback_lock);
1122        return ino;
1123}
1124
/* Return the cached dst entry of @sk as is: dst_lock is not taken and
 * dst_hold() is not called on the result (compare sk_dst_get()).
 */
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
        return sk->dst_cache;
}
1130
1131static inline struct dst_entry *
1132sk_dst_get(struct sock *sk)
1133{
1134        struct dst_entry *dst;
1135
1136        read_lock(&sk->dst_lock);
1137        dst = sk->dst_cache;
1138        if (dst)
1139                dst_hold(dst);
1140        read_unlock(&sk->dst_lock);
1141        return dst;
1142}
1143
1144static inline void
1145__sk_dst_set(struct sock *sk, struct dst_entry *dst)
1146{
1147        struct dst_entry *old_dst;
1148
1149        old_dst = sk->dst_cache;
1150        sk->dst_cache = dst;
1151        dst_release(old_dst);
1152}
1153
/* Locked variant of __sk_dst_set(): the replacement of the cached dst
 * entry happens inside a dst_lock write section.
 */
static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
        write_lock(&sk->dst_lock);
        __sk_dst_set(sk, dst);
        write_unlock(&sk->dst_lock);
}
1161
1162static inline void
1163__sk_dst_reset(struct sock *sk)
1164{
1165        struct dst_entry *old_dst;
1166
1167        old_dst = sk->dst_cache;
1168        sk->dst_cache = NULL;
1169        dst_release(old_dst);
1170}
1171
/* Locked variant of __sk_dst_reset(): the cached dst entry is dropped
 * inside a dst_lock write section.
 */
static inline void
sk_dst_reset(struct sock *sk)
{
        write_lock(&sk->dst_lock);
        __sk_dst_reset(sk);
        write_unlock(&sk->dst_lock);
}
1179
1180static inline struct dst_entry *
1181__sk_dst_check(struct sock *sk, u32 cookie)
1182{
1183        struct dst_entry *dst = sk->dst_cache;
1184
1185        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1186                sk->dst_cache = NULL;
1187                return NULL;
1188        }
1189
1190        return dst;
1191}
1192
1193static inline struct dst_entry *
1194sk_dst_check(struct sock *sk, u32 cookie)
1195{
1196        struct dst_entry *dst = sk_dst_get(sk);
1197
1198        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
1199                sk_dst_reset(sk);
1200                return NULL;
1201        }
1202
1203        return dst;
1204}
1205
1206
1207/*
1208 *      Queue a received datagram if it will fit. Stream and sequenced
1209 *      protocols can't normally use this as they need to fit buffers in
1210 *      and play with them.
1211 *
1212 *      Inlined as it's very short and called for pretty much every
1213 *      packet ever received.
1214 */
1215
/* Make @sk the owner of @skb on the write side: a socket reference is
 * taken with sock_hold(), sock_wfree() is installed as the skb's
 * destructor and the skb's truesize is added to sk->wmem_alloc.
 */
static inline void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
        sock_hold(sk);
        skb->sk = sk;
        skb->destructor = sock_wfree;
        atomic_add(skb->truesize, &sk->wmem_alloc);
}
1223
/* Make @sk the owner of @skb on the read side: sock_rfree() is installed
 * as the skb's destructor and the skb's truesize is added to
 * sk->rmem_alloc.  Unlike skb_set_owner_w(), no socket reference is taken.
 */
static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        skb->sk = sk;
        skb->destructor = sock_rfree;
        atomic_add(skb->truesize, &sk->rmem_alloc);
}
1230
1231static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1232{
1233        int err = 0;
1234        int skb_len;
1235
1236        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1237           number of warnings when compiling with -W --ANK
1238         */
1239        if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf) {
1240                err = -ENOMEM;
1241                goto out;
1242        }
1243
1244        /* It would be deadlock, if sock_queue_rcv_skb is used
1245           with socket lock! We assume that users of this
1246           function are lock free.
1247        */
1248        err = sk_filter(sk, skb, 1);
1249        if (err)
1250                goto out;
1251
1252        skb->dev = NULL;
1253        skb_set_owner_r(skb, sk);
1254
1255        /* Cache the SKB length before we tack it onto the receive
1256         * queue.  Once it is added it no longer belongs to us and
1257         * may be freed by other threads of control pulling packets
1258         * from the queue.
1259         */
1260        skb_len = skb->len;
1261
1262        skb_queue_tail(&sk->receive_queue, skb);
1263        if (!sk->dead)
1264                sk->data_ready(sk,skb_len);
1265out:
1266        return err;
1267}
1268
1269static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
1270{
1271        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
1272           number of warnings when compiling with -W --ANK
1273         */
1274        if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
1275                return -ENOMEM;
1276        skb_set_owner_r(skb, sk);
1277        skb_queue_tail(&sk->error_queue,skb);
1278        if (!sk->dead)
1279                sk->data_ready(sk,skb->len);
1280        return 0;
1281}
1282
1283/*
1284 *      Recover an error report and clear atomically
1285 */
1286 
1287static inline int sock_error(struct sock *sk)
1288{
1289        int err=xchg(&sk->err,0);
1290        return -err;
1291}
1292
1293static inline unsigned long sock_wspace(struct sock *sk)
1294{
1295        int amt = 0;
1296
1297        if (!(sk->shutdown & SEND_SHUTDOWN)) {
1298                amt = sk->sndbuf - atomic_read(&sk->wmem_alloc);
1299                if (amt < 0) 
1300                        amt = 0;
1301        }
1302        return amt;
1303}
1304
1305static inline void sk_wake_async(struct sock *sk, int how, int band)
1306{
1307        if (sk->socket && sk->socket->fasync_list)
1308                sock_wake_async(sk->socket, how, band);
1309}
1310
/* Minimum send/receive buffer sizes.
 * NOTE(review): presumably in bytes and enforced where sndbuf/rcvbuf are
 * set -- confirm at the users of these constants.
 */
#define SOCK_MIN_SNDBUF 2048
#define SOCK_MIN_RCVBUF 256
1313
1314/*
1315 *      Default write policy as shown to user space via poll/select/SIGIO
1316 */
1317static inline int sock_writeable(struct sock *sk) 
1318{
1319        return atomic_read(&sk->wmem_alloc) < (sk->sndbuf / 2);
1320}
1321
1322static inline int gfp_any(void)
1323{
1324        return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
1325}
1326
1327static inline long sock_rcvtimeo(struct sock *sk, int noblock)
1328{
1329        return noblock ? 0 : sk->rcvtimeo;
1330}
1331
1332static inline long sock_sndtimeo(struct sock *sk, int noblock)
1333{
1334        return noblock ? 0 : sk->sndtimeo;
1335}
1336
1337static inline int sock_rcvlowat(struct sock *sk, int waitall, int len)
1338{
1339        return (waitall ? len : min_t(int, sk->rcvlowat, len)) ? : 1;
1340}
1341
1342/* Alas, with timeout socket operations are not restartable.
1343 * Compare this to poll().
1344 */
1345static inline int sock_intr_errno(long timeo)
1346{
1347        return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
1348}
1349
1350static __inline__ void
1351sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
1352{
1353        if (sk->rcvtstamp)
1354                put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp);
1355        else
1356                sk->stamp = skb->stamp;
1357}
1358
/*
 *	Debug/info messages.
 *
 *	NETDEBUG(x) executes the statement x.  Flip the condition below to
 *	0 to compile every NETDEBUG() use down to an empty statement.
 */

#if 1
#define NETDEBUG(x)	do { x; } while (0)
#else
#define NETDEBUG(x)	do { } while (0)
#endif
1368
1369/*
1370 * Macros for sleeping on a socket. Use them like this:
1371 *
1372 * SOCK_SLEEP_PRE(sk)
1373 * if (condition)
1374 *      schedule();
1375 * SOCK_SLEEP_POST(sk)
1376 *
1377 */
1378
/* SOCK_SLEEP_PRE opens a block that SOCK_SLEEP_POST closes, so the two
 * must always be used as a pair in the same scope.  It declares `tsk' and
 * `wait', marks the task TASK_INTERRUPTIBLE, queues it on (sk)->sleep and
 * only then gives up the socket with release_sock().
 */
#define SOCK_SLEEP_PRE(sk)      { struct task_struct *tsk = current; \
                                DECLARE_WAITQUEUE(wait, tsk); \
                                tsk->state = TASK_INTERRUPTIBLE; \
                                add_wait_queue((sk)->sleep, &wait); \
                                release_sock(sk);

/* Undo SOCK_SLEEP_PRE: mark the task runnable again, leave the wait
 * queue, re-take the socket with lock_sock() and close the block.
 */
#define SOCK_SLEEP_POST(sk)     tsk->state = TASK_RUNNING; \
                                remove_wait_queue((sk)->sleep, &wait); \
                                lock_sock(sk); \
                                }
1389
/* Defined outside this header.
 * NOTE(review): presumably the sysctl-controlled upper limits for socket
 * send/receive buffer sizes -- confirm at the definition.
 */
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
1392
1393#endif  /* _SOCK_H */
1394
/* lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995. */