linux/net/core/sock.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              Generic socket support routines. Memory allocators, socket lock/release
   7 *              handler for protocols to use and generic option handler.
   8 *
   9 *
  10 * Authors:     Ross Biro
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Florian La Roche, <flla@stud.uni-sb.de>
  13 *              Alan Cox, <A.Cox@swansea.ac.uk>
  14 *
  15 * Fixes:
  16 *              Alan Cox        :       Numerous verify_area() problems
  17 *              Alan Cox        :       Connecting on a connecting socket
  18 *                                      now returns an error for tcp.
  19 *              Alan Cox        :       sock->protocol is set correctly.
  20 *                                      and is not sometimes left as 0.
  21 *              Alan Cox        :       connect handles icmp errors on a
  22 *                                      connect properly. Unfortunately there
  23 *                                      is a restart syscall nasty there. I
  24 *                                      can't match BSD without hacking the C
  25 *                                      library. Ideas urgently sought!
  26 *              Alan Cox        :       Disallow bind() to addresses that are
  27 *                                      not ours - especially broadcast ones!!
  28 *              Alan Cox        :       Socket 1024 _IS_ ok for users. (fencepost)
  29 *              Alan Cox        :       sock_wfree/sock_rfree don't destroy sockets,
  30 *                                      instead they leave that for the DESTROY timer.
  31 *              Alan Cox        :       Clean up error flag in accept
  32 *              Alan Cox        :       TCP ack handling is buggy, the DESTROY timer
  33 *                                      was buggy. Put a remove_sock() in the handler
  34 *                                      for memory when we hit 0. Also altered the timer
  35 *                                      code. The ACK stuff can wait and needs major
  36 *                                      TCP layer surgery.
  37 *              Alan Cox        :       Fixed TCP ack bug, removed remove sock
  38 *                                      and fixed timer/inet_bh race.
  39 *              Alan Cox        :       Added zapped flag for TCP
  40 *              Alan Cox        :       Move kfree_skb into skbuff.c and tidied up surplus code
  41 *              Alan Cox        :       for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
  42 *              Alan Cox        :       kfree_s calls now are kfree_skbmem so we can track skb resources
  43 *              Alan Cox        :       Supports socket option broadcast now as does udp. Packet and raw need fixing.
  44 *              Alan Cox        :       Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
  45 *              Rick Sladkey    :       Relaxed UDP rules for matching packets.
  46 *              C.E.Hawkins     :       IFF_PROMISC/SIOCGHWADDR support
  47 *      Pauline Middelink       :       identd support
  48 *              Alan Cox        :       Fixed connect() taking signals I think.
  49 *              Alan Cox        :       SO_LINGER supported
  50 *              Alan Cox        :       Error reporting fixes
  51 *              Anonymous       :       inet_create tidied up (sk->reuse setting)
  52 *              Alan Cox        :       inet sockets don't set sk->type!
  53 *              Alan Cox        :       Split socket option code
  54 *              Alan Cox        :       Callbacks
  55 *              Alan Cox        :       Nagle flag for Charles & Johannes stuff
  56 *              Alex            :       Removed restriction on inet fioctl
  57 *              Alan Cox        :       Splitting INET from NET core
  58 *              Alan Cox        :       Fixed bogus SO_TYPE handling in getsockopt()
  59 *              Adam Caldwell   :       Missing return in SO_DONTROUTE/SO_DEBUG code
  60 *              Alan Cox        :       Split IP from generic code
  61 *              Alan Cox        :       New kfree_skbmem()
  62 *              Alan Cox        :       Make SO_DEBUG superuser only.
  63 *              Alan Cox        :       Allow anyone to clear SO_DEBUG
  64 *                                      (compatibility fix)
  65 *              Alan Cox        :       Added optimistic memory grabbing for AF_UNIX throughput.
  66 *              Alan Cox        :       Allocator for a socket is settable.
  67 *              Alan Cox        :       SO_ERROR includes soft errors.
  68 *              Alan Cox        :       Allow NULL arguments on some SO_ opts
  69 *              Alan Cox        :       Generic socket allocation to make hooks
  70 *                                      easier (suggested by Craig Metz).
  71 *              Michael Pall    :       SO_ERROR returns positive errno again
  72 *              Steve Whitehouse:       Added default destructor to free
  73 *                                      protocol private data.
  74 *              Steve Whitehouse:       Added various other default routines
  75 *                                      common to several socket families.
  76 *              Chris Evans     :       Call suser() check last on F_SETOWN
  77 *              Jay Schulist    :       Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
  78 *              Andi Kleen      :       Add sock_kmalloc()/sock_kfree_s()
  79 *              Andi Kleen      :       Fix write_space callback
  80 *              Chris Evans     :       Security fixes - signedness again
  81 *              Arnaldo C. Melo :       cleanups, use skb_queue_purge
  82 *
  83 * To Fix:
  84 *
  85 *
  86 *              This program is free software; you can redistribute it and/or
  87 *              modify it under the terms of the GNU General Public License
  88 *              as published by the Free Software Foundation; either version
  89 *              2 of the License, or (at your option) any later version.
  90 */
  91
  92#include <linux/capability.h>
  93#include <linux/errno.h>
  94#include <linux/types.h>
  95#include <linux/socket.h>
  96#include <linux/in.h>
  97#include <linux/kernel.h>
  98#include <linux/module.h>
  99#include <linux/proc_fs.h>
 100#include <linux/seq_file.h>
 101#include <linux/sched.h>
 102#include <linux/timer.h>
 103#include <linux/string.h>
 104#include <linux/sockios.h>
 105#include <linux/net.h>
 106#include <linux/mm.h>
 107#include <linux/slab.h>
 108#include <linux/interrupt.h>
 109#include <linux/poll.h>
 110#include <linux/tcp.h>
 111#include <linux/init.h>
 112#include <linux/highmem.h>
 113#include <linux/user_namespace.h>
 114
 115#include <asm/uaccess.h>
 116#include <asm/system.h>
 117
 118#include <linux/netdevice.h>
 119#include <net/protocol.h>
 120#include <linux/skbuff.h>
 121#include <net/net_namespace.h>
 122#include <net/request_sock.h>
 123#include <net/sock.h>
 124#include <linux/net_tstamp.h>
 125#include <net/xfrm.h>
 126#include <linux/ipsec.h>
 127#include <net/cls_cgroup.h>
 128
 129#include <linux/filter.h>
 130
 131#include <trace/events/sock.h>
 132
 133#ifdef CONFIG_INET
 134#include <net/tcp.h>
 135#endif
 136
 137/*
 138 * Each address family might have different locking rules, so we have
 139 * one slock key per address family:
 140 */
  141static struct lock_class_key af_family_keys[AF_MAX];	/* one lock class key per address family (presumably for sk_lock - confirm at the user) */
  142static struct lock_class_key af_family_slock_keys[AF_MAX];	/* one slock class key per address family */
 143
 144/*
 145 * Make lock validator output more readable. (we pre-construct these
 146 * strings build-time, so that runtime initialization of socket
 147 * locks is fast):
 148 */
 149static const char *const af_family_key_strings[AF_MAX+1] = {
 150  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
 151  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
 152  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
 153  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
 154  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
 155  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
 156  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
 157  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
 158  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
 159  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
 160  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
 161  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
 162  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
 163  "sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
 164};
 165static const char *const af_family_slock_key_strings[AF_MAX+1] = {
 166  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
 167  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
 168  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
 169  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
 170  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
 171  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
 172  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
 173  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
 174  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
 175  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
 176  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
 177  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
 178  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
 179  "slock-AF_NFC"   , "slock-AF_MAX"
 180};
 181static const char *const af_family_clock_key_strings[AF_MAX+1] = {
 182  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
 183  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
 184  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
 185  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
 186  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
 187  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
 188  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
 189  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
 190  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
 191  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
 192  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
 193  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
 194  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
 195  "clock-AF_NFC"   , "clock-AF_MAX"
 196};
 197
 198/*
 199 * sk_callback_lock locking rules are per-address-family,
 200 * so split the lock classes by using a per-AF key:
 201 */
  202static struct lock_class_key af_callback_keys[AF_MAX];	/* one sk_callback_lock class key per address family */
 203
 204/* Take into consideration the size of the struct sk_buff overhead in the
 205 * determination of these values, since that is non-constant across
 206 * platforms.  This makes socket queueing behavior and performance
 207 * not depend upon such differences.
 208 */
  209#define _SK_MEM_PACKETS         256	/* packet count the limits below are sized for */
  210#define _SK_MEM_OVERHEAD        SKB_TRUESIZE(256)	/* per-packet cost; presumably the full memory charge of an skb with 256 bytes of data - confirm against SKB_TRUESIZE */
  211#define SK_WMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)	/* compile-time send-side limit */
  212#define SK_RMEM_MAX             (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)	/* compile-time receive-side limit (same formula) */
 213
 214/* Run time adjustable parameters. */
  215__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;	/* cap applied to SO_SNDBUF requests in sock_setsockopt() */
  216__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;	/* cap applied to SO_RCVBUF requests in sock_setsockopt() */
  217__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;	/* starts equal to the max; consumer not visible in this part of the file */
  218__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;	/* starts equal to the max; consumer not visible in this part of the file */
 219
 220/* Maximal space eaten by iovec or ancillary data plus some space */
  221int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);	/* default: room for 2*UIO_MAXIOV + 512 unsigned longs */
  222EXPORT_SYMBOL(sysctl_optmem_max);
 223
  224#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
  225int net_cls_subsys_id = -1;	/* defined here only when cgroups are on but CONFIG_NET_CLS_CGROUP is not; -1 presumably means "no id assigned yet" - confirm against net/cls_cgroup.h */
  226EXPORT_SYMBOL_GPL(net_cls_subsys_id);
  227#endif
 228
 229static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 230{
 231        struct timeval tv;
 232
 233        if (optlen < sizeof(tv))
 234                return -EINVAL;
 235        if (copy_from_user(&tv, optval, sizeof(tv)))
 236                return -EFAULT;
 237        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
 238                return -EDOM;
 239
 240        if (tv.tv_sec < 0) {
 241                static int warned __read_mostly;
 242
 243                *timeo_p = 0;
 244                if (warned < 10 && net_ratelimit()) {
 245                        warned++;
 246                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
 247                               "tries to set negative timeout\n",
 248                                current->comm, task_pid_nr(current));
 249                }
 250                return 0;
 251        }
 252        *timeo_p = MAX_SCHEDULE_TIMEOUT;
 253        if (tv.tv_sec == 0 && tv.tv_usec == 0)
 254                return 0;
 255        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
 256                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
 257        return 0;
 258}
 259
 260static void sock_warn_obsolete_bsdism(const char *name)
 261{
 262        static int warned;
 263        static char warncomm[TASK_COMM_LEN];
 264        if (strcmp(warncomm, current->comm) && warned < 5) {
 265                strcpy(warncomm,  current->comm);
 266                printk(KERN_WARNING "process `%s' is using obsolete "
 267                       "%s SO_BSDCOMPAT\n", warncomm, name);
 268                warned++;
 269        }
 270}
 271
 272static void sock_disable_timestamp(struct sock *sk, int flag)
 273{
 274        if (sock_flag(sk, flag)) {
 275                sock_reset_flag(sk, flag);
 276                if (!sock_flag(sk, SOCK_TIMESTAMP) &&
 277                    !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
 278                        net_disable_timestamp();
 279                }
 280        }
 281}
 282
 283
 284int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 285{
 286        int err;
 287        int skb_len;
 288        unsigned long flags;
 289        struct sk_buff_head *list = &sk->sk_receive_queue;
 290
 291        if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
 292                atomic_inc(&sk->sk_drops);
 293                trace_sock_rcvqueue_full(sk, skb);
 294                return -ENOMEM;
 295        }
 296
 297        err = sk_filter(sk, skb);
 298        if (err)
 299                return err;
 300
 301        if (!sk_rmem_schedule(sk, skb->truesize)) {
 302                atomic_inc(&sk->sk_drops);
 303                return -ENOBUFS;
 304        }
 305
 306        skb->dev = NULL;
 307        skb_set_owner_r(skb, sk);
 308
 309        /* Cache the SKB length before we tack it onto the receive
 310         * queue.  Once it is added it no longer belongs to us and
 311         * may be freed by other threads of control pulling packets
 312         * from the queue.
 313         */
 314        skb_len = skb->len;
 315
 316        /* we escape from rcu protected region, make sure we dont leak
 317         * a norefcounted dst
 318         */
 319        skb_dst_force(skb);
 320
 321        spin_lock_irqsave(&list->lock, flags);
 322        skb->dropcount = atomic_read(&sk->sk_drops);
 323        __skb_queue_tail(list, skb);
 324        spin_unlock_irqrestore(&list->lock, flags);
 325
 326        if (!sock_flag(sk, SOCK_DEAD))
 327                sk->sk_data_ready(sk, skb_len);
 328        return 0;
 329}
 330EXPORT_SYMBOL(sock_queue_rcv_skb);
 331
 332int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 333{
 334        int rc = NET_RX_SUCCESS;
 335
 336        if (sk_filter(sk, skb))
 337                goto discard_and_relse;
 338
 339        skb->dev = NULL;
 340
 341        if (sk_rcvqueues_full(sk, skb)) {
 342                atomic_inc(&sk->sk_drops);
 343                goto discard_and_relse;
 344        }
 345        if (nested)
 346                bh_lock_sock_nested(sk);
 347        else
 348                bh_lock_sock(sk);
 349        if (!sock_owned_by_user(sk)) {
 350                /*
 351                 * trylock + unlock semantics:
 352                 */
 353                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
 354
 355                rc = sk_backlog_rcv(sk, skb);
 356
 357                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
 358        } else if (sk_add_backlog(sk, skb)) {
 359                bh_unlock_sock(sk);
 360                atomic_inc(&sk->sk_drops);
 361                goto discard_and_relse;
 362        }
 363
 364        bh_unlock_sock(sk);
 365out:
 366        sock_put(sk);
 367        return rc;
 368discard_and_relse:
 369        kfree_skb(skb);
 370        goto out;
 371}
 372EXPORT_SYMBOL(sk_receive_skb);
 373
 374void sk_reset_txq(struct sock *sk)
 375{
 376        sk_tx_queue_clear(sk);
 377}
 378EXPORT_SYMBOL(sk_reset_txq);
 379
 380struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 381{
 382        struct dst_entry *dst = __sk_dst_get(sk);
 383
 384        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 385                sk_tx_queue_clear(sk);
 386                RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
 387                dst_release(dst);
 388                return NULL;
 389        }
 390
 391        return dst;
 392}
 393EXPORT_SYMBOL(__sk_dst_check);
 394
 395struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 396{
 397        struct dst_entry *dst = sk_dst_get(sk);
 398
 399        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 400                sk_dst_reset(sk);
 401                dst_release(dst);
 402                return NULL;
 403        }
 404
 405        return dst;
 406}
 407EXPORT_SYMBOL(sk_dst_check);
 408
 409static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
 410{
 411        int ret = -ENOPROTOOPT;
 412#ifdef CONFIG_NETDEVICES
 413        struct net *net = sock_net(sk);
 414        char devname[IFNAMSIZ];
 415        int index;
 416
 417        /* Sorry... */
 418        ret = -EPERM;
 419        if (!capable(CAP_NET_RAW))
 420                goto out;
 421
 422        ret = -EINVAL;
 423        if (optlen < 0)
 424                goto out;
 425
 426        /* Bind this socket to a particular device like "eth0",
 427         * as specified in the passed interface name. If the
 428         * name is "" or the option length is zero the socket
 429         * is not bound.
 430         */
 431        if (optlen > IFNAMSIZ - 1)
 432                optlen = IFNAMSIZ - 1;
 433        memset(devname, 0, sizeof(devname));
 434
 435        ret = -EFAULT;
 436        if (copy_from_user(devname, optval, optlen))
 437                goto out;
 438
 439        index = 0;
 440        if (devname[0] != '\0') {
 441                struct net_device *dev;
 442
 443                rcu_read_lock();
 444                dev = dev_get_by_name_rcu(net, devname);
 445                if (dev)
 446                        index = dev->ifindex;
 447                rcu_read_unlock();
 448                ret = -ENODEV;
 449                if (!dev)
 450                        goto out;
 451        }
 452
 453        lock_sock(sk);
 454        sk->sk_bound_dev_if = index;
 455        sk_dst_reset(sk);
 456        release_sock(sk);
 457
 458        ret = 0;
 459
 460out:
 461#endif
 462
 463        return ret;
 464}
 465
 466static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
 467{
 468        if (valbool)
 469                sock_set_flag(sk, bit);
 470        else
 471                sock_reset_flag(sk, bit);
 472}
 473
 474/*
 475 *      This is meant for all protocols to use and covers goings on
 476 *      at the socket level. Everything here is generic.
 477 */
 478
 479int sock_setsockopt(struct socket *sock, int level, int optname,
 480                    char __user *optval, unsigned int optlen)
 481{
 482        struct sock *sk = sock->sk;
 483        int val;
 484        int valbool;
 485        struct linger ling;
 486        int ret = 0;
 487
 488        /*
 489         *      Options without arguments
 490         */
 491
 492        if (optname == SO_BINDTODEVICE)
 493                return sock_bindtodevice(sk, optval, optlen);
 494
 495        if (optlen < sizeof(int))
 496                return -EINVAL;
 497
 498        if (get_user(val, (int __user *)optval))
 499                return -EFAULT;
 500
 501        valbool = val ? 1 : 0;
 502
 503        lock_sock(sk);
 504
 505        switch (optname) {
 506        case SO_DEBUG:
 507                if (val && !capable(CAP_NET_ADMIN))
 508                        ret = -EACCES;
 509                else
 510                        sock_valbool_flag(sk, SOCK_DBG, valbool);
 511                break;
 512        case SO_REUSEADDR:
 513                sk->sk_reuse = valbool;
 514                break;
 515        case SO_TYPE:
 516        case SO_PROTOCOL:
 517        case SO_DOMAIN:
 518        case SO_ERROR:
 519                ret = -ENOPROTOOPT;
 520                break;
 521        case SO_DONTROUTE:
 522                sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
 523                break;
 524        case SO_BROADCAST:
 525                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
 526                break;
 527        case SO_SNDBUF:
 528                /* Don't error on this BSD doesn't and if you think
 529                   about it this is right. Otherwise apps have to
 530                   play 'guess the biggest size' games. RCVBUF/SNDBUF
 531                   are treated in BSD as hints */
 532
 533                if (val > sysctl_wmem_max)
 534                        val = sysctl_wmem_max;
 535set_sndbuf:
 536                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 537                if ((val * 2) < SOCK_MIN_SNDBUF)
 538                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
 539                else
 540                        sk->sk_sndbuf = val * 2;
 541
 542                /*
 543                 *      Wake up sending tasks if we
 544                 *      upped the value.
 545                 */
 546                sk->sk_write_space(sk);
 547                break;
 548
 549        case SO_SNDBUFFORCE:
 550                if (!capable(CAP_NET_ADMIN)) {
 551                        ret = -EPERM;
 552                        break;
 553                }
 554                goto set_sndbuf;
 555
 556        case SO_RCVBUF:
 557                /* Don't error on this BSD doesn't and if you think
 558                   about it this is right. Otherwise apps have to
 559                   play 'guess the biggest size' games. RCVBUF/SNDBUF
 560                   are treated in BSD as hints */
 561
 562                if (val > sysctl_rmem_max)
 563                        val = sysctl_rmem_max;
 564set_rcvbuf:
 565                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 566                /*
 567                 * We double it on the way in to account for
 568                 * "struct sk_buff" etc. overhead.   Applications
 569                 * assume that the SO_RCVBUF setting they make will
 570                 * allow that much actual data to be received on that
 571                 * socket.
 572                 *
 573                 * Applications are unaware that "struct sk_buff" and
 574                 * other overheads allocate from the receive buffer
 575                 * during socket buffer allocation.
 576                 *
 577                 * And after considering the possible alternatives,
 578                 * returning the value we actually used in getsockopt
 579                 * is the most desirable behavior.
 580                 */
 581                if ((val * 2) < SOCK_MIN_RCVBUF)
 582                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
 583                else
 584                        sk->sk_rcvbuf = val * 2;
 585                break;
 586
 587        case SO_RCVBUFFORCE:
 588                if (!capable(CAP_NET_ADMIN)) {
 589                        ret = -EPERM;
 590                        break;
 591                }
 592                goto set_rcvbuf;
 593
 594        case SO_KEEPALIVE:
 595#ifdef CONFIG_INET
 596                if (sk->sk_protocol == IPPROTO_TCP)
 597                        tcp_set_keepalive(sk, valbool);
 598#endif
 599                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
 600                break;
 601
 602        case SO_OOBINLINE:
 603                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
 604                break;
 605
 606        case SO_NO_CHECK:
 607                sk->sk_no_check = valbool;
 608                break;
 609
 610        case SO_PRIORITY:
 611                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
 612                        sk->sk_priority = val;
 613                else
 614                        ret = -EPERM;
 615                break;
 616
 617        case SO_LINGER:
 618                if (optlen < sizeof(ling)) {
 619                        ret = -EINVAL;  /* 1003.1g */
 620                        break;
 621                }
 622                if (copy_from_user(&ling, optval, sizeof(ling))) {
 623                        ret = -EFAULT;
 624                        break;
 625                }
 626                if (!ling.l_onoff)
 627                        sock_reset_flag(sk, SOCK_LINGER);
 628                else {
 629#if (BITS_PER_LONG == 32)
 630                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
 631                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
 632                        else
 633#endif
 634                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
 635                        sock_set_flag(sk, SOCK_LINGER);
 636                }
 637                break;
 638
 639        case SO_BSDCOMPAT:
 640                sock_warn_obsolete_bsdism("setsockopt");
 641                break;
 642
 643        case SO_PASSCRED:
 644                if (valbool)
 645                        set_bit(SOCK_PASSCRED, &sock->flags);
 646                else
 647                        clear_bit(SOCK_PASSCRED, &sock->flags);
 648                break;
 649
 650        case SO_TIMESTAMP:
 651        case SO_TIMESTAMPNS:
 652                if (valbool)  {
 653                        if (optname == SO_TIMESTAMP)
 654                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 655                        else
 656                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
 657                        sock_set_flag(sk, SOCK_RCVTSTAMP);
 658                        sock_enable_timestamp(sk, SOCK_TIMESTAMP);
 659                } else {
 660                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
 661                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 662                }
 663                break;
 664
 665        case SO_TIMESTAMPING:
 666                if (val & ~SOF_TIMESTAMPING_MASK) {
 667                        ret = -EINVAL;
 668                        break;
 669                }
 670                sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
 671                                  val & SOF_TIMESTAMPING_TX_HARDWARE);
 672                sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
 673                                  val & SOF_TIMESTAMPING_TX_SOFTWARE);
 674                sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
 675                                  val & SOF_TIMESTAMPING_RX_HARDWARE);
 676                if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 677                        sock_enable_timestamp(sk,
 678                                              SOCK_TIMESTAMPING_RX_SOFTWARE);
 679                else
 680                        sock_disable_timestamp(sk,
 681                                               SOCK_TIMESTAMPING_RX_SOFTWARE);
 682                sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
 683                                  val & SOF_TIMESTAMPING_SOFTWARE);
 684                sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
 685                                  val & SOF_TIMESTAMPING_SYS_HARDWARE);
 686                sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
 687                                  val & SOF_TIMESTAMPING_RAW_HARDWARE);
 688                break;
 689
 690        case SO_RCVLOWAT:
 691                if (val < 0)
 692                        val = INT_MAX;
 693                sk->sk_rcvlowat = val ? : 1;
 694                break;
 695
 696        case SO_RCVTIMEO:
 697                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
 698                break;
 699
 700        case SO_SNDTIMEO:
 701                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
 702                break;
 703
 704        case SO_ATTACH_FILTER:
 705                ret = -EINVAL;
 706                if (optlen == sizeof(struct sock_fprog)) {
 707                        struct sock_fprog fprog;
 708
 709                        ret = -EFAULT;
 710                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
 711                                break;
 712
 713                        ret = sk_attach_filter(&fprog, sk);
 714                }
 715                break;
 716
 717        case SO_DETACH_FILTER:
 718                ret = sk_detach_filter(sk);
 719                break;
 720
 721        case SO_PASSSEC:
 722                if (valbool)
 723                        set_bit(SOCK_PASSSEC, &sock->flags);
 724                else
 725                        clear_bit(SOCK_PASSSEC, &sock->flags);
 726                break;
 727        case SO_MARK:
 728                if (!capable(CAP_NET_ADMIN))
 729                        ret = -EPERM;
 730                else
 731                        sk->sk_mark = val;
 732                break;
 733
 734                /* We implement the SO_SNDLOWAT etc to
 735                   not be settable (1003.1g 5.3) */
 736        case SO_RXQ_OVFL:
 737                sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
 738                break;
 739        default:
 740                ret = -ENOPROTOOPT;
 741                break;
 742        }
 743        release_sock(sk);
 744        return ret;
 745}
 746EXPORT_SYMBOL(sock_setsockopt);
 747
 748
 749void cred_to_ucred(struct pid *pid, const struct cred *cred,
 750                   struct ucred *ucred)
 751{
 752        ucred->pid = pid_vnr(pid);
 753        ucred->uid = ucred->gid = -1;
 754        if (cred) {
 755                struct user_namespace *current_ns = current_user_ns();
 756
 757                ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
 758                ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
 759        }
 760}
 761EXPORT_SYMBOL_GPL(cred_to_ucred);
 762
 763int sock_getsockopt(struct socket *sock, int level, int optname,
 764                    char __user *optval, int __user *optlen)
 765{
 766        struct sock *sk = sock->sk;
 767
 768        union {
 769                int val;
 770                struct linger ling;
 771                struct timeval tm;
 772        } v;
 773
 774        int lv = sizeof(int);
 775        int len;
 776
 777        if (get_user(len, optlen))
 778                return -EFAULT;
 779        if (len < 0)
 780                return -EINVAL;
 781
 782        memset(&v, 0, sizeof(v));
 783
 784        switch (optname) {
 785        case SO_DEBUG:
 786                v.val = sock_flag(sk, SOCK_DBG);
 787                break;
 788
 789        case SO_DONTROUTE:
 790                v.val = sock_flag(sk, SOCK_LOCALROUTE);
 791                break;
 792
 793        case SO_BROADCAST:
 794                v.val = !!sock_flag(sk, SOCK_BROADCAST);
 795                break;
 796
 797        case SO_SNDBUF:
 798                v.val = sk->sk_sndbuf;
 799                break;
 800
 801        case SO_RCVBUF:
 802                v.val = sk->sk_rcvbuf;
 803                break;
 804
 805        case SO_REUSEADDR:
 806                v.val = sk->sk_reuse;
 807                break;
 808
 809        case SO_KEEPALIVE:
 810                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
 811                break;
 812
 813        case SO_TYPE:
 814                v.val = sk->sk_type;
 815                break;
 816
 817        case SO_PROTOCOL:
 818                v.val = sk->sk_protocol;
 819                break;
 820
 821        case SO_DOMAIN:
 822                v.val = sk->sk_family;
 823                break;
 824
 825        case SO_ERROR:
 826                v.val = -sock_error(sk);
 827                if (v.val == 0)
 828                        v.val = xchg(&sk->sk_err_soft, 0);
 829                break;
 830
 831        case SO_OOBINLINE:
 832                v.val = !!sock_flag(sk, SOCK_URGINLINE);
 833                break;
 834
 835        case SO_NO_CHECK:
 836                v.val = sk->sk_no_check;
 837                break;
 838
 839        case SO_PRIORITY:
 840                v.val = sk->sk_priority;
 841                break;
 842
 843        case SO_LINGER:
 844                lv              = sizeof(v.ling);
 845                v.ling.l_onoff  = !!sock_flag(sk, SOCK_LINGER);
 846                v.ling.l_linger = sk->sk_lingertime / HZ;
 847                break;
 848
 849        case SO_BSDCOMPAT:
 850                sock_warn_obsolete_bsdism("getsockopt");
 851                break;
 852
 853        case SO_TIMESTAMP:
 854                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
 855                                !sock_flag(sk, SOCK_RCVTSTAMPNS);
 856                break;
 857
 858        case SO_TIMESTAMPNS:
 859                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 860                break;
 861
 862        case SO_TIMESTAMPING:
 863                v.val = 0;
 864                if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
 865                        v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
 866                if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
 867                        v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
 868                if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
 869                        v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
 870                if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
 871                        v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
 872                if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
 873                        v.val |= SOF_TIMESTAMPING_SOFTWARE;
 874                if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
 875                        v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
 876                if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
 877                        v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
 878                break;
 879
 880        case SO_RCVTIMEO:
 881                lv = sizeof(struct timeval);
 882                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
 883                        v.tm.tv_sec = 0;
 884                        v.tm.tv_usec = 0;
 885                } else {
 886                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
 887                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
 888                }
 889                break;
 890
 891        case SO_SNDTIMEO:
 892                lv = sizeof(struct timeval);
 893                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
 894                        v.tm.tv_sec = 0;
 895                        v.tm.tv_usec = 0;
 896                } else {
 897                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
 898                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
 899                }
 900                break;
 901
 902        case SO_RCVLOWAT:
 903                v.val = sk->sk_rcvlowat;
 904                break;
 905
 906        case SO_SNDLOWAT:
 907                v.val = 1;
 908                break;
 909
 910        case SO_PASSCRED:
 911                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
 912                break;
 913
 914        case SO_PEERCRED:
 915        {
 916                struct ucred peercred;
 917                if (len > sizeof(peercred))
 918                        len = sizeof(peercred);
 919                cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
 920                if (copy_to_user(optval, &peercred, len))
 921                        return -EFAULT;
 922                goto lenout;
 923        }
 924
 925        case SO_PEERNAME:
 926        {
 927                char address[128];
 928
 929                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
 930                        return -ENOTCONN;
 931                if (lv < len)
 932                        return -EINVAL;
 933                if (copy_to_user(optval, address, len))
 934                        return -EFAULT;
 935                goto lenout;
 936        }
 937
 938        /* Dubious BSD thing... Probably nobody even uses it, but
 939         * the UNIX standard wants it for whatever reason... -DaveM
 940         */
 941        case SO_ACCEPTCONN:
 942                v.val = sk->sk_state == TCP_LISTEN;
 943                break;
 944
 945        case SO_PASSSEC:
 946                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
 947                break;
 948
 949        case SO_PEERSEC:
 950                return security_socket_getpeersec_stream(sock, optval, optlen, len);
 951
 952        case SO_MARK:
 953                v.val = sk->sk_mark;
 954                break;
 955
 956        case SO_RXQ_OVFL:
 957                v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
 958                break;
 959
 960        default:
 961                return -ENOPROTOOPT;
 962        }
 963
 964        if (len > lv)
 965                len = lv;
 966        if (copy_to_user(optval, &v, len))
 967                return -EFAULT;
 968lenout:
 969        if (put_user(len, optlen))
 970                return -EFAULT;
 971        return 0;
 972}
 973
 974/*
 975 * Initialize an sk_lock.
 976 *
 977 * (We also register the sk_lock with the lock validator.)
 978 */
 979static inline void sock_lock_init(struct sock *sk)
 980{
 981        sock_lock_init_class_and_name(sk,
 982                        af_family_slock_key_strings[sk->sk_family],
 983                        af_family_slock_keys + sk->sk_family,
 984                        af_family_key_strings[sk->sk_family],
 985                        af_family_keys + sk->sk_family);
 986}
 987
 988/*
 989 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 990 * even temporarly, because of RCU lookups. sk_node should also be left as is.
 991 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 992 */
 993static void sock_copy(struct sock *nsk, const struct sock *osk)
 994{
 995#ifdef CONFIG_SECURITY_NETWORK
 996        void *sptr = nsk->sk_security;
 997#endif
 998        memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
 999
1000        memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1001               osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1002
1003#ifdef CONFIG_SECURITY_NETWORK
1004        nsk->sk_security = sptr;
1005        security_sk_clone(osk, nsk);
1006#endif
1007}
1008
1009/*
1010 * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
1011 * un-modified. Special care is taken when initializing object to zero.
1012 */
1013static inline void sk_prot_clear_nulls(struct sock *sk, int size)
1014{
1015        if (offsetof(struct sock, sk_node.next) != 0)
1016                memset(sk, 0, offsetof(struct sock, sk_node.next));
1017        memset(&sk->sk_node.pprev, 0,
1018               size - offsetof(struct sock, sk_node.pprev));
1019}
1020
1021void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1022{
1023        unsigned long nulls1, nulls2;
1024
1025        nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1026        nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1027        if (nulls1 > nulls2)
1028                swap(nulls1, nulls2);
1029
1030        if (nulls1 != 0)
1031                memset((char *)sk, 0, nulls1);
1032        memset((char *)sk + nulls1 + sizeof(void *), 0,
1033               nulls2 - nulls1 - sizeof(void *));
1034        memset((char *)sk + nulls2 + sizeof(void *), 0,
1035               size - nulls2 - sizeof(void *));
1036}
1037EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1038
1039static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1040                int family)
1041{
1042        struct sock *sk;
1043        struct kmem_cache *slab;
1044
1045        slab = prot->slab;
1046        if (slab != NULL) {
1047                sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1048                if (!sk)
1049                        return sk;
1050                if (priority & __GFP_ZERO) {
1051                        if (prot->clear_sk)
1052                                prot->clear_sk(sk, prot->obj_size);
1053                        else
1054                                sk_prot_clear_nulls(sk, prot->obj_size);
1055                }
1056        } else
1057                sk = kmalloc(prot->obj_size, priority);
1058
1059        if (sk != NULL) {
1060                kmemcheck_annotate_bitfield(sk, flags);
1061
1062                if (security_sk_alloc(sk, family, priority))
1063                        goto out_free;
1064
1065                if (!try_module_get(prot->owner))
1066                        goto out_free_sec;
1067                sk_tx_queue_clear(sk);
1068        }
1069
1070        return sk;
1071
1072out_free_sec:
1073        security_sk_free(sk);
1074out_free:
1075        if (slab != NULL)
1076                kmem_cache_free(slab, sk);
1077        else
1078                kfree(sk);
1079        return NULL;
1080}
1081
1082static void sk_prot_free(struct proto *prot, struct sock *sk)
1083{
1084        struct kmem_cache *slab;
1085        struct module *owner;
1086
1087        owner = prot->owner;
1088        slab = prot->slab;
1089
1090        security_sk_free(sk);
1091        if (slab != NULL)
1092                kmem_cache_free(slab, sk);
1093        else
1094                kfree(sk);
1095        module_put(owner);
1096}
1097
1098#ifdef CONFIG_CGROUPS
1099void sock_update_classid(struct sock *sk)
1100{
1101        u32 classid;
1102
1103        rcu_read_lock();  /* doing current task, which cannot vanish. */
1104        classid = task_cls_classid(current);
1105        rcu_read_unlock();
1106        if (classid && classid != sk->sk_classid)
1107                sk->sk_classid = classid;
1108}
1109EXPORT_SYMBOL(sock_update_classid);
1110#endif
1111
1112/**
1113 *      sk_alloc - All socket objects are allocated here
1114 *      @net: the applicable net namespace
1115 *      @family: protocol family
1116 *      @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1117 *      @prot: struct proto associated with this new sock instance
1118 */
1119struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1120                      struct proto *prot)
1121{
1122        struct sock *sk;
1123
1124        sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1125        if (sk) {
1126                sk->sk_family = family;
1127                /*
1128                 * See comment in struct sock definition to understand
1129                 * why we need sk_prot_creator -acme
1130                 */
1131                sk->sk_prot = sk->sk_prot_creator = prot;
1132                sock_lock_init(sk);
1133                sock_net_set(sk, get_net(net));
1134                atomic_set(&sk->sk_wmem_alloc, 1);
1135
1136                sock_update_classid(sk);
1137        }
1138
1139        return sk;
1140}
1141EXPORT_SYMBOL(sk_alloc);
1142
1143static void __sk_free(struct sock *sk)
1144{
1145        struct sk_filter *filter;
1146
1147        if (sk->sk_destruct)
1148                sk->sk_destruct(sk);
1149
1150        filter = rcu_dereference_check(sk->sk_filter,
1151                                       atomic_read(&sk->sk_wmem_alloc) == 0);
1152        if (filter) {
1153                sk_filter_uncharge(sk, filter);
1154                RCU_INIT_POINTER(sk->sk_filter, NULL);
1155        }
1156
1157        sock_disable_timestamp(sk, SOCK_TIMESTAMP);
1158        sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
1159
1160        if (atomic_read(&sk->sk_omem_alloc))
1161                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
1162                       __func__, atomic_read(&sk->sk_omem_alloc));
1163
1164        if (sk->sk_peer_cred)
1165                put_cred(sk->sk_peer_cred);
1166        put_pid(sk->sk_peer_pid);
1167        put_net(sock_net(sk));
1168        sk_prot_free(sk->sk_prot_creator, sk);
1169}
1170
1171void sk_free(struct sock *sk)
1172{
1173        /*
1174         * We subtract one from sk_wmem_alloc and can know if
1175         * some packets are still in some tx queue.
1176         * If not null, sock_wfree() will call __sk_free(sk) later
1177         */
1178        if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1179                __sk_free(sk);
1180}
1181EXPORT_SYMBOL(sk_free);
1182
1183/*
1184 * Last sock_put should drop reference to sk->sk_net. It has already
1185 * been dropped in sk_change_net. Taking reference to stopping namespace
1186 * is not an option.
1187 * Take reference to a socket to remove it from hash _alive_ and after that
1188 * destroy it in the context of init_net.
1189 */
1190void sk_release_kernel(struct sock *sk)
1191{
1192        if (sk == NULL || sk->sk_socket == NULL)
1193                return;
1194
1195        sock_hold(sk);
1196        sock_release(sk->sk_socket);
1197        release_net(sock_net(sk));
1198        sock_net_set(sk, get_net(&init_net));
1199        sock_put(sk);
1200}
1201EXPORT_SYMBOL(sk_release_kernel);
1202
1203struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1204{
1205        struct sock *newsk;
1206
1207        newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1208        if (newsk != NULL) {
1209                struct sk_filter *filter;
1210
1211                sock_copy(newsk, sk);
1212
1213                /* SANITY */
1214                get_net(sock_net(newsk));
1215                sk_node_init(&newsk->sk_node);
1216                sock_lock_init(newsk);
1217                bh_lock_sock(newsk);
1218                newsk->sk_backlog.head  = newsk->sk_backlog.tail = NULL;
1219                newsk->sk_backlog.len = 0;
1220
1221                atomic_set(&newsk->sk_rmem_alloc, 0);
1222                /*
1223                 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1224                 */
1225                atomic_set(&newsk->sk_wmem_alloc, 1);
1226                atomic_set(&newsk->sk_omem_alloc, 0);
1227                skb_queue_head_init(&newsk->sk_receive_queue);
1228                skb_queue_head_init(&newsk->sk_write_queue);
1229#ifdef CONFIG_NET_DMA
1230                skb_queue_head_init(&newsk->sk_async_wait_queue);
1231#endif
1232
1233                spin_lock_init(&newsk->sk_dst_lock);
1234                rwlock_init(&newsk->sk_callback_lock);
1235                lockdep_set_class_and_name(&newsk->sk_callback_lock,
1236                                af_callback_keys + newsk->sk_family,
1237                                af_family_clock_key_strings[newsk->sk_family]);
1238
1239                newsk->sk_dst_cache     = NULL;
1240                newsk->sk_wmem_queued   = 0;
1241                newsk->sk_forward_alloc = 0;
1242                newsk->sk_send_head     = NULL;
1243                newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1244
1245                sock_reset_flag(newsk, SOCK_DONE);
1246                skb_queue_head_init(&newsk->sk_error_queue);
1247
1248                filter = rcu_dereference_protected(newsk->sk_filter, 1);
1249                if (filter != NULL)
1250                        sk_filter_charge(newsk, filter);
1251
1252                if (unlikely(xfrm_sk_clone_policy(newsk))) {
1253                        /* It is still raw copy of parent, so invalidate
1254                         * destructor and make plain sk_free() */
1255                        newsk->sk_destruct = NULL;
1256                        bh_unlock_sock(newsk);
1257                        sk_free(newsk);
1258                        newsk = NULL;
1259                        goto out;
1260                }
1261
1262                newsk->sk_err      = 0;
1263                newsk->sk_priority = 0;
1264                /*
1265                 * Before updating sk_refcnt, we must commit prior changes to memory
1266                 * (Documentation/RCU/rculist_nulls.txt for details)
1267                 */
1268                smp_wmb();
1269                atomic_set(&newsk->sk_refcnt, 2);
1270
1271                /*
1272                 * Increment the counter in the same struct proto as the master
1273                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1274                 * is the same as sk->sk_prot->socks, as this field was copied
1275                 * with memcpy).
1276                 *
1277                 * This _changes_ the previous behaviour, where
1278                 * tcp_create_openreq_child always was incrementing the
1279                 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1280                 * to be taken into account in all callers. -acme
1281                 */
1282                sk_refcnt_debug_inc(newsk);
1283                sk_set_socket(newsk, NULL);
1284                newsk->sk_wq = NULL;
1285
1286                if (newsk->sk_prot->sockets_allocated)
1287                        percpu_counter_inc(newsk->sk_prot->sockets_allocated);
1288
1289                if (sock_flag(newsk, SOCK_TIMESTAMP) ||
1290                    sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1291                        net_enable_timestamp();
1292        }
1293out:
1294        return newsk;
1295}
1296EXPORT_SYMBOL_GPL(sk_clone);
1297
1298void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1299{
1300        __sk_dst_set(sk, dst);
1301        sk->sk_route_caps = dst->dev->features;
1302        if (sk->sk_route_caps & NETIF_F_GSO)
1303                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1304        sk->sk_route_caps &= ~sk->sk_route_nocaps;
1305        if (sk_can_gso(sk)) {
1306                if (dst->header_len) {
1307                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1308                } else {
1309                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1310                        sk->sk_gso_max_size = dst->dev->gso_max_size;
1311                }
1312        }
1313}
1314EXPORT_SYMBOL_GPL(sk_setup_caps);
1315
1316void __init sk_init(void)
1317{
1318        if (totalram_pages <= 4096) {
1319                sysctl_wmem_max = 32767;
1320                sysctl_rmem_max = 32767;
1321                sysctl_wmem_default = 32767;
1322                sysctl_rmem_default = 32767;
1323        } else if (totalram_pages >= 131072) {
1324                sysctl_wmem_max = 131071;
1325                sysctl_rmem_max = 131071;
1326        }
1327}
1328
1329/*
1330 *      Simple resource managers for sockets.
1331 */
1332
1333
1334/*
1335 * Write buffer destructor automatically called from kfree_skb.
1336 */
1337void sock_wfree(struct sk_buff *skb)
1338{
1339        struct sock *sk = skb->sk;
1340        unsigned int len = skb->truesize;
1341
1342        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1343                /*
1344                 * Keep a reference on sk_wmem_alloc, this will be released
1345                 * after sk_write_space() call
1346                 */
1347                atomic_sub(len - 1, &sk->sk_wmem_alloc);
1348                sk->sk_write_space(sk);
1349                len = 1;
1350        }
1351        /*
1352         * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1353         * could not do because of in-flight packets
1354         */
1355        if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1356                __sk_free(sk);
1357}
1358EXPORT_SYMBOL(sock_wfree);
1359
1360/*
1361 * Read buffer destructor automatically called from kfree_skb.
1362 */
1363void sock_rfree(struct sk_buff *skb)
1364{
1365        struct sock *sk = skb->sk;
1366        unsigned int len = skb->truesize;
1367
1368        atomic_sub(len, &sk->sk_rmem_alloc);
1369        sk_mem_uncharge(sk, len);
1370}
1371EXPORT_SYMBOL(sock_rfree);
1372
1373
1374int sock_i_uid(struct sock *sk)
1375{
1376        int uid;
1377
1378        read_lock_bh(&sk->sk_callback_lock);
1379        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1380        read_unlock_bh(&sk->sk_callback_lock);
1381        return uid;
1382}
1383EXPORT_SYMBOL(sock_i_uid);
1384
1385unsigned long sock_i_ino(struct sock *sk)
1386{
1387        unsigned long ino;
1388
1389        read_lock_bh(&sk->sk_callback_lock);
1390        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1391        read_unlock_bh(&sk->sk_callback_lock);
1392        return ino;
1393}
1394EXPORT_SYMBOL(sock_i_ino);
1395
1396/*
1397 * Allocate a skb from the socket's send buffer.
1398 */
1399struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1400                             gfp_t priority)
1401{
1402        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1403                struct sk_buff *skb = alloc_skb(size, priority);
1404                if (skb) {
1405                        skb_set_owner_w(skb, sk);
1406                        return skb;
1407                }
1408        }
1409        return NULL;
1410}
1411EXPORT_SYMBOL(sock_wmalloc);
1412
1413/*
1414 * Allocate a skb from the socket's receive buffer.
1415 */
1416struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1417                             gfp_t priority)
1418{
1419        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1420                struct sk_buff *skb = alloc_skb(size, priority);
1421                if (skb) {
1422                        skb_set_owner_r(skb, sk);
1423                        return skb;
1424                }
1425        }
1426        return NULL;
1427}
1428
1429/*
1430 * Allocate a memory block from the socket's option memory buffer.
1431 */
1432void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1433{
1434        if ((unsigned)size <= sysctl_optmem_max &&
1435            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1436                void *mem;
1437                /* First do the add, to avoid the race if kmalloc
1438                 * might sleep.
1439                 */
1440                atomic_add(size, &sk->sk_omem_alloc);
1441                mem = kmalloc(size, priority);
1442                if (mem)
1443                        return mem;
1444                atomic_sub(size, &sk->sk_omem_alloc);
1445        }
1446        return NULL;
1447}
1448EXPORT_SYMBOL(sock_kmalloc);
1449
1450/*
1451 * Free an option memory block.
1452 */
1453void sock_kfree_s(struct sock *sk, void *mem, int size)
1454{
1455        kfree(mem);
1456        atomic_sub(size, &sk->sk_omem_alloc);
1457}
1458EXPORT_SYMBOL(sock_kfree_s);
1459
1460/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1461   I think, these locks should be removed for datagram sockets.
1462 */
1463static long sock_wait_for_wmem(struct sock *sk, long timeo)
1464{
1465        DEFINE_WAIT(wait);
1466
1467        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1468        for (;;) {
1469                if (!timeo)
1470                        break;
1471                if (signal_pending(current))
1472                        break;
1473                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1474                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1475                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1476                        break;
1477                if (sk->sk_shutdown & SEND_SHUTDOWN)
1478                        break;
1479                if (sk->sk_err)
1480                        break;
1481                timeo = schedule_timeout(timeo);
1482        }
1483        finish_wait(sk_sleep(sk), &wait);
1484        return timeo;
1485}
1486
1487
1488/*
1489 *      Generic send/receive buffer handlers
1490 */
1491
1492struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1493                                     unsigned long data_len, int noblock,
1494                                     int *errcode)
1495{
1496        struct sk_buff *skb;
1497        gfp_t gfp_mask;
1498        long timeo;
1499        int err;
1500
1501        gfp_mask = sk->sk_allocation;
1502        if (gfp_mask & __GFP_WAIT)
1503                gfp_mask |= __GFP_REPEAT;
1504
1505        timeo = sock_sndtimeo(sk, noblock);
1506        while (1) {
1507                err = sock_error(sk);
1508                if (err != 0)
1509                        goto failure;
1510
1511                err = -EPIPE;
1512                if (sk->sk_shutdown & SEND_SHUTDOWN)
1513                        goto failure;
1514
1515                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1516                        skb = alloc_skb(header_len, gfp_mask);
1517                        if (skb) {
1518                                int npages;
1519                                int i;
1520
1521                                /* No pages, we're done... */
1522                                if (!data_len)
1523                                        break;
1524
1525                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1526                                skb->truesize += data_len;
1527                                skb_shinfo(skb)->nr_frags = npages;
1528                                for (i = 0; i < npages; i++) {
1529                                        struct page *page;
1530
1531                                        page = alloc_pages(sk->sk_allocation, 0);
1532                                        if (!page) {
1533                                                err = -ENOBUFS;
1534                                                skb_shinfo(skb)->nr_frags = i;
1535                                                kfree_skb(skb);
1536                                                goto failure;
1537                                        }
1538
1539                                        __skb_fill_page_desc(skb, i,
1540                                                        page, 0,
1541                                                        (data_len >= PAGE_SIZE ?
1542                                                         PAGE_SIZE :
1543                                                         data_len));
1544                                        data_len -= PAGE_SIZE;
1545                                }
1546
1547                                /* Full success... */
1548                                break;
1549                        }
1550                        err = -ENOBUFS;
1551                        goto failure;
1552                }
1553                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1554                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1555                err = -EAGAIN;
1556                if (!timeo)
1557                        goto failure;
1558                if (signal_pending(current))
1559                        goto interrupted;
1560                timeo = sock_wait_for_wmem(sk, timeo);
1561        }
1562
1563        skb_set_owner_w(skb, sk);
1564        return skb;
1565
1566interrupted:
1567        err = sock_intr_errno(timeo);
1568failure:
1569        *errcode = err;
1570        return NULL;
1571}
1572EXPORT_SYMBOL(sock_alloc_send_pskb);
1573
/*
 * Allocate a purely linear send skb of @size bytes: sock_alloc_send_pskb()
 * with no paged data.  Same return value and *@errcode contract.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	const unsigned long paged_len = 0;

	return sock_alloc_send_pskb(sk, size, paged_len, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
1580
1581static void __lock_sock(struct sock *sk)
1582        __releases(&sk->sk_lock.slock)
1583        __acquires(&sk->sk_lock.slock)
1584{
1585        DEFINE_WAIT(wait);
1586
1587        for (;;) {
1588                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1589                                        TASK_UNINTERRUPTIBLE);
1590                spin_unlock_bh(&sk->sk_lock.slock);
1591                schedule();
1592                spin_lock_bh(&sk->sk_lock.slock);
1593                if (!sock_owned_by_user(sk))
1594                        break;
1595        }
1596        finish_wait(&sk->sk_lock.wq, &wait);
1597}
1598
1599static void __release_sock(struct sock *sk)
1600        __releases(&sk->sk_lock.slock)
1601        __acquires(&sk->sk_lock.slock)
1602{
1603        struct sk_buff *skb = sk->sk_backlog.head;
1604
1605        do {
1606                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1607                bh_unlock_sock(sk);
1608
1609                do {
1610                        struct sk_buff *next = skb->next;
1611
1612                        WARN_ON_ONCE(skb_dst_is_noref(skb));
1613                        skb->next = NULL;
1614                        sk_backlog_rcv(sk, skb);
1615
1616                        /*
1617                         * We are in process context here with softirqs
1618                         * disabled, use cond_resched_softirq() to preempt.
1619                         * This is safe to do because we've taken the backlog
1620                         * queue private:
1621                         */
1622                        cond_resched_softirq();
1623
1624                        skb = next;
1625                } while (skb != NULL);
1626
1627                bh_lock_sock(sk);
1628        } while ((skb = sk->sk_backlog.head) != NULL);
1629
1630        /*
1631         * Doing the zeroing here guarantee we can not loop forever
1632         * while a wild producer attempts to flood us.
1633         */
1634        sk->sk_backlog.len = 0;
1635}
1636
1637/**
1638 * sk_wait_data - wait for data to arrive at sk_receive_queue
1639 * @sk:    sock to wait on
1640 * @timeo: for how long
1641 *
1642 * Now socket state including sk->sk_err is changed only under lock,
1643 * hence we may omit checks after joining wait queue.
1644 * We check receive queue before schedule() only as optimization;
1645 * it is very likely that release_sock() added new data.
1646 */
1647int sk_wait_data(struct sock *sk, long *timeo)
1648{
1649        int rc;
1650        DEFINE_WAIT(wait);
1651
1652        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1653        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1654        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1655        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1656        finish_wait(sk_sleep(sk), &wait);
1657        return rc;
1658}
1659EXPORT_SYMBOL(sk_wait_data);
1660
1661/**
1662 *      __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1663 *      @sk: socket
1664 *      @size: memory size to allocate
1665 *      @kind: allocation type
1666 *
1667 *      If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1668 *      rmem allocation. This function assumes that protocols which have
1669 *      memory_pressure use sk_wmem_queued as write buffer accounting.
1670 */
1671int __sk_mem_schedule(struct sock *sk, int size, int kind)
1672{
1673        struct proto *prot = sk->sk_prot;
1674        int amt = sk_mem_pages(size);
1675        long allocated;
1676
1677        sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1678        allocated = atomic_long_add_return(amt, prot->memory_allocated);
1679
1680        /* Under limit. */
1681        if (allocated <= prot->sysctl_mem[0]) {
1682                if (prot->memory_pressure && *prot->memory_pressure)
1683                        *prot->memory_pressure = 0;
1684                return 1;
1685        }
1686
1687        /* Under pressure. */
1688        if (allocated > prot->sysctl_mem[1])
1689                if (prot->enter_memory_pressure)
1690                        prot->enter_memory_pressure(sk);
1691
1692        /* Over hard limit. */
1693        if (allocated > prot->sysctl_mem[2])
1694                goto suppress_allocation;
1695
1696        /* guarantee minimum buffer size under pressure */
1697        if (kind == SK_MEM_RECV) {
1698                if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1699                        return 1;
1700        } else { /* SK_MEM_SEND */
1701                if (sk->sk_type == SOCK_STREAM) {
1702                        if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1703                                return 1;
1704                } else if (atomic_read(&sk->sk_wmem_alloc) <
1705                           prot->sysctl_wmem[0])
1706                                return 1;
1707        }
1708
1709        if (prot->memory_pressure) {
1710                int alloc;
1711
1712                if (!*prot->memory_pressure)
1713                        return 1;
1714                alloc = percpu_counter_read_positive(prot->sockets_allocated);
1715                if (prot->sysctl_mem[2] > alloc *
1716                    sk_mem_pages(sk->sk_wmem_queued +
1717                                 atomic_read(&sk->sk_rmem_alloc) +
1718                                 sk->sk_forward_alloc))
1719                        return 1;
1720        }
1721
1722suppress_allocation:
1723
1724        if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1725                sk_stream_moderate_sndbuf(sk);
1726
1727                /* Fail only if socket is _under_ its sndbuf.
1728                 * In this case we cannot block, so that we have to fail.
1729                 */
1730                if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1731                        return 1;
1732        }
1733
1734        trace_sock_exceed_buf_limit(sk, prot, allocated);
1735
1736        /* Alas. Undo changes. */
1737        sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1738        atomic_long_sub(amt, prot->memory_allocated);
1739        return 0;
1740}
1741EXPORT_SYMBOL(__sk_mem_schedule);
1742
1743/**
1744 *      __sk_reclaim - reclaim memory_allocated
1745 *      @sk: socket
1746 */
1747void __sk_mem_reclaim(struct sock *sk)
1748{
1749        struct proto *prot = sk->sk_prot;
1750
1751        atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
1752                   prot->memory_allocated);
1753        sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1754
1755        if (prot->memory_pressure && *prot->memory_pressure &&
1756            (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1757                *prot->memory_pressure = 0;
1758}
1759EXPORT_SYMBOL(__sk_mem_reclaim);
1760
1761
1762/*
1763 * Set of default routines for initialising struct proto_ops when
1764 * the protocol does not support a particular function. In certain
1765 * cases where it makes no sense for a protocol to have a "do nothing"
1766 * function, some default processing is provided.
1767 */
1768
/* Default bind handler for protocols without bind support: always -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
1773EXPORT_SYMBOL(sock_no_bind);
1774
/* Default connect handler for protocols without connect support: always -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
1780EXPORT_SYMBOL(sock_no_connect);
1781
/* Default socketpair handler for protocols without socketpair support: always -EOPNOTSUPP. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
1786EXPORT_SYMBOL(sock_no_socketpair);
1787
/* Default accept handler for protocols without accept support: always -EOPNOTSUPP. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
1792EXPORT_SYMBOL(sock_no_accept);
1793
/*
 * Default getname handler for protocols without getname support: always
 * -EOPNOTSUPP; neither @saddr nor @len is written.
 */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
1799EXPORT_SYMBOL(sock_no_getname);
1800
1801unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
1802{
1803        return 0;
1804}
1805EXPORT_SYMBOL(sock_no_poll);
1806
/* Default ioctl handler for protocols without ioctl support: always -EOPNOTSUPP. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
1811EXPORT_SYMBOL(sock_no_ioctl);
1812
/* Default listen handler for protocols without listen support: always -EOPNOTSUPP. */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
1817EXPORT_SYMBOL(sock_no_listen);
1818
/* Default shutdown handler for protocols without shutdown support: always -EOPNOTSUPP. */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
1823EXPORT_SYMBOL(sock_no_shutdown);
1824
1825int sock_no_setsockopt(struct socket *sock, int level, int optname,
1826                    char __user *optval, unsigned int optlen)
1827{
1828        return -EOPNOTSUPP;
1829}
1830EXPORT_SYMBOL(sock_no_setsockopt);
1831
1832int sock_no_getsockopt(struct socket *sock, int level, int optname,
1833                    char __user *optval, int __user *optlen)
1834{
1835        return -EOPNOTSUPP;
1836}
1837EXPORT_SYMBOL(sock_no_getsockopt);
1838
/* Default sendmsg handler for protocols without sendmsg support: always -EOPNOTSUPP. */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
1844EXPORT_SYMBOL(sock_no_sendmsg);
1845
/* Default recvmsg handler for protocols without recvmsg support: always -EOPNOTSUPP. */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
1851EXPORT_SYMBOL(sock_no_recvmsg);
1852
/* Default mmap handler for protocols without mmap support: always -ENODEV. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
1858EXPORT_SYMBOL(sock_no_mmap);
1859
1860ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1861{
1862        ssize_t res;
1863        struct msghdr msg = {.msg_flags = flags};
1864        struct kvec iov;
1865        char *kaddr = kmap(page);
1866        iov.iov_base = kaddr + offset;
1867        iov.iov_len = size;
1868        res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1869        kunmap(page);
1870        return res;
1871}
1872EXPORT_SYMBOL(sock_no_sendpage);
1873
1874/*
1875 *      Default Socket Callbacks
1876 */
1877
1878static void sock_def_wakeup(struct sock *sk)
1879{
1880        struct socket_wq *wq;
1881
1882        rcu_read_lock();
1883        wq = rcu_dereference(sk->sk_wq);
1884        if (wq_has_sleeper(wq))
1885                wake_up_interruptible_all(&wq->wait);
1886        rcu_read_unlock();
1887}
1888
1889static void sock_def_error_report(struct sock *sk)
1890{
1891        struct socket_wq *wq;
1892
1893        rcu_read_lock();
1894        wq = rcu_dereference(sk->sk_wq);
1895        if (wq_has_sleeper(wq))
1896                wake_up_interruptible_poll(&wq->wait, POLLERR);
1897        sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1898        rcu_read_unlock();
1899}
1900
1901static void sock_def_readable(struct sock *sk, int len)
1902{
1903        struct socket_wq *wq;
1904
1905        rcu_read_lock();
1906        wq = rcu_dereference(sk->sk_wq);
1907        if (wq_has_sleeper(wq))
1908                wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
1909                                                POLLRDNORM | POLLRDBAND);
1910        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1911        rcu_read_unlock();
1912}
1913
1914static void sock_def_write_space(struct sock *sk)
1915{
1916        struct socket_wq *wq;
1917
1918        rcu_read_lock();
1919
1920        /* Do not wake up a writer until he can make "significant"
1921         * progress.  --DaveM
1922         */
1923        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1924                wq = rcu_dereference(sk->sk_wq);
1925                if (wq_has_sleeper(wq))
1926                        wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1927                                                POLLWRNORM | POLLWRBAND);
1928
1929                /* Should agree with poll, otherwise some programs break */
1930                if (sock_writeable(sk))
1931                        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1932        }
1933
1934        rcu_read_unlock();
1935}
1936
1937static void sock_def_destruct(struct sock *sk)
1938{
1939        kfree(sk->sk_protinfo);
1940}
1941
1942void sk_send_sigurg(struct sock *sk)
1943{
1944        if (sk->sk_socket && sk->sk_socket->file)
1945                if (send_sigurg(&sk->sk_socket->file->f_owner))
1946                        sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1947}
1948EXPORT_SYMBOL(sk_send_sigurg);
1949
/*
 * (Re)arm @timer to fire at @expires on behalf of @sk.
 * A reference on @sk is taken only when mod_timer() returns 0
 * (per the timer API: the timer was not already pending).
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
1956EXPORT_SYMBOL(sk_reset_timer);
1957
/*
 * Cancel @timer for @sk. If the timer was pending and del_timer()
 * reports that it removed it, one reference on @sk is dropped with
 * __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}
1963EXPORT_SYMBOL(sk_stop_timer);
1964
1965void sock_init_data(struct socket *sock, struct sock *sk)
1966{
1967        skb_queue_head_init(&sk->sk_receive_queue);
1968        skb_queue_head_init(&sk->sk_write_queue);
1969        skb_queue_head_init(&sk->sk_error_queue);
1970#ifdef CONFIG_NET_DMA
1971        skb_queue_head_init(&sk->sk_async_wait_queue);
1972#endif
1973
1974        sk->sk_send_head        =       NULL;
1975
1976        init_timer(&sk->sk_timer);
1977
1978        sk->sk_allocation       =       GFP_KERNEL;
1979        sk->sk_rcvbuf           =       sysctl_rmem_default;
1980        sk->sk_sndbuf           =       sysctl_wmem_default;
1981        sk->sk_state            =       TCP_CLOSE;
1982        sk_set_socket(sk, sock);
1983
1984        sock_set_flag(sk, SOCK_ZAPPED);
1985
1986        if (sock) {
1987                sk->sk_type     =       sock->type;
1988                sk->sk_wq       =       sock->wq;
1989                sock->sk        =       sk;
1990        } else
1991                sk->sk_wq       =       NULL;
1992
1993        spin_lock_init(&sk->sk_dst_lock);
1994        rwlock_init(&sk->sk_callback_lock);
1995        lockdep_set_class_and_name(&sk->sk_callback_lock,
1996                        af_callback_keys + sk->sk_family,
1997                        af_family_clock_key_strings[sk->sk_family]);
1998
1999        sk->sk_state_change     =       sock_def_wakeup;
2000        sk->sk_data_ready       =       sock_def_readable;
2001        sk->sk_write_space      =       sock_def_write_space;
2002        sk->sk_error_report     =       sock_def_error_report;
2003        sk->sk_destruct         =       sock_def_destruct;
2004
2005        sk->sk_sndmsg_page      =       NULL;
2006        sk->sk_sndmsg_off       =       0;
2007
2008        sk->sk_peer_pid         =       NULL;
2009        sk->sk_peer_cred        =       NULL;
2010        sk->sk_write_pending    =       0;
2011        sk->sk_rcvlowat         =       1;
2012        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
2013        sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
2014
2015        sk->sk_stamp = ktime_set(-1L, 0);
2016
2017        /*
2018         * Before updating sk_refcnt, we must commit prior changes to memory
2019         * (Documentation/RCU/rculist_nulls.txt for details)
2020         */
2021        smp_wmb();
2022        atomic_set(&sk->sk_refcnt, 1);
2023        atomic_set(&sk->sk_drops, 0);
2024}
2025EXPORT_SYMBOL(sock_init_data);
2026
2027void lock_sock_nested(struct sock *sk, int subclass)
2028{
2029        might_sleep();
2030        spin_lock_bh(&sk->sk_lock.slock);
2031        if (sk->sk_lock.owned)
2032                __lock_sock(sk);
2033        sk->sk_lock.owned = 1;
2034        spin_unlock(&sk->sk_lock.slock);
2035        /*
2036         * The sk_lock has mutex_lock() semantics here:
2037         */
2038        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2039        local_bh_enable();
2040}
2041EXPORT_SYMBOL(lock_sock_nested);
2042
2043void release_sock(struct sock *sk)
2044{
2045        /*
2046         * The sk_lock has mutex_unlock() semantics:
2047         */
2048        mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2049
2050        spin_lock_bh(&sk->sk_lock.slock);
2051        if (sk->sk_backlog.tail)
2052                __release_sock(sk);
2053        sk->sk_lock.owned = 0;
2054        if (waitqueue_active(&sk->sk_lock.wq))
2055                wake_up(&sk->sk_lock.wq);
2056        spin_unlock_bh(&sk->sk_lock.slock);
2057}
2058EXPORT_SYMBOL(release_sock);
2059
2060/**
2061 * lock_sock_fast - fast version of lock_sock
2062 * @sk: socket
2063 *
2064 * This version should be used for very small section, where process wont block
2065 * return false if fast path is taken
2066 *   sk_lock.slock locked, owned = 0, BH disabled
2067 * return true if slow path is taken
2068 *   sk_lock.slock unlocked, owned = 1, BH enabled
2069 */
2070bool lock_sock_fast(struct sock *sk)
2071{
2072        might_sleep();
2073        spin_lock_bh(&sk->sk_lock.slock);
2074
2075        if (!sk->sk_lock.owned)
2076                /*
2077                 * Note : We must disable BH
2078                 */
2079                return false;
2080
2081        __lock_sock(sk);
2082        sk->sk_lock.owned = 1;
2083        spin_unlock(&sk->sk_lock.slock);
2084        /*
2085         * The sk_lock has mutex_lock() semantics here:
2086         */
2087        mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2088        local_bh_enable();
2089        return true;
2090}
2091EXPORT_SYMBOL(lock_sock_fast);
2092
2093int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2094{
2095        struct timeval tv;
2096        if (!sock_flag(sk, SOCK_TIMESTAMP))
2097                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2098        tv = ktime_to_timeval(sk->sk_stamp);
2099        if (tv.tv_sec == -1)
2100                return -ENOENT;
2101        if (tv.tv_sec == 0) {
2102                sk->sk_stamp = ktime_get_real();
2103                tv = ktime_to_timeval(sk->sk_stamp);
2104        }
2105        return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2106}
2107EXPORT_SYMBOL(sock_get_timestamp);
2108
2109int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2110{
2111        struct timespec ts;
2112        if (!sock_flag(sk, SOCK_TIMESTAMP))
2113                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2114        ts = ktime_to_timespec(sk->sk_stamp);
2115        if (ts.tv_sec == -1)
2116                return -ENOENT;
2117        if (ts.tv_sec == 0) {
2118                sk->sk_stamp = ktime_get_real();
2119                ts = ktime_to_timespec(sk->sk_stamp);
2120        }
2121        return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2122}
2123EXPORT_SYMBOL(sock_get_timestampns);
2124
2125void sock_enable_timestamp(struct sock *sk, int flag)
2126{
2127        if (!sock_flag(sk, flag)) {
2128                sock_set_flag(sk, flag);
2129                /*
2130                 * we just set one of the two flags which require net
2131                 * time stamping, but time stamping might have been on
2132                 * already because of the other one
2133                 */
2134                if (!sock_flag(sk,
2135                                flag == SOCK_TIMESTAMP ?
2136                                SOCK_TIMESTAMPING_RX_SOFTWARE :
2137                                SOCK_TIMESTAMP))
2138                        net_enable_timestamp();
2139        }
2140}
2141
2142/*
2143 *      Get a socket option on an socket.
2144 *
2145 *      FIX: POSIX 1003.1g is very ambiguous here. It states that
2146 *      asynchronous errors should be reported by getsockopt. We assume
2147 *      this means if you specify SO_ERROR (otherwise whats the point of it).
2148 */
2149int sock_common_getsockopt(struct socket *sock, int level, int optname,
2150                           char __user *optval, int __user *optlen)
2151{
2152        struct sock *sk = sock->sk;
2153
2154        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2155}
2156EXPORT_SYMBOL(sock_common_getsockopt);
2157
2158#ifdef CONFIG_COMPAT
2159int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2160                                  char __user *optval, int __user *optlen)
2161{
2162        struct sock *sk = sock->sk;
2163
2164        if (sk->sk_prot->compat_getsockopt != NULL)
2165                return sk->sk_prot->compat_getsockopt(sk, level, optname,
2166                                                      optval, optlen);
2167        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2168}
2169EXPORT_SYMBOL(compat_sock_common_getsockopt);
2170#endif
2171
2172int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2173                        struct msghdr *msg, size_t size, int flags)
2174{
2175        struct sock *sk = sock->sk;
2176        int addr_len = 0;
2177        int err;
2178
2179        err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2180                                   flags & ~MSG_DONTWAIT, &addr_len);
2181        if (err >= 0)
2182                msg->msg_namelen = addr_len;
2183        return err;
2184}
2185EXPORT_SYMBOL(sock_common_recvmsg);
2186
2187/*
2188 *      Set socket options on an inet socket.
2189 */
2190int sock_common_setsockopt(struct socket *sock, int level, int optname,
2191                           char __user *optval, unsigned int optlen)
2192{
2193        struct sock *sk = sock->sk;
2194
2195        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2196}
2197EXPORT_SYMBOL(sock_common_setsockopt);
2198
2199#ifdef CONFIG_COMPAT
2200int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2201                                  char __user *optval, unsigned int optlen)
2202{
2203        struct sock *sk = sock->sk;
2204
2205        if (sk->sk_prot->compat_setsockopt != NULL)
2206                return sk->sk_prot->compat_setsockopt(sk, level, optname,
2207                                                      optval, optlen);
2208        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2209}
2210EXPORT_SYMBOL(compat_sock_common_setsockopt);
2211#endif
2212
2213void sk_common_release(struct sock *sk)
2214{
2215        if (sk->sk_prot->destroy)
2216                sk->sk_prot->destroy(sk);
2217
2218        /*
2219         * Observation: when sock_common_release is called, processes have
2220         * no access to socket. But net still has.
2221         * Step one, detach it from networking:
2222         *
2223         * A. Remove from hash tables.
2224         */
2225
2226        sk->sk_prot->unhash(sk);
2227
2228        /*
2229         * In this point socket cannot receive new packets, but it is possible
2230         * that some packets are in flight because some CPU runs receiver and
2231         * did hash table lookup before we unhashed socket. They will achieve
2232         * receive queue and will be purged by socket destructor.
2233         *
2234         * Also we still have packets pending on receive queue and probably,
2235         * our own packets waiting in device queues. sock_destroy will drain
2236         * receive queue, but transmitted packets will delay socket destruction
2237         * until the last reference will be released.
2238         */
2239
2240        sock_orphan(sk);
2241
2242        xfrm_sk_free_policy(sk);
2243
2244        sk_refcnt_debug_release(sk);
2245        sock_put(sk);
2246}
2247EXPORT_SYMBOL(sk_common_release);
2248
2249static DEFINE_RWLOCK(proto_list_lock);
2250static LIST_HEAD(proto_list);
2251
2252#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
/* Per-protocol "in use" counters, one slot per proto->inuse_idx value. */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};
2257
2258static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2259
2260#ifdef CONFIG_NET_NS
2261void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2262{
2263        __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2264}
2265EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2266
2267int sock_prot_inuse_get(struct net *net, struct proto *prot)
2268{
2269        int cpu, idx = prot->inuse_idx;
2270        int res = 0;
2271
2272        for_each_possible_cpu(cpu)
2273                res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2274
2275        return res >= 0 ? res : 0;
2276}
2277EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2278
2279static int __net_init sock_inuse_init_net(struct net *net)
2280{
2281        net->core.inuse = alloc_percpu(struct prot_inuse);
2282        return net->core.inuse ? 0 : -ENOMEM;
2283}
2284
2285static void __net_exit sock_inuse_exit_net(struct net *net)
2286{
2287        free_percpu(net->core.inuse);
2288}
2289
2290static struct pernet_operations net_inuse_ops = {
2291        .init = sock_inuse_init_net,
2292        .exit = sock_inuse_exit_net,
2293};
2294
2295static __init int net_inuse_init(void)
2296{
2297        if (register_pernet_subsys(&net_inuse_ops))
2298                panic("Cannot initialize net inuse counters");
2299
2300        return 0;
2301}
2302
2303core_initcall(net_inuse_init);
2304#else
2305static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2306
2307void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2308{
2309        __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2310}
2311EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2312
2313int sock_prot_inuse_get(struct net *net, struct proto *prot)
2314{
2315        int cpu, idx = prot->inuse_idx;
2316        int res = 0;
2317
2318        for_each_possible_cpu(cpu)
2319                res += per_cpu(prot_inuse, cpu).val[idx];
2320
2321        return res >= 0 ? res : 0;
2322}
2323EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2324#endif
2325
2326static void assign_proto_idx(struct proto *prot)
2327{
2328        prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2329
2330        if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2331                printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
2332                return;
2333        }
2334
2335        set_bit(prot->inuse_idx, proto_inuse_idx);
2336}
2337
2338static void release_proto_idx(struct proto *prot)
2339{
2340        if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2341                clear_bit(prot->inuse_idx, proto_inuse_idx);
2342}
2343#else
/* No in-use accounting without CONFIG_PROC_FS: nothing to assign. */
static inline void assign_proto_idx(struct proto *prot)
{
}
2347
/* No in-use accounting without CONFIG_PROC_FS: nothing to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
2351#endif
2352
2353int proto_register(struct proto *prot, int alloc_slab)
2354{
2355        if (alloc_slab) {
2356                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2357                                        SLAB_HWCACHE_ALIGN | prot->slab_flags,
2358                                        NULL);
2359
2360                if (prot->slab == NULL) {
2361                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
2362                               prot->name);
2363                        goto out;
2364                }
2365
2366                if (prot->rsk_prot != NULL) {
2367                        prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2368                        if (prot->rsk_prot->slab_name == NULL)
2369                                goto out_free_sock_slab;
2370
2371                        prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2372                                                                 prot->rsk_prot->obj_size, 0,
2373                                                                 SLAB_HWCACHE_ALIGN, NULL);
2374
2375                        if (prot->rsk_prot->slab == NULL) {
2376                                printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
2377                                       prot->name);
2378                                goto out_free_request_sock_slab_name;
2379                        }
2380                }
2381
2382                if (prot->twsk_prot != NULL) {
2383                        prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2384
2385                        if (prot->twsk_prot->twsk_slab_name == NULL)
2386                                goto out_free_request_sock_slab;
2387
2388                        prot->twsk_prot->twsk_slab =
2389                                kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2390                                                  prot->twsk_prot->twsk_obj_size,
2391                                                  0,
2392                                                  SLAB_HWCACHE_ALIGN |
2393                                                        prot->slab_flags,
2394                                                  NULL);
2395                        if (prot->twsk_prot->twsk_slab == NULL)
2396                                goto out_free_timewait_sock_slab_name;
2397                }
2398        }
2399
2400        write_lock(&proto_list_lock);
2401        list_add(&prot->node, &proto_list);
2402        assign_proto_idx(prot);
2403        write_unlock(&proto_list_lock);
2404        return 0;
2405
2406out_free_timewait_sock_slab_name:
2407        kfree(prot->twsk_prot->twsk_slab_name);
2408out_free_request_sock_slab:
2409        if (prot->rsk_prot && prot->rsk_prot->slab) {
2410                kmem_cache_destroy(prot->rsk_prot->slab);
2411                prot->rsk_prot->slab = NULL;
2412        }
2413out_free_request_sock_slab_name:
2414        if (prot->rsk_prot)
2415                kfree(prot->rsk_prot->slab_name);
2416out_free_sock_slab:
2417        kmem_cache_destroy(prot->slab);
2418        prot->slab = NULL;
2419out:
2420        return -ENOBUFS;
2421}
2422EXPORT_SYMBOL(proto_register);
2423
2424void proto_unregister(struct proto *prot)
2425{
2426        write_lock(&proto_list_lock);
2427        release_proto_idx(prot);
2428        list_del(&prot->node);
2429        write_unlock(&proto_list_lock);
2430
2431        if (prot->slab != NULL) {
2432                kmem_cache_destroy(prot->slab);
2433                prot->slab = NULL;
2434        }
2435
2436        if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2437                kmem_cache_destroy(prot->rsk_prot->slab);
2438                kfree(prot->rsk_prot->slab_name);
2439                prot->rsk_prot->slab = NULL;
2440        }
2441
2442        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2443                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2444                kfree(prot->twsk_prot->twsk_slab_name);
2445                prot->twsk_prot->twsk_slab = NULL;
2446        }
2447}
2448EXPORT_SYMBOL(proto_unregister);
2449
2450#ifdef CONFIG_PROC_FS
2451static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2452        __acquires(proto_list_lock)
2453{
2454        read_lock(&proto_list_lock);
2455        return seq_list_start_head(&proto_list, *pos);
2456}
2457
2458static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2459{
2460        return seq_list_next(v, &proto_list, pos);
2461}
2462
2463static void proto_seq_stop(struct seq_file *seq, void *v)
2464        __releases(proto_list_lock)
2465{
2466        read_unlock(&proto_list_lock);
2467}
2468
/* Map a method pointer to 'y' (non-NULL) or 'n' (NULL) for the listing. */
static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}
2473
2474static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2475{
2476        seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
2477                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2478                   proto->name,
2479                   proto->obj_size,
2480                   sock_prot_inuse_get(seq_file_net(seq), proto),
2481                   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
2482                   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2483                   proto->max_header,
2484                   proto->slab == NULL ? "no" : "yes",
2485                   module_name(proto->owner),
2486                   proto_method_implemented(proto->close),
2487                   proto_method_implemented(proto->connect),
2488                   proto_method_implemented(proto->disconnect),
2489                   proto_method_implemented(proto->accept),
2490                   proto_method_implemented(proto->ioctl),
2491                   proto_method_implemented(proto->init),
2492                   proto_method_implemented(proto->destroy),
2493                   proto_method_implemented(proto->shutdown),
2494                   proto_method_implemented(proto->setsockopt),
2495                   proto_method_implemented(proto->getsockopt),
2496                   proto_method_implemented(proto->sendmsg),
2497                   proto_method_implemented(proto->recvmsg),
2498                   proto_method_implemented(proto->sendpage),
2499                   proto_method_implemented(proto->bind),
2500                   proto_method_implemented(proto->backlog_rcv),
2501                   proto_method_implemented(proto->hash),
2502                   proto_method_implemented(proto->unhash),
2503                   proto_method_implemented(proto->get_port),
2504                   proto_method_implemented(proto->enter_memory_pressure));
2505}
2506
2507static int proto_seq_show(struct seq_file *seq, void *v)
2508{
2509        if (v == &proto_list)
2510                seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2511                           "protocol",
2512                           "size",
2513                           "sockets",
2514                           "memory",
2515                           "press",
2516                           "maxhdr",
2517                           "slab",
2518                           "module",
2519                           "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2520        else
2521                proto_seq_printf(seq, list_entry(v, struct proto, node));
2522        return 0;
2523}
2524
2525static const struct seq_operations proto_seq_ops = {
2526        .start  = proto_seq_start,
2527        .next   = proto_seq_next,
2528        .stop   = proto_seq_stop,
2529        .show   = proto_seq_show,
2530};
2531
2532static int proto_seq_open(struct inode *inode, struct file *file)
2533{
2534        return seq_open_net(inode, file, &proto_seq_ops,
2535                            sizeof(struct seq_net_private));
2536}
2537
2538static const struct file_operations proto_seq_fops = {
2539        .owner          = THIS_MODULE,
2540        .open           = proto_seq_open,
2541        .read           = seq_read,
2542        .llseek         = seq_lseek,
2543        .release        = seq_release_net,
2544};
2545
2546static __net_init int proto_init_net(struct net *net)
2547{
2548        if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
2549                return -ENOMEM;
2550
2551        return 0;
2552}
2553
2554static __net_exit void proto_exit_net(struct net *net)
2555{
2556        proc_net_remove(net, "protocols");
2557}
2558
2559
2560static __net_initdata struct pernet_operations proto_net_ops = {
2561        .init = proto_init_net,
2562        .exit = proto_exit_net,
2563};
2564
2565static int __init proto_init(void)
2566{
2567        return register_pernet_subsys(&proto_net_ops);
2568}
2569
2570subsys_initcall(proto_init);
2571
2572#endif /* PROC_FS */
2573
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.