linux/net/packet/af_packet.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              PACKET - implements raw packet sockets.
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *              Alan Cox        :       verify_area() now used correctly
 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 *              Alan Cox        :       tidied skbuff lists.
 *              Alan Cox        :       Now uses generic datagram routines I
 *                                      added. Also fixed the peek/read crash
 *                                      from all old Linux datagram code.
 *              Alan Cox        :       Uses the improved datagram code.
 *              Alan Cox        :       Added NULL's for socket options.
 *              Alan Cox        :       Re-commented the code.
 *              Alan Cox        :       Use new kernel side addressing
 *              Rob Janssen     :       Correct MTU usage.
 *              Dave Platt      :       Counter leaks caused by incorrect
 *                                      interrupt locking and some slightly
 *                                      dubious gcc output. Can you read
 *                                      compiler: it said _VOLATILE_
 *      Richard Kooijman        :       Timestamp fixes.
 *              Alan Cox        :       New buffers. Use sk->mac.raw.
 *              Alan Cox        :       sendmsg/recvmsg support.
 *              Alan Cox        :       Protocol setting support
 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 *      Cyrus Durgin            :       Fixed kerneld for kmod.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *         Ulises Alonso        :       Frame number limit removal and
 *                                      packet_set_ring memory leak.
 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
 *                                      The convention is that longer addresses
 *                                      will simply extend the hardware address
 *                                      byte arrays at the end of sockaddr_ll
 *                                      and packet_mreq.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnels); others are silly
     (PPP).
   - packet socket receives packets with the ll header already pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It very likely points to the ll
                 header. PPP does this, which is wrong, because it
                 introduces asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Summary:
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
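
/*
 * Illustrative userspace sketch (not from this file) of how the header
 * visibility rules above look to an application: SOCK_RAW delivers the
 * frame with the ll header in place, SOCK_DGRAM with it stripped.
 * ETH_P_ALL and the 14-byte Ethernet header are the usual wired-Ethernet
 * case; error handling is omitted:
 *
 *      int raw   = socket(AF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));
 *      int dgram = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
 *      char buf[2048];
 *
 *      recv(raw, buf, sizeof(buf), 0);    // buf[0..13] is the Ethernet header
 *      recv(dgram, buf, sizeof(buf), 0);  // buf starts at the network header
 */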

/* Private packet socket structures. */

struct packet_mclist
{
        struct packet_mclist    *next;
        int                     ifindex;
        int                     count;
        unsigned short          type;
        unsigned short          alen;
        unsigned char           addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
        int             mr_ifindex;
        unsigned short  mr_type;
        unsigned short  mr_alen;
        unsigned char   mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
        /* struct sock has to be the first member of packet_sock */
        struct sock             sk;
        struct tpacket_stats    stats;
#ifdef CONFIG_PACKET_MMAP
        char                    **pg_vec;
        unsigned int            head;
        unsigned int            frames_per_block;
        unsigned int            frame_size;
        unsigned int            frame_max;
        int                     copy_thresh;
#endif
        struct packet_type      prot_hook;
        spinlock_t              bind_lock;
        struct mutex            pg_vec_lock;
        unsigned int            running:1,      /* prot_hook is attached */
                                auxdata:1,
                                origdev:1;
        int                     ifindex;        /* bound device         */
        __be16                  num;
        struct packet_mclist    *mclist;
#ifdef CONFIG_PACKET_MMAP
        atomic_t                mapped;
        unsigned int            pg_vec_order;
        unsigned int            pg_vec_pages;
        unsigned int            pg_vec_len;
        enum tpacket_versions   tp_version;
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
#endif
};

struct packet_skb_cb {
        unsigned int origlen;
        union {
                struct sockaddr_pkt pkt;
                struct sockaddr_ll ll;
        } sa;
};

#define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))

#ifdef CONFIG_PACKET_MMAP

static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
                                 int status)
{
        unsigned int pg_vec_pos, frame_offset;
        union {
                struct tpacket_hdr *h1;
                struct tpacket2_hdr *h2;
                void *raw;
        } h;

        pg_vec_pos = position / po->frames_per_block;
        frame_offset = position % po->frames_per_block;

        h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
        switch (po->tp_version) {
        case TPACKET_V1:
                if (status != (h.h1->tp_status ? TP_STATUS_USER :
                                                TP_STATUS_KERNEL))
                        return NULL;
                break;
        case TPACKET_V2:
                if (status != (h.h2->tp_status ? TP_STATUS_USER :
                                                TP_STATUS_KERNEL))
                        return NULL;
                break;
        }
        return h.raw;
}
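
/*
 * The ring geometry behind the lookup above: frames are fixed-size slots
 * packed into the page-order blocks of pg_vec, so a flat frame index
 * decomposes as block = index / frames_per_block and
 * slot = index % frames_per_block. A worked example with hypothetical
 * numbers:
 *
 *      block size 4096, frame size 1024  ->  frames_per_block = 4
 *      frame 9 lives at pg_vec[9 / 4] + (9 % 4) * 1024
 *                     = pg_vec[2] + 1024
 */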

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
        union {
                struct tpacket_hdr *h1;
                struct tpacket2_hdr *h2;
                void *raw;
        } h;

        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
                h.h1->tp_status = status;
                break;
        case TPACKET_V2:
                h.h2->tp_status = status;
                break;
        }
}
#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk("Attempt to release alive packet socket: %p\n", sk);
                return;
        }

        sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_pkt *spkt;

        /*
         *      When we registered the protocol we saved the socket in the data
         *      field for just this event.
         */

        sk = pt->af_packet_priv;

        /*
         *      Yank back the headers [hope the device set this
         *      right or kerboom...]
         *
         *      Incoming packets have the ll header pulled;
         *      push it back.
         *
         *      For outgoing ones skb->data == skb_mac_header(skb),
         *      so this procedure is a no-op.
         */

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto out;

        if (dev_net(dev) != sock_net(sk))
                goto out;

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
                goto oom;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spkt = &PACKET_SKB_CB(skb)->sa.pkt;

        skb_push(skb, skb->data - skb_mac_header(skb));

        /*
         *      The SOCK_PACKET socket receives _all_ frames.
         */

        spkt->spkt_family = dev->type;
        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
        spkt->spkt_protocol = skb->protocol;

        /*
         *      Charge the memory to the socket. This is done specifically
         *      to prevent sockets using all the memory up.
         */

        if (sock_queue_rcv_skb(sk, skb) == 0)
                return 0;

out:
        kfree_skb(skb);
oom:
        return 0;
}


/*
 *      Output a raw packet to a device layer. This bypasses all the other
 *      protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
                               struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto = 0;
        int err;

        /*
         *      Get and verify the address.
         */

        if (saddr) {
                if (msg->msg_namelen < sizeof(struct sockaddr))
                        return -EINVAL;
                if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
                        proto = saddr->spkt_protocol;
        } else
                return -ENOTCONN;       /* SOCK_PACKET must be sent giving an address */

        /*
         *      Find the device first to size check it
         */

        saddr->spkt_device[13] = 0;
        dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
        err = -ENODEV;
        if (dev == NULL)
                goto out_unlock;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        /*
         *      You may not queue a frame bigger than the MTU. This is the lowest level
         *      raw protocol and you must do your own fragmentation at this level.
         */

        err = -EMSGSIZE;
        if (len > dev->mtu + dev->hard_header_len)
                goto out_unlock;

        err = -ENOBUFS;
        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

        /*
         *      If the write buffer is full, then tough. At this level the user gets to
         *      deal with the problem - do your own algorithmic backoffs. That's far
         *      more flexible.
         */

        if (skb == NULL)
                goto out_unlock;

        /*
         *      Fill it in
         */

        /* FIXME: Save some space for broken drivers that write a
         * hard header at transmission time by themselves. PPP is the
         * notable one here. This should really be fixed at the driver level.
         */
        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        /* Try to align data part correctly */
        if (dev->header_ops) {
                skb->data -= dev->hard_header_len;
                skb->tail -= dev->hard_header_len;
                if (len < dev->hard_header_len)
                        skb_reset_network_header(skb);
        }

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;
        if (err)
                goto out_free;

        /*
         *      Now send it
         */

        dev_queue_xmit(skb);
        dev_put(dev);
        return len;

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
        return err;
}
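
/*
 * Hedged userspace sketch of the SOCK_PACKET path above ("eth0" and the
 * frame contents are assumptions; SOCK_PACKET is the obsolete interface,
 * shown only to illustrate sockaddr_pkt):
 *
 *      struct sockaddr_pkt spkt = { 0 };
 *      int fd = socket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *
 *      spkt.spkt_family = AF_PACKET;
 *      strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *      spkt.spkt_protocol = htons(ETH_P_IP);
 *      sendto(fd, frame, frame_len, 0,
 *             (struct sockaddr *)&spkt, sizeof(spkt));
 *
 * The frame must be complete, link-layer header included, and no larger
 * than dev->mtu + dev->hard_header_len, exactly as checked above.
 */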

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
                                      unsigned int res)
{
        struct sk_filter *filter;

        rcu_read_lock_bh();
        filter = rcu_dereference(sk->sk_filter);
        if (filter != NULL)
                res = sk_run_filter(skb, filter->insns, filter->len);
        rcu_read_unlock_bh();

        return res;
}
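
/*
 * run_filter() executes whatever classic BPF program userspace attached.
 * A minimal sketch of attaching one (this one-instruction program accepts
 * every packet, truncated to 96 bytes; purely illustrative):
 *
 *      struct sock_filter insns[] = {
 *              BPF_STMT(BPF_RET | BPF_K, 96),
 *      };
 *      struct sock_fprog prog = {
 *              .len    = 1,
 *              .filter = insns,
 *      };
 *
 *      setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 *
 * A return value of 0 drops the packet; a non-zero value caps snaplen,
 * which is exactly how "res" is used by the callers below.
 */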

/*
   This function does lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct sockaddr_ll *sll;
        struct packet_sock *po;
        u8 *skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev_net(dev) != sock_net(sk))
                goto drop;

        skb->dev = dev;

        if (dev->header_ops) {
                /* The device has an explicit notion of ll header,
                   exported to higher levels.

                   Otherwise, the device hides the details of its frame
                   structure, so the corresponding packet header is
                   never delivered to the user.
                 */
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf)
                goto drop_n_acct;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
                if (nskb == NULL)
                        goto drop_n_acct;

                if (skb_head != skb->data) {
                        skb->data = skb_head;
                        skb->len = skb_len;
                }
                kfree_skb(skb);
                skb = nskb;
        }

        BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
                     sizeof(skb->cb));

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev))
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;

        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

        PACKET_SKB_CB(skb)->origlen = skb->len;

        if (pskb_trim(skb, snaplen))
                goto drop_n_acct;

        skb_set_owner_r(skb, sk);
        skb->dev = NULL;
        dst_release(skb->dst);
        skb->dst = NULL;

        /* drop conntrack reference */
        nf_reset(skb);

        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_packets++;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock(&sk->sk_receive_queue.lock);
        sk->sk_data_ready(sk, skb->len);
        return 0;

drop_n_acct:
        spin_lock(&sk->sk_receive_queue.lock);
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
        struct sock *sk;
        struct packet_sock *po;
        struct sockaddr_ll *sll;
        union {
                struct tpacket_hdr *h1;
                struct tpacket2_hdr *h2;
                void *raw;
        } h;
        u8 *skb_head = skb->data;
        int skb_len = skb->len;
        unsigned int snaplen, res;
        unsigned long status = TP_STATUS_LOSING | TP_STATUS_USER;
        unsigned short macoff, netoff, hdrlen;
        struct sk_buff *copy_skb = NULL;
        struct timeval tv;
        struct timespec ts;

        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;

        sk = pt->af_packet_priv;
        po = pkt_sk(sk);

        if (dev_net(dev) != sock_net(sk))
                goto drop;

        if (dev->header_ops) {
                if (sk->sk_type != SOCK_DGRAM)
                        skb_push(skb, skb->data - skb_mac_header(skb));
                else if (skb->pkt_type == PACKET_OUTGOING) {
                        /* Special case: outgoing packets have ll header at head */
                        skb_pull(skb, skb_network_offset(skb));
                }
        }

        if (skb->ip_summed == CHECKSUM_PARTIAL)
                status |= TP_STATUS_CSUMNOTREADY;

        snaplen = skb->len;

        res = run_filter(skb, sk, snaplen);
        if (!res)
                goto drop_n_restore;
        if (snaplen > res)
                snaplen = res;

        if (sk->sk_type == SOCK_DGRAM) {
                macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
                                  po->tp_reserve;
        } else {
                unsigned maclen = skb_network_offset(skb);
                netoff = TPACKET_ALIGN(po->tp_hdrlen +
                                       (maclen < 16 ? 16 : maclen)) +
                        po->tp_reserve;
                macoff = netoff - maclen;
        }

        if (macoff + snaplen > po->frame_size) {
                if (po->copy_thresh &&
                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
                    (unsigned)sk->sk_rcvbuf) {
                        if (skb_shared(skb)) {
                                copy_skb = skb_clone(skb, GFP_ATOMIC);
                        } else {
                                copy_skb = skb_get(skb);
                                skb_head = skb->data;
                        }
                        if (copy_skb)
                                skb_set_owner_r(copy_skb, sk);
                }
                snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }

        spin_lock(&sk->sk_receive_queue.lock);
        h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
        if (!h.raw)
                goto ring_is_full;
        po->head = po->head != po->frame_max ? po->head + 1 : 0;
        po->stats.tp_packets++;
        if (copy_skb) {
                status |= TP_STATUS_COPY;
                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
        }
        if (!po->stats.tp_drops)
                status &= ~TP_STATUS_LOSING;
        spin_unlock(&sk->sk_receive_queue.lock);

        skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

        switch (po->tp_version) {
        case TPACKET_V1:
                h.h1->tp_len = skb->len;
                h.h1->tp_snaplen = snaplen;
                h.h1->tp_mac = macoff;
                h.h1->tp_net = netoff;
                if (skb->tstamp.tv64)
                        tv = ktime_to_timeval(skb->tstamp);
                else
                        do_gettimeofday(&tv);
                h.h1->tp_sec = tv.tv_sec;
                h.h1->tp_usec = tv.tv_usec;
                hdrlen = sizeof(*h.h1);
                break;
        case TPACKET_V2:
                h.h2->tp_len = skb->len;
                h.h2->tp_snaplen = snaplen;
                h.h2->tp_mac = macoff;
                h.h2->tp_net = netoff;
                if (skb->tstamp.tv64)
                        ts = ktime_to_timespec(skb->tstamp);
                else
                        getnstimeofday(&ts);
                h.h2->tp_sec = ts.tv_sec;
                h.h2->tp_nsec = ts.tv_nsec;
                h.h2->tp_vlan_tci = skb->vlan_tci;
                h.h2->tp_padding = 0;
                hdrlen = sizeof(*h.h2);
                break;
        default:
                BUG();
        }

        sll = h.raw + TPACKET_ALIGN(hdrlen);
        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
        sll->sll_family = AF_PACKET;
        sll->sll_hatype = dev->type;
        sll->sll_protocol = skb->protocol;
        sll->sll_pkttype = skb->pkt_type;
        if (unlikely(po->origdev))
                sll->sll_ifindex = orig_dev->ifindex;
        else
                sll->sll_ifindex = dev->ifindex;

        __packet_set_status(po, h.raw, status);
        smp_mb();

        {
                struct page *p_start, *p_end;
                u8 *h_end = h.raw + macoff + snaplen - 1;

                p_start = virt_to_page(h.raw);
                p_end = virt_to_page(h_end);
                while (p_start <= p_end) {
                        flush_dcache_page(p_start);
                        p_start++;
                }
        }

        sk->sk_data_ready(sk, 0);

drop_n_restore:
        if (skb_head != skb->data && skb_shared(skb)) {
                skb->data = skb_head;
                skb->len = skb_len;
        }
drop:
        kfree_skb(skb);
        return 0;

ring_is_full:
        po->stats.tp_drops++;
        spin_unlock(&sk->sk_receive_queue.lock);

        sk->sk_data_ready(sk, 0);
        if (copy_skb)
                kfree_skb(copy_skb);
        goto drop_n_restore;
}
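
/*
 * Hedged sketch of the matching userspace side: after mmap()ing the ring
 * (see packet_set_ring()/packet_mmap below), the reader polls tp_status
 * and hands each frame back by resetting it. "ring", "frame_nr" and
 * "frame_size" are assumptions, and the flat indexing assumes frames tile
 * the blocks evenly, as packet_set_ring() enforces:
 *
 *      struct tpacket_hdr *hdr;
 *      unsigned int i;
 *
 *      for (i = 0; i < frame_nr; i++) {
 *              hdr = (struct tpacket_hdr *)(ring + i * frame_size);
 *              if (!(hdr->tp_status & TP_STATUS_USER))
 *                      continue;
 *              // packet data starts at (char *)hdr + hdr->tp_mac
 *              hdr->tp_status = TP_STATUS_KERNEL;      // return to kernel
 *      }
 */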

#endif


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
        struct sk_buff *skb;
        struct net_device *dev;
        __be16 proto;
        unsigned char *addr;
        int ifindex, err, reserve = 0;

        /*
         *      Get and verify the address.
         */

        if (saddr == NULL) {
                struct packet_sock *po = pkt_sk(sk);

                ifindex = po->ifindex;
                proto   = po->num;
                addr    = NULL;
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
                        goto out;
                if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
                        goto out;
                ifindex = saddr->sll_ifindex;
                proto   = saddr->sll_protocol;
                addr    = saddr->sll_addr;
        }


        dev = dev_get_by_index(sock_net(sk), ifindex);
        err = -ENXIO;
        if (dev == NULL)
                goto out_unlock;
        if (sock->type == SOCK_RAW)
                reserve = dev->hard_header_len;

        err = -ENETDOWN;
        if (!(dev->flags & IFF_UP))
                goto out_unlock;

        err = -EMSGSIZE;
        if (len > dev->mtu + reserve)
                goto out_unlock;

        skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
                                msg->msg_flags & MSG_DONTWAIT, &err);
        if (skb == NULL)
                goto out_unlock;

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        skb_reset_network_header(skb);

        err = -EINVAL;
        if (sock->type == SOCK_DGRAM &&
            dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
                goto out_free;

        /* Returns -EFAULT on error */
        err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (err)
                goto out_free;

        skb->protocol = proto;
        skb->dev = dev;
        skb->priority = sk->sk_priority;

        /*
         *      Now send it
         */

        err = dev_queue_xmit(skb);
        if (err > 0 && (err = net_xmit_errno(err)) != 0)
                goto out_unlock;

        dev_put(dev);

        return len;

out_free:
        kfree_skb(skb);
out_unlock:
        if (dev)
                dev_put(dev);
out:
        return err;
}
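
/*
 * Hedged userspace sketch of the sendmsg() path above ("ifindex", "frame"
 * and "frame_len" are assumptions):
 *
 *      struct sockaddr_ll sll = { 0 };
 *
 *      sll.sll_family   = AF_PACKET;
 *      sll.sll_ifindex  = ifindex;
 *      sll.sll_protocol = htons(ETH_P_IP);
 *      sendto(fd, frame, frame_len, 0,
 *             (struct sockaddr *)&sll, sizeof(sll));
 *
 * With SOCK_RAW the frame must already carry the link-layer header; with
 * SOCK_DGRAM, sll_addr/sll_halen must name the destination and
 * dev_hard_header() builds the header, as the code above shows.
 */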

/*
 *      Close a PACKET socket. This is fairly simple. We immediately go
 *      to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po;
        struct net *net;

        if (!sk)
                return 0;

        net = sock_net(sk);
        po = pkt_sk(sk);

        write_lock_bh(&net->packet.sklist_lock);
        sk_del_node_init(sk);
        write_unlock_bh(&net->packet.sklist_lock);

        /*
         *      Unhook packet receive handler.
         */

        if (po->running) {
                /*
                 *      Remove the protocol hook
                 */
                dev_remove_pack(&po->prot_hook);
                po->running = 0;
                po->num = 0;
                __sock_put(sk);
        }

        packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
        if (po->pg_vec) {
                struct tpacket_req req;
                memset(&req, 0, sizeof(req));
                packet_set_ring(sk, &req, 1);
        }
#endif

        /*
         *      Now the socket is dead. No more input will appear.
         */

        sock_orphan(sk);
        sock->sk = NULL;

        /* Purge queues */

        skb_queue_purge(&sk->sk_receive_queue);
        sk_refcnt_debug_release(sk);

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
        struct packet_sock *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (po->running) {
                __sock_put(sk);
                po->running = 0;
                po->num = 0;
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        if (protocol == 0)
                goto out_unlock;

        if (!dev || (dev->flags & IFF_UP)) {
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        } else {
                sk->sk_err = ENETDOWN;
                if (!sock_flag(sk, SOCK_DEAD))
                        sk->sk_error_report(sk);
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 *      Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        char name[15];
        struct net_device *dev;
        int err = -ENODEV;

        /*
         *      Check legality
         */

        if (addr_len != sizeof(struct sockaddr))
                return -EINVAL;
        strlcpy(name, uaddr->sa_data, sizeof(name));

        dev = dev_get_by_name(sock_net(sk), name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
        struct sock *sk = sock->sk;
        struct net_device *dev = NULL;
        int err;


        /*
         *      Check legality
         */

        if (addr_len < sizeof(struct sockaddr_ll))
                return -EINVAL;
        if (sll->sll_family != AF_PACKET)
                return -EINVAL;

        if (sll->sll_ifindex) {
                err = -ENODEV;
                dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
                if (dev == NULL)
                        goto out;
        }
        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
        if (dev)
                dev_put(dev);

out:
        return err;
}
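
/*
 * Hedged sketch of binding from userspace ("eth0" is an assumption;
 * if_nametoindex() is from <net/if.h>):
 *
 *      struct sockaddr_ll sll = { 0 };
 *
 *      sll.sll_family   = AF_PACKET;
 *      sll.sll_protocol = htons(ETH_P_ALL);
 *      sll.sll_ifindex  = if_nametoindex("eth0");
 *      bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 *
 * A zero sll_ifindex binds to all devices; a zero sll_protocol keeps the
 * protocol the socket was created with, per packet_do_bind() above.
 */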

static struct proto packet_proto = {
        .name     = "PACKET",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct packet_sock),
};

/*
 *      Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol)
{
        struct sock *sk;
        struct packet_sock *po;
        __be16 proto = (__force __be16)protocol; /* weird, but documented */
        int err;

        if (!capable(CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
            sock->type != SOCK_PACKET)
                return -ESOCKTNOSUPPORT;

        sock->state = SS_UNCONNECTED;

        err = -ENOBUFS;
        sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
        if (sk == NULL)
                goto out;

        sock->ops = &packet_ops;
        if (sock->type == SOCK_PACKET)
                sock->ops = &packet_ops_spkt;

        sock_init_data(sock, sk);

        po = pkt_sk(sk);
        sk->sk_family = PF_PACKET;
        po->num = proto;

        sk->sk_destruct = packet_sock_destruct;
        sk_refcnt_debug_inc(sk);

        /*
         *      Attach a protocol block
         */

        spin_lock_init(&po->bind_lock);
        mutex_init(&po->pg_vec_lock);
        po->prot_hook.func = packet_rcv;

        if (sock->type == SOCK_PACKET)
                po->prot_hook.func = packet_rcv_spkt;

        po->prot_hook.af_packet_priv = sk;

        if (proto) {
                po->prot_hook.type = proto;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                po->running = 1;
        }

        write_lock_bh(&net->packet.sklist_lock);
        sk_add_node(sk, &net->packet.sklist);
        write_unlock_bh(&net->packet.sklist_lock);
        return 0;
out:
        return err;
}
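
/*
 * Hedged sketch of the userspace entry point for the above (requires
 * CAP_NET_RAW, as enforced at the top of packet_create()):
 *
 *      int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *      if (fd < 0)
 *              perror("socket");
 *
 * Note the protocol argument arrives as a big-endian ethertype even
 * though the prototype takes a plain int, hence the (__force __be16)
 * cast above.
 */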

/*
 *      Pull a packet from our receive queue and hand it to the user.
 *      If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
                          struct msghdr *msg, size_t len, int flags)
{
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        int copied, err;
        struct sockaddr_ll *sll;

        err = -EINVAL;
        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
                goto out;

#if 0
        /* What error should we return now? EUNATTACH? */
        if (pkt_sk(sk)->ifindex < 0)
                return -ENODEV;
#endif

        /*
         *      Call the generic datagram receiver. This handles all sorts
         *      of horrible races and re-entrancy so we can forget about it
         *      in the protocol layers.
         *
         *      Now it will return ENETDOWN if the device has just gone
         *      down, but then it will block.
         */

        skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

        /*
         *      An error occurred, so return it. Because skb_recv_datagram()
         *      handles the blocking, we don't need to see or worry about
         *      blocking retries.
         */

        if (skb == NULL)
                goto out;

        /*
         *      If the address length field is there to be filled in, we fill
         *      it in now.
         */

        sll = &PACKET_SKB_CB(skb)->sa.ll;
        if (sock->type == SOCK_PACKET)
                msg->msg_namelen = sizeof(struct sockaddr_pkt);
        else
                msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

        /*
         *      You lose any data beyond the buffer you gave. If it worries a
         *      user program they can ask the device for its MTU anyway.
         */

        copied = skb->len;
        if (copied > len) {
                copied = len;
                msg->msg_flags |= MSG_TRUNC;
        }

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
        if (err)
                goto out_free;

        sock_recv_timestamp(msg, sk, skb);

        if (msg->msg_name)
                memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
                       msg->msg_namelen);

        if (pkt_sk(sk)->auxdata) {
                struct tpacket_auxdata aux;

                aux.tp_status = TP_STATUS_USER;
                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        aux.tp_status |= TP_STATUS_CSUMNOTREADY;
                aux.tp_len = PACKET_SKB_CB(skb)->origlen;
                aux.tp_snaplen = skb->len;
                aux.tp_mac = 0;
                aux.tp_net = skb_network_offset(skb);
                aux.tp_vlan_tci = skb->vlan_tci;

                aux.tp_padding = 0;
                put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
        }

        /*
         *      Free or return the buffer as appropriate. Again this
         *      hides all the races and re-entrancy issues from us.
         */
        err = (flags & MSG_TRUNC) ? skb->len : copied;

out_free:
        skb_free_datagram(sk, skb);
out:
        return err;
}
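
/*
 * Hedged sketch of consuming PACKET_AUXDATA from userspace, once it has
 * been enabled with setsockopt(PACKET_AUXDATA) (buffer sizes are
 * assumptions):
 *
 *      char data[2048];
 *      char ctl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *      struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *      struct msghdr msg = {
 *              .msg_iov = &iov, .msg_iovlen = 1,
 *              .msg_control = ctl, .msg_controllen = sizeof(ctl),
 *      };
 *      struct cmsghdr *cmsg;
 *
 *      recvmsg(fd, &msg, 0);
 *      for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *           cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *              if (cmsg->cmsg_level == SOL_PACKET &&
 *                  cmsg->cmsg_type == PACKET_AUXDATA) {
 *                      struct tpacket_auxdata *aux =
 *                              (void *)CMSG_DATA(cmsg);
 *                      // aux->tp_len is the original wire length
 *              }
 *      }
 */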

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
                               int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;

        if (peer)
                return -EOPNOTSUPP;

        uaddr->sa_family = AF_PACKET;
        dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
        if (dev) {
                strncpy(uaddr->sa_data, dev->name, 14);
                dev_put(dev);
        } else
                memset(uaddr->sa_data, 0, 14);
        *uaddr_len = sizeof(*uaddr);

        return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
                          int *uaddr_len, int peer)
{
        struct net_device *dev;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;

        if (peer)
                return -EOPNOTSUPP;

        sll->sll_family = AF_PACKET;
        sll->sll_ifindex = po->ifindex;
        sll->sll_protocol = po->num;
        sll->sll_pkttype = 0;
        dev = dev_get_by_index(sock_net(sk), po->ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
                dev_put(dev);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
        }
        *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

        return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
                         int what)
{
        switch (i->type) {
        case PACKET_MR_MULTICAST:
                if (what > 0)
                        dev_mc_add(dev, i->addr, i->alen, 0);
                else
                        dev_mc_delete(dev, i->addr, i->alen, 0);
                break;
        case PACKET_MR_PROMISC:
                return dev_set_promiscuity(dev, what);
        case PACKET_MR_ALLMULTI:
                return dev_set_allmulti(dev, what);
        default:
                break;
        }
        return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
        for ( ; i; i = i->next) {
                if (i->ifindex == dev->ifindex)
                        packet_dev_mc(dev, i, what);
        }
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml, *i;
        struct net_device *dev;
        int err;

        rtnl_lock();

        err = -ENODEV;
        dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
        if (!dev)
                goto done;

        err = -EINVAL;
        if (mreq->mr_alen > dev->addr_len)
                goto done;

        err = -ENOBUFS;
        i = kmalloc(sizeof(*i), GFP_KERNEL);
        if (i == NULL)
                goto done;

        err = 0;
        for (ml = po->mclist; ml; ml = ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        ml->count++;
                        /* Free the new element ... */
                        kfree(i);
                        goto done;
                }
        }

        i->type = mreq->mr_type;
        i->ifindex = mreq->mr_ifindex;
        i->alen = mreq->mr_alen;
        memcpy(i->addr, mreq->mr_address, i->alen);
        i->count = 1;
        i->next = po->mclist;
        po->mclist = i;
        err = packet_dev_mc(dev, i, 1);
        if (err) {
                po->mclist = i->next;
                kfree(i);
        }

done:
        rtnl_unlock();
        return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
        struct packet_mclist *ml, **mlp;

        rtnl_lock();

        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
                if (ml->ifindex == mreq->mr_ifindex &&
                    ml->type == mreq->mr_type &&
                    ml->alen == mreq->mr_alen &&
                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
                        if (--ml->count == 0) {
                                struct net_device *dev;
                                *mlp = ml->next;
                                dev = dev_get_by_index(sock_net(sk), ml->ifindex);
                                if (dev) {
                                        packet_dev_mc(dev, ml, -1);
                                        dev_put(dev);
                                }
                                kfree(ml);
                        }
                        rtnl_unlock();
                        return 0;
                }
        }
        rtnl_unlock();
        return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
        struct packet_sock *po = pkt_sk(sk);
        struct packet_mclist *ml;

        if (!po->mclist)
                return;

        rtnl_lock();
        while ((ml = po->mclist) != NULL) {
                struct net_device *dev;

                po->mclist = ml->next;
                if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) {
                        packet_dev_mc(dev, ml, -1);
                        dev_put(dev);
                }
                kfree(ml);
        }
        rtnl_unlock();
}

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        int ret;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        switch (optname) {
        case PACKET_ADD_MEMBERSHIP:
        case PACKET_DROP_MEMBERSHIP:
        {
                struct packet_mreq_max mreq;
                int len = optlen;
                memset(&mreq, 0, sizeof(mreq));
                if (len < sizeof(struct packet_mreq))
                        return -EINVAL;
                if (len > sizeof(mreq))
                        len = sizeof(mreq);
                if (copy_from_user(&mreq, optval, len))
                        return -EFAULT;
                if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
                        return -EINVAL;
                if (optname == PACKET_ADD_MEMBERSHIP)
                        ret = packet_mc_add(sk, &mreq);
                else
                        ret = packet_mc_drop(sk, &mreq);
                return ret;
        }

#ifdef CONFIG_PACKET_MMAP
        case PACKET_RX_RING:
        {
                struct tpacket_req req;

                if (optlen < sizeof(req))
                        return -EINVAL;
                if (copy_from_user(&req, optval, sizeof(req)))
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
        case PACKET_COPY_THRESH:
        {
                int val;

                if (optlen != sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                pkt_sk(sk)->copy_thresh = val;
                return 0;
        }
        case PACKET_VERSION:
        {
                int val;

                if (optlen != sizeof(val))
                        return -EINVAL;
                if (po->pg_vec)
                        return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
                switch (val) {
                case TPACKET_V1:
                case TPACKET_V2:
                        po->tp_version = val;
                        return 0;
                default:
                        return -EINVAL;
                }
        }
        case PACKET_RESERVE:
        {
                unsigned int val;

                if (optlen != sizeof(val))
                        return -EINVAL;
                if (po->pg_vec)
                        return -EBUSY;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;
                po->tp_reserve = val;
                return 0;
        }
#endif
        case PACKET_AUXDATA:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->auxdata = !!val;
                return 0;
        }
        case PACKET_ORIGDEV:
        {
                int val;

                if (optlen < sizeof(val))
                        return -EINVAL;
                if (copy_from_user(&val, optval, sizeof(val)))
                        return -EFAULT;

                po->origdev = !!val;
                return 0;
        }
        default:
                return -ENOPROTOOPT;
        }
}
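
/*
 * Hedged sketch of configuring the mmap'ed RX ring from userspace (the
 * geometry below is an arbitrary example; tp_block_size must be a
 * multiple of PAGE_SIZE and tp_frame_nr must equal the frames per block
 * times tp_block_nr, per packet_set_ring()):
 *
 *      struct tpacket_req req = {
 *              .tp_block_size = 4096,
 *              .tp_block_nr   = 64,
 *              .tp_frame_size = 2048,
 *              .tp_frame_nr   = 128,   // 2 frames per block * 64 blocks
 *      };
 *
 *      setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *      void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */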

static int packet_getsockopt(struct socket *sock, int level, int optname,
                             char __user *optval, int __user *optlen)
{
        unsigned int len;
        int val;
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        void *data;
        struct tpacket_stats st;

        if (level != SOL_PACKET)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;

        if ((int)len < 0)
                return -EINVAL;

        switch (optname) {
        case PACKET_STATISTICS:
                if (len > sizeof(struct tpacket_stats))
                        len = sizeof(struct tpacket_stats);
                spin_lock_bh(&sk->sk_receive_queue.lock);
                st = po->stats;
                memset(&po->stats, 0, sizeof(st));
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                st.tp_packets += st.tp_drops;

                data = &st;
                break;
        case PACKET_AUXDATA:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->auxdata;

                data = &val;
                break;
        case PACKET_ORIGDEV:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->origdev;

                data = &val;
                break;
#ifdef CONFIG_PACKET_MMAP
        case PACKET_VERSION:
                if (len > sizeof(int))
                        len = sizeof(int);
                val = po->tp_version;
                data = &val;
                break;
        case PACKET_HDRLEN:
                if (len > sizeof(int))
                        len = sizeof(int);
                if (copy_from_user(&val, optval, len))
                        return -EFAULT;
                switch (val) {
                case TPACKET_V1:
                        val = sizeof(struct tpacket_hdr);
                        break;
                case TPACKET_V2:
                        val = sizeof(struct tpacket2_hdr);
                        break;
                default:
                        return -EINVAL;
                }
                data = &val;
                break;
        case PACKET_RESERVE:
                if (len > sizeof(unsigned int))
                        len = sizeof(unsigned int);
                val = po->tp_reserve;
                data = &val;
                break;
#endif
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen))
                return -EFAULT;
        if (copy_to_user(optval, data, len))
                return -EFAULT;
        return 0;
}
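
/*
 * Hedged sketch of reading the drop counters exposed above; note the
 * kernel zeroes the statistics on every read:
 *
 *      struct tpacket_stats st;
 *      socklen_t len = sizeof(st);
 *
 *      getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *      // st.tp_packets includes st.tp_drops, per the code above
 */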
1589
1590
1591static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1592{
1593        struct sock *sk;
1594        struct hlist_node *node;
1595        struct net_device *dev = data;
1596        struct net *net = dev_net(dev);
1597
1598        read_lock(&net->packet.sklist_lock);
1599        sk_for_each(sk, node, &net->packet.sklist) {
1600                struct packet_sock *po = pkt_sk(sk);
1601
1602                switch (msg) {
1603                case NETDEV_UNREGISTER:
1604                        if (po->mclist)
1605                                packet_dev_mclist(dev, po->mclist, -1);
1606                        /* fallthrough */
1607
1608                case NETDEV_DOWN:
1609                        if (dev->ifindex == po->ifindex) {
1610                                spin_lock(&po->bind_lock);
1611                                if (po->running) {
1612                                        __dev_remove_pack(&po->prot_hook);
1613                                        __sock_put(sk);
1614                                        po->running = 0;
1615                                        sk->sk_err = ENETDOWN;
1616                                        if (!sock_flag(sk, SOCK_DEAD))
1617                                                sk->sk_error_report(sk);
1618                                }
1619                                if (msg == NETDEV_UNREGISTER) {
1620                                        po->ifindex = -1;
1621                                        po->prot_hook.dev = NULL;
1622                                }
1623                                spin_unlock(&po->bind_lock);
1624                        }
1625                        break;
1626                case NETDEV_UP:
1627                        spin_lock(&po->bind_lock);
1628                        if (dev->ifindex == po->ifindex && po->num &&
1629                            !po->running) {
1630                                dev_add_pack(&po->prot_hook);
1631                                sock_hold(sk);
1632                                po->running = 1;
1633                        }
1634                        spin_unlock(&po->bind_lock);
1635                        break;
1636                }
1637        }
1638        read_unlock(&net->packet.sklist_lock);
1639        return NOTIFY_DONE;
1640}
1641
1642
1643static int packet_ioctl(struct socket *sock, unsigned int cmd,
1644                        unsigned long arg)
1645{
1646        struct sock *sk = sock->sk;
1647
1648        switch(cmd) {
1649                case SIOCOUTQ:
1650                {
1651                        int amount = atomic_read(&sk->sk_wmem_alloc);
1652                        return put_user(amount, (int __user *)arg);
1653                }
1654                case SIOCINQ:
1655                {
1656                        struct sk_buff *skb;
1657                        int amount = 0;
1658
1659                        spin_lock_bh(&sk->sk_receive_queue.lock);
1660                        skb = skb_peek(&sk->sk_receive_queue);
1661                        if (skb)
1662                                amount = skb->len;
1663                        spin_unlock_bh(&sk->sk_receive_queue.lock);
1664                        return put_user(amount, (int __user *)arg);
1665                }
1666                case SIOCGSTAMP:
1667                        return sock_get_timestamp(sk, (struct timeval __user *)arg);
1668                case SIOCGSTAMPNS:
1669                        return sock_get_timestampns(sk, (struct timespec __user *)arg);
1670
1671#ifdef CONFIG_INET
1672                case SIOCADDRT:
1673                case SIOCDELRT:
1674                case SIOCDARP:
1675                case SIOCGARP:
1676                case SIOCSARP:
1677                case SIOCGIFADDR:
1678                case SIOCSIFADDR:
1679                case SIOCGIFBRDADDR:
1680                case SIOCSIFBRDADDR:
1681                case SIOCGIFNETMASK:
1682                case SIOCSIFNETMASK:
1683                case SIOCGIFDSTADDR:
1684                case SIOCSIFDSTADDR:
1685                case SIOCSIFFLAGS:
1686                        if (!net_eq(sock_net(sk), &init_net))
1687                                return -ENOIOCTLCMD;
1688                        return inet_dgram_ops.ioctl(sock, cmd, arg);
1689#endif
1690
1691                default:
1692                        return -ENOIOCTLCMD;
1693        }
1694        return 0;
1695}
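/*
 * On a packet socket SIOCINQ reports the length of the next queued frame
 * (zero if the queue is empty), not the total backlog, and SIOCOUTQ the
 * transmit memory still charged to the socket.  A hypothetical
 * user-space sketch:
 *
 *	int next_len;
 *
 *	ioctl(fd, SIOCINQ, &next_len);
 *	-- next_len is now the size of the next pending frame, or 0
 */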
1696
1697#ifndef CONFIG_PACKET_MMAP
1698#define packet_mmap sock_no_mmap
1699#define packet_poll datagram_poll
1700#else
1701
1702static unsigned int packet_poll(struct file *file, struct socket *sock,
1703                                poll_table *wait)
1704{
1705        struct sock *sk = sock->sk;
1706        struct packet_sock *po = pkt_sk(sk);
1707        unsigned int mask = datagram_poll(file, sock, wait);
1708
1709        spin_lock_bh(&sk->sk_receive_queue.lock);
1710        if (po->pg_vec) {
1711                unsigned int last = po->head ? po->head - 1 : po->frame_max;
1712
1713                if (packet_lookup_frame(po, last, TP_STATUS_USER))
1714                        mask |= POLLIN | POLLRDNORM;
1715        }
1716        spin_unlock_bh(&sk->sk_receive_queue.lock);
1717        return mask;
1718}
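/*
 * With a ring attached, readability is judged from the ring itself: the
 * slot just behind po->head is the last one the kernel filled, so if it
 * still carries TP_STATUS_USER at least one frame is pending.  A
 * hypothetical user-space consumer (ring, slot and frame_size as set up
 * for a mapped PACKET_RX_RING):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	struct tpacket2_hdr *hdr;
 *
 *	poll(&pfd, 1, -1);
 *	hdr = (struct tpacket2_hdr *)(ring + slot * frame_size);
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		-- consume the frame, then hand the slot back:
 *		hdr->tp_status = TP_STATUS_KERNEL;
 *	}
 */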
1719
1720
1721/* Dirty? Well, I still have not found a better way to account
1722 * for user mmaps.
1723 */
1724
1725static void packet_mm_open(struct vm_area_struct *vma)
1726{
1727        struct file *file = vma->vm_file;
1728        struct socket *sock = file->private_data;
1729        struct sock *sk = sock->sk;
1730
1731        if (sk)
1732                atomic_inc(&pkt_sk(sk)->mapped);
1733}
1734
1735static void packet_mm_close(struct vm_area_struct *vma)
1736{
1737        struct file *file = vma->vm_file;
1738        struct socket *sock = file->private_data;
1739        struct sock *sk = sock->sk;
1740
1741        if (sk)
1742                atomic_dec(&pkt_sk(sk)->mapped);
1743}
1744
1745static struct vm_operations_struct packet_mmap_ops = {
1746        .open = packet_mm_open,
1747        .close = packet_mm_close,
1748};
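/*
 * The open/close hooks above track VMA duplication and teardown (fork(),
 * munmap(), process exit), so po->mapped counts the live mappings of the
 * ring.  packet_set_ring() below refuses to swap the buffer while this
 * count is non-zero unless the socket is being closed.
 */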
1749
1750static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1751{
1752        int i;
1753
1754        for (i = 0; i < len; i++) {
1755                if (likely(pg_vec[i]))
1756                        free_pages((unsigned long) pg_vec[i], order);
1757        }
1758        kfree(pg_vec);
1759}
1760
1761static inline char *alloc_one_pg_vec_page(unsigned long order)
1762{
1763        return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1764                                         order);
1765}
1766
1767static char **alloc_pg_vec(struct tpacket_req *req, int order)
1768{
1769        unsigned int block_nr = req->tp_block_nr;
1770        char **pg_vec;
1771        int i;
1772
1773        pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1774        if (unlikely(!pg_vec))
1775                goto out;
1776
1777        for (i = 0; i < block_nr; i++) {
1778                pg_vec[i] = alloc_one_pg_vec_page(order);
1779                if (unlikely(!pg_vec[i]))
1780                        goto out_free_pgvec;
1781        }
1782
1783out:
1784        return pg_vec;
1785
1786out_free_pgvec:
1787        free_pg_vec(pg_vec, order, block_nr);
1788        pg_vec = NULL;
1789        goto out;
1790}
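/*
 * Each ring block is a single physically contiguous, zeroed, high-order
 * allocation.  __GFP_COMP makes it a compound page, so the per-page
 * references taken when the block is later mapped page by page
 * (vm_insert_page() in packet_mmap()) pin the allocation as a whole.
 */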
1791
1792static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1793{
1794        char **pg_vec = NULL;
1795        struct packet_sock *po = pkt_sk(sk);
1796        int was_running, order = 0;
1797        __be16 num;
1798        int err = 0;
1799
1800        if (req->tp_block_nr) {
1801                int i;
1802
1803                /* Sanity tests and some calculations */
1804
1805                if (unlikely(po->pg_vec))
1806                        return -EBUSY;
1807
1808                switch (po->tp_version) {
1809                case TPACKET_V1:
1810                        po->tp_hdrlen = TPACKET_HDRLEN;
1811                        break;
1812                case TPACKET_V2:
1813                        po->tp_hdrlen = TPACKET2_HDRLEN;
1814                        break;
1815                }
1816
1817                if (unlikely((int)req->tp_block_size <= 0))
1818                        return -EINVAL;
1819                if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1820                        return -EINVAL;
1821                if (unlikely(req->tp_frame_size < po->tp_hdrlen +
1822                                                  po->tp_reserve))
1823                        return -EINVAL;
1824                if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1825                        return -EINVAL;
1826
1827                po->frames_per_block = req->tp_block_size / req->tp_frame_size;
1828                if (unlikely(po->frames_per_block <= 0))
1829                        return -EINVAL;
1830                if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1831                             req->tp_frame_nr))
1832                        return -EINVAL;
1833
1834                err = -ENOMEM;
1835                order = get_order(req->tp_block_size);
1836                pg_vec = alloc_pg_vec(req, order);
1837                if (unlikely(!pg_vec))
1838                        goto out;
1839
1840                for (i = 0; i < req->tp_block_nr; i++) {
1841                        void *ptr = pg_vec[i];
1842                        int k;
1843
1844                        for (k = 0; k < po->frames_per_block; k++) {
1845                                __packet_set_status(po, ptr, TP_STATUS_KERNEL);
1846                                ptr += req->tp_frame_size;
1847                        }
1848                }
1849                /* Done */
1850        } else {
1851                if (unlikely(req->tp_frame_nr))
1852                        return -EINVAL;
1853        }
1854
1855        lock_sock(sk);
1856
1857        /* Detach socket from network */
1858        spin_lock(&po->bind_lock);
1859        was_running = po->running;
1860        num = po->num;
1861        if (was_running) {
1862                __dev_remove_pack(&po->prot_hook);
1863                po->num = 0;
1864                po->running = 0;
1865                __sock_put(sk);
1866        }
1867        spin_unlock(&po->bind_lock);
1868
1869        synchronize_net();
1870
1871        err = -EBUSY;
1872        mutex_lock(&po->pg_vec_lock);
1873        if (closing || atomic_read(&po->mapped) == 0) {
1874                err = 0;
1875#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1876
1877                spin_lock_bh(&sk->sk_receive_queue.lock);
1878                pg_vec = XC(po->pg_vec, pg_vec);
1879                po->frame_max = (req->tp_frame_nr - 1);
1880                po->head = 0;
1881                po->frame_size = req->tp_frame_size;
1882                spin_unlock_bh(&sk->sk_receive_queue.lock);
1883
1884                order = XC(po->pg_vec_order, order);
1885                req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1886
1887                po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
1888                po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1889                skb_queue_purge(&sk->sk_receive_queue);
1890#undef XC
1891                if (atomic_read(&po->mapped))
1892                        printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1893        }
1894        mutex_unlock(&po->pg_vec_lock);
1895
1896        spin_lock(&po->bind_lock);
1897        if (was_running && !po->running) {
1898                sock_hold(sk);
1899                po->running = 1;
1900                po->num = num;
1901                dev_add_pack(&po->prot_hook);
1902        }
1903        spin_unlock(&po->bind_lock);
1904
1905        release_sock(sk);
1906
1907        if (pg_vec)
1908                free_pg_vec(pg_vec, order, req->tp_block_nr);
1909out:
1910        return err;
1911}
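/*
 * The sanity checks above spell out the user-visible contract:
 * tp_block_size is a positive multiple of PAGE_SIZE, tp_frame_size is
 * TPACKET_ALIGNMENT-aligned and at least tp_hdrlen + tp_reserve, and
 * tp_frame_nr must equal (tp_block_size / tp_frame_size) * tp_block_nr.
 * A minimal, hypothetical PACKET_RX_RING setup from user space (assumes
 * PAGE_SIZE == 4096):
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 4096,
 *		.tp_block_nr	= 64,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 64 * (4096 / 2048),
 *	};
 *	char *ring;
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, (size_t)req.tp_block_size * req.tp_block_nr,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */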
1912
1913static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1914{
1915        struct sock *sk = sock->sk;
1916        struct packet_sock *po = pkt_sk(sk);
1917        unsigned long size;
1918        unsigned long start;
1919        int err = -EINVAL;
1920        int i;
1921
1922        if (vma->vm_pgoff)
1923                return -EINVAL;
1924
1925        size = vma->vm_end - vma->vm_start;
1926
1927        mutex_lock(&po->pg_vec_lock);
1928        if (po->pg_vec == NULL)
1929                goto out;
1930        if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
1931                goto out;
1932
1933        start = vma->vm_start;
1934        for (i = 0; i < po->pg_vec_len; i++) {
1935                struct page *page = virt_to_page(po->pg_vec[i]);
1936                int pg_num;
1937
1938                for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1939                        err = vm_insert_page(vma, start, page);
1940                        if (unlikely(err))
1941                                goto out;
1942                        start += PAGE_SIZE;
1943                }
1944        }
1945        atomic_inc(&po->mapped);
1946        vma->vm_ops = &packet_mmap_ops;
1947        err = 0;
1948
1949out:
1950        mutex_unlock(&po->pg_vec_lock);
1951        return err;
1952}
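/*
 * packet_mmap() only accepts a mapping of the whole ring at offset 0 and
 * inserts the blocks back to back, so user space sees one flat region of
 * pg_vec_len * pg_vec_pages * PAGE_SIZE bytes even though the blocks are
 * independent kernel allocations.
 */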
1953#endif
1954
1955
1956static const struct proto_ops packet_ops_spkt = {
1957        .family =       PF_PACKET,
1958        .owner =        THIS_MODULE,
1959        .release =      packet_release,
1960        .bind =         packet_bind_spkt,
1961        .connect =      sock_no_connect,
1962        .socketpair =   sock_no_socketpair,
1963        .accept =       sock_no_accept,
1964        .getname =      packet_getname_spkt,
1965        .poll =         datagram_poll,
1966        .ioctl =        packet_ioctl,
1967        .listen =       sock_no_listen,
1968        .shutdown =     sock_no_shutdown,
1969        .setsockopt =   sock_no_setsockopt,
1970        .getsockopt =   sock_no_getsockopt,
1971        .sendmsg =      packet_sendmsg_spkt,
1972        .recvmsg =      packet_recvmsg,
1973        .mmap =         sock_no_mmap,
1974        .sendpage =     sock_no_sendpage,
1975};
1976
1977static const struct proto_ops packet_ops = {
1978        .family =       PF_PACKET,
1979        .owner =        THIS_MODULE,
1980        .release =      packet_release,
1981        .bind =         packet_bind,
1982        .connect =      sock_no_connect,
1983        .socketpair =   sock_no_socketpair,
1984        .accept =       sock_no_accept,
1985        .getname =      packet_getname,
1986        .poll =         packet_poll,
1987        .ioctl =        packet_ioctl,
1988        .listen =       sock_no_listen,
1989        .shutdown =     sock_no_shutdown,
1990        .setsockopt =   packet_setsockopt,
1991        .getsockopt =   packet_getsockopt,
1992        .sendmsg =      packet_sendmsg,
1993        .recvmsg =      packet_recvmsg,
1994        .mmap =         packet_mmap,
1995        .sendpage =     sock_no_sendpage,
1996};
1997
1998static struct net_proto_family packet_family_ops = {
1999        .family =       PF_PACKET,
2000        .create =       packet_create,
2001        .owner  =       THIS_MODULE,
2002};
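/*
 * Registering this family is what routes socket(PF_PACKET, ...) to
 * packet_create().  Hypothetical examples:
 *
 *	fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *		-- raw frames including the link-level header
 *	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
 *		-- cooked mode, link-level header removed on receive
 */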
2003
2004static struct notifier_block packet_netdev_notifier = {
2005        .notifier_call = packet_notifier,
2006};
2007
2008#ifdef CONFIG_PROC_FS
2009static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2010{
2011        struct sock *s;
2012        struct hlist_node *node;
2013
2014        sk_for_each(s, node, &net->packet.sklist) {
2015                if (!off--)
2016                        return s;
2017        }
2018        return NULL;
2019}
2020
2021static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2022        __acquires(seq_file_net(seq)->packet.sklist_lock)
2023{
2024        struct net *net = seq_file_net(seq);
2025        read_lock(&net->packet.sklist_lock);
2026        return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2027}
2028
2029static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2030{
2031        struct net *net = seq_file_net(seq);
2032        ++*pos;
2033        return (v == SEQ_START_TOKEN)
2034                ? sk_head(&net->packet.sklist)
2035                : sk_next((struct sock *)v);
2036}
2037
2038static void packet_seq_stop(struct seq_file *seq, void *v)
2039        __releases(seq_file_net(seq)->packet.sklist_lock)
2040{
2041        struct net *net = seq_file_net(seq);
2042        read_unlock(&net->packet.sklist_lock);
2043}
2044
2045static int packet_seq_show(struct seq_file *seq, void *v)
2046{
2047        if (v == SEQ_START_TOKEN)
2048                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2049        else {
2050                struct sock *s = v;
2051                const struct packet_sock *po = pkt_sk(s);
2052
2053                seq_printf(seq,
2054                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2055                           s,
2056                           atomic_read(&s->sk_refcnt),
2057                           s->sk_type,
2058                           ntohs(po->num),
2059                           po->ifindex,
2060                           po->running,
2061                           atomic_read(&s->sk_rmem_alloc),
2062                           sock_i_uid(s),
2063                           sock_i_ino(s));
2064        }
2065
2066        return 0;
2067}
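/*
 * With the header above, each socket becomes one line of
 * /proc/net/packet.  A purely illustrative example (all values invented):
 *
 *	sk       RefCnt Type Proto  Iface R Rmem   User   Inode
 *	f61ac800 3      3    0003   2     1 0      0      7689
 */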
2068
2069static const struct seq_operations packet_seq_ops = {
2070        .start  = packet_seq_start,
2071        .next   = packet_seq_next,
2072        .stop   = packet_seq_stop,
2073        .show   = packet_seq_show,
2074};
2075
2076static int packet_seq_open(struct inode *inode, struct file *file)
2077{
2078        return seq_open_net(inode, file, &packet_seq_ops,
2079                            sizeof(struct seq_net_private));
2080}
2081
2082static const struct file_operations packet_seq_fops = {
2083        .owner          = THIS_MODULE,
2084        .open           = packet_seq_open,
2085        .read           = seq_read,
2086        .llseek         = seq_lseek,
2087        .release        = seq_release_net,
2088};
2089
2090#endif
2091
2092static int packet_net_init(struct net *net)
2093{
2094        rwlock_init(&net->packet.sklist_lock);
2095        INIT_HLIST_HEAD(&net->packet.sklist);
2096
2097        if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2098                return -ENOMEM;
2099
2100        return 0;
2101}
2102
2103static void packet_net_exit(struct net *net)
2104{
2105        proc_net_remove(net, "packet");
2106}
2107
2108static struct pernet_operations packet_net_ops = {
2109        .init = packet_net_init,
2110        .exit = packet_net_exit,
2111};
2112
2113
2114static void __exit packet_exit(void)
2115{
2116        unregister_netdevice_notifier(&packet_netdev_notifier);
2117        unregister_pernet_subsys(&packet_net_ops);
2118        sock_unregister(PF_PACKET);
2119        proto_unregister(&packet_proto);
2120}
2121
2122static int __init packet_init(void)
2123{
2124        int rc = proto_register(&packet_proto, 0);
2125
2126        if (rc != 0)
2127                goto out;
2128
2129        sock_register(&packet_family_ops);
2130        register_pernet_subsys(&packet_net_ops);
2131        register_netdevice_notifier(&packet_netdev_notifier);
2132out:
2133        return rc;
2134}
2135
2136module_init(packet_init);
2137module_exit(packet_exit);
2138MODULE_LICENSE("GPL");
2139MODULE_ALIAS_NETPROTO(PF_PACKET);
2140