linux/net/packet/af_packet.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              PACKET - implements raw packet sockets.
   7 *
   8 * Authors:     Ross Biro
   9 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  10 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  11 *
  12 * Fixes:
  13 *              Alan Cox        :       verify_area() now used correctly
  14 *              Alan Cox        :       new skbuff lists, look ma no backlogs!
  15 *              Alan Cox        :       tidied skbuff lists.
  16 *              Alan Cox        :       Now uses generic datagram routines I
  17 *                                      added. Also fixed the peek/read crash
  18 *                                      from all old Linux datagram code.
  19 *              Alan Cox        :       Uses the improved datagram code.
  20 *              Alan Cox        :       Added NULL's for socket options.
  21 *              Alan Cox        :       Re-commented the code.
  22 *              Alan Cox        :       Use new kernel side addressing
  23 *              Rob Janssen     :       Correct MTU usage.
  24 *              Dave Platt      :       Counter leaks caused by incorrect
  25 *                                      interrupt locking and some slightly
  26 *                                      dubious gcc output. Can you read
  27 *                                      compiler: it said _VOLATILE_
  28 *      Richard Kooijman        :       Timestamp fixes.
  29 *              Alan Cox        :       New buffers. Use sk->mac.raw.
  30 *              Alan Cox        :       sendmsg/recvmsg support.
  31 *              Alan Cox        :       Protocol setting support
  32 *      Alexey Kuznetsov        :       Untied from IPv4 stack.
  33 *      Cyrus Durgin            :       Fixed kerneld for kmod.
  34 *      Michal Ostrowski        :       Module initialization cleanup.
  35 *         Ulises Alonso        :       Frame number limit removal and
  36 *                                      packet_set_ring memory leak.
  37 *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
  38 *                                      The convention is that longer addresses
  39 *                                      will simply extend the hardware address
  40 *                                      byte arrays at the end of sockaddr_ll
  41 *                                      and packet_mreq.
  42 *
  43 *              This program is free software; you can redistribute it and/or
  44 *              modify it under the terms of the GNU General Public License
  45 *              as published by the Free Software Foundation; either version
  46 *              2 of the License, or (at your option) any later version.
  47 *
  48 */
  49
  50#include <linux/types.h>
  51#include <linux/mm.h>
  52#include <linux/capability.h>
  53#include <linux/fcntl.h>
  54#include <linux/socket.h>
  55#include <linux/in.h>
  56#include <linux/inet.h>
  57#include <linux/netdevice.h>
  58#include <linux/if_packet.h>
  59#include <linux/wireless.h>
  60#include <linux/kernel.h>
  61#include <linux/kmod.h>
  62#include <net/net_namespace.h>
  63#include <net/ip.h>
  64#include <net/protocol.h>
  65#include <linux/skbuff.h>
  66#include <net/sock.h>
  67#include <linux/errno.h>
  68#include <linux/timer.h>
  69#include <asm/system.h>
  70#include <asm/uaccess.h>
  71#include <asm/ioctls.h>
  72#include <asm/page.h>
  73#include <asm/cacheflush.h>
  74#include <asm/io.h>
  75#include <linux/proc_fs.h>
  76#include <linux/seq_file.h>
  77#include <linux/poll.h>
  78#include <linux/module.h>
  79#include <linux/init.h>
  80
  81#ifdef CONFIG_INET
  82#include <net/inet_common.h>
  83#endif
  84
  85/*
  86   Assumptions:
  87   - if device has no dev->hard_header routine, it adds and removes ll header
  88     inside itself. In this case ll header is invisible outside of device,
  89     but higher levels still should reserve dev->hard_header_len.
   90     Some devices are clever enough to reallocate the skb when the header
   91     will not fit into the reserved space (tunnel); others are silly
   92     (PPP).
  93   - packet socket receives packets with pulled ll header,
  94     so that SOCK_RAW should push it back.
  95
  96On receive:
  97-----------
  98
  99Incoming, dev->hard_header!=NULL
 100   mac_header -> ll header
 101   data       -> data
 102
 103Outgoing, dev->hard_header!=NULL
 104   mac_header -> ll header
 105   data       -> ll header
 106
 107Incoming, dev->hard_header==NULL
  108   mac_header -> UNKNOWN position. It is very likely that it points to the ll
  109                 header.  PPP does this, which is wrong because it introduces
  110                 asymmetry between the rx and tx paths.
 111   data       -> data
 112
 113Outgoing, dev->hard_header==NULL
 114   mac_header -> data. ll header is still not built!
 115   data       -> data
 116
 117Resume
 118  If dev->hard_header==NULL we are unlikely to restore sensible ll header.
 119
 120
 121On transmit:
 122------------
 123
 124dev->hard_header != NULL
 125   mac_header -> ll header
 126   data       -> ll header
 127
 128dev->hard_header == NULL (ll header is added by device, we cannot control it)
 129   mac_header -> data
 130   data       -> data
 131
  132   We should set nh.raw on output to the correct position;
  133   the packet classifier depends on it.
 134 */
 135
 136/* Private packet socket structures. */
 137
 138struct packet_mclist
 139{
 140        struct packet_mclist    *next;
 141        int                     ifindex;
 142        int                     count;
 143        unsigned short          type;
 144        unsigned short          alen;
 145        unsigned char           addr[MAX_ADDR_LEN];
 146};
 147/* identical to struct packet_mreq except it has
 148 * a longer address field.
 149 */
 150struct packet_mreq_max
 151{
 152        int             mr_ifindex;
 153        unsigned short  mr_type;
 154        unsigned short  mr_alen;
 155        unsigned char   mr_address[MAX_ADDR_LEN];
 156};
 157
 158#ifdef CONFIG_PACKET_MMAP
 159static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
 160#endif
 161
 162static void packet_flush_mclist(struct sock *sk);
 163
 164struct packet_sock {
 165        /* struct sock has to be the first member of packet_sock */
 166        struct sock             sk;
 167        struct tpacket_stats    stats;
 168#ifdef CONFIG_PACKET_MMAP
 169        char *                  *pg_vec;
 170        unsigned int            head;
 171        unsigned int            frames_per_block;
 172        unsigned int            frame_size;
 173        unsigned int            frame_max;
 174        int                     copy_thresh;
 175#endif
 176        struct packet_type      prot_hook;
 177        spinlock_t              bind_lock;
 178        unsigned int            running:1,      /* prot_hook is attached*/
 179                                auxdata:1,
 180                                origdev:1;
 181        int                     ifindex;        /* bound device         */
 182        __be16                  num;
 183        struct packet_mclist    *mclist;
 184#ifdef CONFIG_PACKET_MMAP
 185        atomic_t                mapped;
 186        unsigned int            pg_vec_order;
 187        unsigned int            pg_vec_pages;
 188        unsigned int            pg_vec_len;
 189        enum tpacket_versions   tp_version;
 190        unsigned int            tp_hdrlen;
 191        unsigned int            tp_reserve;
 192#endif
 193};
 194
 195struct packet_skb_cb {
 196        unsigned int origlen;
 197        union {
 198                struct sockaddr_pkt pkt;
 199                struct sockaddr_ll ll;
 200        } sa;
 201};
 202
 203#define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
 204
 205#ifdef CONFIG_PACKET_MMAP
 206
 207static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
 208                                 int status)
 209{
 210        unsigned int pg_vec_pos, frame_offset;
 211        union {
 212                struct tpacket_hdr *h1;
 213                struct tpacket2_hdr *h2;
 214                void *raw;
 215        } h;
 216
 217        pg_vec_pos = position / po->frames_per_block;
 218        frame_offset = position % po->frames_per_block;
 219
 220        h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
 221        switch (po->tp_version) {
 222        case TPACKET_V1:
 223                if (status != h.h1->tp_status ? TP_STATUS_USER :
 224                                                TP_STATUS_KERNEL)
 225                        return NULL;
 226                break;
 227        case TPACKET_V2:
 228                if (status != h.h2->tp_status ? TP_STATUS_USER :
 229                                                TP_STATUS_KERNEL)
 230                        return NULL;
 231                break;
 232        }
 233        return h.raw;
 234}
 235
 236static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 237{
 238        union {
 239                struct tpacket_hdr *h1;
 240                struct tpacket2_hdr *h2;
 241                void *raw;
 242        } h;
 243
 244        h.raw = frame;
 245        switch (po->tp_version) {
 246        case TPACKET_V1:
 247                h.h1->tp_status = status;
 248                break;
 249        case TPACKET_V2:
 250                h.h2->tp_status = status;
 251                break;
 252        }
 253}
 254#endif
 255
 256static inline struct packet_sock *pkt_sk(struct sock *sk)
 257{
 258        return (struct packet_sock *)sk;
 259}
 260
 261static void packet_sock_destruct(struct sock *sk)
 262{
 263        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 264        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 265
 266        if (!sock_flag(sk, SOCK_DEAD)) {
 267                printk("Attempt to release alive packet socket: %p\n", sk);
 268                return;
 269        }
 270
 271        sk_refcnt_debug_dec(sk);
 272}
 273
 274
 275static const struct proto_ops packet_ops;
 276
 277static const struct proto_ops packet_ops_spkt;
 278
 279static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
 280{
 281        struct sock *sk;
 282        struct sockaddr_pkt *spkt;
 283
 284        /*
 285         *      When we registered the protocol we saved the socket in the data
 286         *      field for just this event.
 287         */
 288
 289        sk = pt->af_packet_priv;
 290
 291        /*
 292         *      Yank back the headers [hope the device set this
 293         *      right or kerboom...]
 294         *
 295         *      Incoming packets have ll header pulled,
 296         *      push it back.
 297         *
 298         *      For outgoing ones skb->data == skb_mac_header(skb)
 299         *      so that this procedure is noop.
 300         */
 301
 302        if (skb->pkt_type == PACKET_LOOPBACK)
 303                goto out;
 304
 305        if (dev_net(dev) != sock_net(sk))
 306                goto out;
 307
 308        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
 309                goto oom;
 310
 311        /* drop any routing info */
 312        dst_release(skb->dst);
 313        skb->dst = NULL;
 314
 315        /* drop conntrack reference */
 316        nf_reset(skb);
 317
 318        spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 319
 320        skb_push(skb, skb->data - skb_mac_header(skb));
 321
 322        /*
 323         *      The SOCK_PACKET socket receives _all_ frames.
 324         */
 325
 326        spkt->spkt_family = dev->type;
 327        strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
 328        spkt->spkt_protocol = skb->protocol;
 329
 330        /*
 331         *      Charge the memory to the socket. This is done specifically
 332         *      to prevent sockets using all the memory up.
 333         */
 334
 335        if (sock_queue_rcv_skb(sk,skb) == 0)
 336                return 0;
 337
 338out:
 339        kfree_skb(skb);
 340oom:
 341        return 0;
 342}
 343
 344
 345/*
 346 *      Output a raw packet to a device layer. This bypasses all the other
 347 *      protocol layers and you must therefore supply it with a complete frame
 348 */
 349
 350static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 351                               struct msghdr *msg, size_t len)
 352{
 353        struct sock *sk = sock->sk;
 354        struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
 355        struct sk_buff *skb;
 356        struct net_device *dev;
 357        __be16 proto=0;
 358        int err;
 359
 360        /*
 361         *      Get and verify the address.
 362         */
 363
 364        if (saddr)
 365        {
 366                if (msg->msg_namelen < sizeof(struct sockaddr))
 367                        return(-EINVAL);
 368                if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
 369                        proto=saddr->spkt_protocol;
 370        }
 371        else
 372                return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
 373
 374        /*
 375         *      Find the device first to size check it
 376         */
 377
 378        saddr->spkt_device[13] = 0;
 379        dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
 380        err = -ENODEV;
 381        if (dev == NULL)
 382                goto out_unlock;
 383
 384        err = -ENETDOWN;
 385        if (!(dev->flags & IFF_UP))
 386                goto out_unlock;
 387
 388        /*
 389         *      You may not queue a frame bigger than the mtu. This is the lowest level
 390         *      raw protocol and you must do your own fragmentation at this level.
 391         */
 392
 393        err = -EMSGSIZE;
 394        if (len > dev->mtu + dev->hard_header_len)
 395                goto out_unlock;
 396
 397        err = -ENOBUFS;
 398        skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
 399
 400        /*
 401         *      If the write buffer is full, then tough. At this level the user gets to
 402         *      deal with the problem - do your own algorithmic backoffs. That's far
 403         *      more flexible.
 404         */
 405
 406        if (skb == NULL)
 407                goto out_unlock;
 408
 409        /*
 410         *      Fill it in
 411         */
 412
 413        /* FIXME: Save some space for broken drivers that write a
 414         * hard header at transmission time by themselves. PPP is the
 415         * notable one here. This should really be fixed at the driver level.
 416         */
 417        skb_reserve(skb, LL_RESERVED_SPACE(dev));
 418        skb_reset_network_header(skb);
 419
 420        /* Try to align data part correctly */
 421        if (dev->header_ops) {
 422                skb->data -= dev->hard_header_len;
 423                skb->tail -= dev->hard_header_len;
 424                if (len < dev->hard_header_len)
 425                        skb_reset_network_header(skb);
 426        }
 427
 428        /* Returns -EFAULT on error */
 429        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
 430        skb->protocol = proto;
 431        skb->dev = dev;
 432        skb->priority = sk->sk_priority;
 433        if (err)
 434                goto out_free;
 435
 436        /*
 437         *      Now send it
 438         */
 439
 440        dev_queue_xmit(skb);
 441        dev_put(dev);
 442        return(len);
 443
 444out_free:
 445        kfree_skb(skb);
 446out_unlock:
 447        if (dev)
 448                dev_put(dev);
 449        return err;
 450}
 451
 452static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 453                                      unsigned int res)
 454{
 455        struct sk_filter *filter;
 456
 457        rcu_read_lock_bh();
 458        filter = rcu_dereference(sk->sk_filter);
 459        if (filter != NULL)
 460                res = sk_run_filter(skb, filter->insns, filter->len);
 461        rcu_read_unlock_bh();
 462
 463        return res;
 464}
 465
 466/*
 467   This function makes lazy skb cloning in hope that most of packets
 468   are discarded by BPF.
 469
 470   Note tricky part: we DO mangle shared skb! skb->data, skb->len
 471   and skb->cb are mangled. It works because (and until) packets
 472   falling here are owned by current CPU. Output packets are cloned
 473   by dev_queue_xmit_nit(), input packets are processed by net_bh
 474   sequencially, so that if we return skb to original state on exit,
 475   we will not harm anyone.
 476 */
 477
 478static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 479{
 480        struct sock *sk;
 481        struct sockaddr_ll *sll;
 482        struct packet_sock *po;
 483        u8 * skb_head = skb->data;
 484        int skb_len = skb->len;
 485        unsigned int snaplen, res;
 486
 487        if (skb->pkt_type == PACKET_LOOPBACK)
 488                goto drop;
 489
 490        sk = pt->af_packet_priv;
 491        po = pkt_sk(sk);
 492
 493        if (dev_net(dev) != sock_net(sk))
 494                goto drop;
 495
 496        skb->dev = dev;
 497
 498        if (dev->header_ops) {
 499                /* The device has an explicit notion of ll header,
 500                   exported to higher levels.
 501
 502                   Otherwise, the device hides datails of it frame
 503                   structure, so that corresponding packet head
 504                   never delivered to user.
 505                 */
 506                if (sk->sk_type != SOCK_DGRAM)
 507                        skb_push(skb, skb->data - skb_mac_header(skb));
 508                else if (skb->pkt_type == PACKET_OUTGOING) {
 509                        /* Special case: outgoing packets have ll header at head */
 510                        skb_pull(skb, skb_network_offset(skb));
 511                }
 512        }
 513
 514        snaplen = skb->len;
 515
 516        res = run_filter(skb, sk, snaplen);
 517        if (!res)
 518                goto drop_n_restore;
 519        if (snaplen > res)
 520                snaplen = res;
 521
 522        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 523            (unsigned)sk->sk_rcvbuf)
 524                goto drop_n_acct;
 525
 526        if (skb_shared(skb)) {
 527                struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 528                if (nskb == NULL)
 529                        goto drop_n_acct;
 530
 531                if (skb_head != skb->data) {
 532                        skb->data = skb_head;
 533                        skb->len = skb_len;
 534                }
 535                kfree_skb(skb);
 536                skb = nskb;
 537        }
 538
 539        BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
 540                     sizeof(skb->cb));
 541
 542        sll = &PACKET_SKB_CB(skb)->sa.ll;
 543        sll->sll_family = AF_PACKET;
 544        sll->sll_hatype = dev->type;
 545        sll->sll_protocol = skb->protocol;
 546        sll->sll_pkttype = skb->pkt_type;
 547        if (unlikely(po->origdev))
 548                sll->sll_ifindex = orig_dev->ifindex;
 549        else
 550                sll->sll_ifindex = dev->ifindex;
 551
 552        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
 553
 554        PACKET_SKB_CB(skb)->origlen = skb->len;
 555
 556        if (pskb_trim(skb, snaplen))
 557                goto drop_n_acct;
 558
 559        skb_set_owner_r(skb, sk);
 560        skb->dev = NULL;
 561        dst_release(skb->dst);
 562        skb->dst = NULL;
 563
 564        /* drop conntrack reference */
 565        nf_reset(skb);
 566
 567        spin_lock(&sk->sk_receive_queue.lock);
 568        po->stats.tp_packets++;
 569        __skb_queue_tail(&sk->sk_receive_queue, skb);
 570        spin_unlock(&sk->sk_receive_queue.lock);
 571        sk->sk_data_ready(sk, skb->len);
 572        return 0;
 573
 574drop_n_acct:
 575        spin_lock(&sk->sk_receive_queue.lock);
 576        po->stats.tp_drops++;
 577        spin_unlock(&sk->sk_receive_queue.lock);
 578
 579drop_n_restore:
 580        if (skb_head != skb->data && skb_shared(skb)) {
 581                skb->data = skb_head;
 582                skb->len = skb_len;
 583        }
 584drop:
 585        kfree_skb(skb);
 586        return 0;
 587}
 588
 589#ifdef CONFIG_PACKET_MMAP
 590static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 591{
 592        struct sock *sk;
 593        struct packet_sock *po;
 594        struct sockaddr_ll *sll;
 595        union {
 596                struct tpacket_hdr *h1;
 597                struct tpacket2_hdr *h2;
 598                void *raw;
 599        } h;
 600        u8 * skb_head = skb->data;
 601        int skb_len = skb->len;
 602        unsigned int snaplen, res;
 603        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
 604        unsigned short macoff, netoff, hdrlen;
 605        struct sk_buff *copy_skb = NULL;
 606        struct timeval tv;
 607        struct timespec ts;
 608
 609        if (skb->pkt_type == PACKET_LOOPBACK)
 610                goto drop;
 611
 612        sk = pt->af_packet_priv;
 613        po = pkt_sk(sk);
 614
 615        if (dev_net(dev) != sock_net(sk))
 616                goto drop;
 617
 618        if (dev->header_ops) {
 619                if (sk->sk_type != SOCK_DGRAM)
 620                        skb_push(skb, skb->data - skb_mac_header(skb));
 621                else if (skb->pkt_type == PACKET_OUTGOING) {
 622                        /* Special case: outgoing packets have ll header at head */
 623                        skb_pull(skb, skb_network_offset(skb));
 624                }
 625        }
 626
 627        if (skb->ip_summed == CHECKSUM_PARTIAL)
 628                status |= TP_STATUS_CSUMNOTREADY;
 629
 630        snaplen = skb->len;
 631
 632        res = run_filter(skb, sk, snaplen);
 633        if (!res)
 634                goto drop_n_restore;
 635        if (snaplen > res)
 636                snaplen = res;
 637
 638        if (sk->sk_type == SOCK_DGRAM) {
 639                macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
 640                                  po->tp_reserve;
 641        } else {
 642                unsigned maclen = skb_network_offset(skb);
 643                netoff = TPACKET_ALIGN(po->tp_hdrlen +
 644                                       (maclen < 16 ? 16 : maclen)) +
 645                        po->tp_reserve;
 646                macoff = netoff - maclen;
 647        }
 648
 649        if (macoff + snaplen > po->frame_size) {
 650                if (po->copy_thresh &&
 651                    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
 652                    (unsigned)sk->sk_rcvbuf) {
 653                        if (skb_shared(skb)) {
 654                                copy_skb = skb_clone(skb, GFP_ATOMIC);
 655                        } else {
 656                                copy_skb = skb_get(skb);
 657                                skb_head = skb->data;
 658                        }
 659                        if (copy_skb)
 660                                skb_set_owner_r(copy_skb, sk);
 661                }
 662                snaplen = po->frame_size - macoff;
 663                if ((int)snaplen < 0)
 664                        snaplen = 0;
 665        }
 666
 667        spin_lock(&sk->sk_receive_queue.lock);
 668        h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
 669        if (!h.raw)
 670                goto ring_is_full;
 671        po->head = po->head != po->frame_max ? po->head+1 : 0;
 672        po->stats.tp_packets++;
 673        if (copy_skb) {
 674                status |= TP_STATUS_COPY;
 675                __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
 676        }
 677        if (!po->stats.tp_drops)
 678                status &= ~TP_STATUS_LOSING;
 679        spin_unlock(&sk->sk_receive_queue.lock);
 680
 681        skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 682
 683        switch (po->tp_version) {
 684        case TPACKET_V1:
 685                h.h1->tp_len = skb->len;
 686                h.h1->tp_snaplen = snaplen;
 687                h.h1->tp_mac = macoff;
 688                h.h1->tp_net = netoff;
 689                if (skb->tstamp.tv64)
 690                        tv = ktime_to_timeval(skb->tstamp);
 691                else
 692                        do_gettimeofday(&tv);
 693                h.h1->tp_sec = tv.tv_sec;
 694                h.h1->tp_usec = tv.tv_usec;
 695                hdrlen = sizeof(*h.h1);
 696                break;
 697        case TPACKET_V2:
 698                h.h2->tp_len = skb->len;
 699                h.h2->tp_snaplen = snaplen;
 700                h.h2->tp_mac = macoff;
 701                h.h2->tp_net = netoff;
 702                if (skb->tstamp.tv64)
 703                        ts = ktime_to_timespec(skb->tstamp);
 704                else
 705                        getnstimeofday(&ts);
 706                h.h2->tp_sec = ts.tv_sec;
 707                h.h2->tp_nsec = ts.tv_nsec;
 708                h.h2->tp_vlan_tci = skb->vlan_tci;
 709                hdrlen = sizeof(*h.h2);
 710                break;
 711        default:
 712                BUG();
 713        }
 714
 715        sll = h.raw + TPACKET_ALIGN(hdrlen);
 716        sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
 717        sll->sll_family = AF_PACKET;
 718        sll->sll_hatype = dev->type;
 719        sll->sll_protocol = skb->protocol;
 720        sll->sll_pkttype = skb->pkt_type;
 721        if (unlikely(po->origdev))
 722                sll->sll_ifindex = orig_dev->ifindex;
 723        else
 724                sll->sll_ifindex = dev->ifindex;
 725
 726        __packet_set_status(po, h.raw, status);
 727        smp_mb();
 728
 729        {
 730                struct page *p_start, *p_end;
 731                u8 *h_end = h.raw + macoff + snaplen - 1;
 732
 733                p_start = virt_to_page(h.raw);
 734                p_end = virt_to_page(h_end);
 735                while (p_start <= p_end) {
 736                        flush_dcache_page(p_start);
 737                        p_start++;
 738                }
 739        }
 740
 741        sk->sk_data_ready(sk, 0);
 742
 743drop_n_restore:
 744        if (skb_head != skb->data && skb_shared(skb)) {
 745                skb->data = skb_head;
 746                skb->len = skb_len;
 747        }
 748drop:
 749        kfree_skb(skb);
 750        return 0;
 751
 752ring_is_full:
 753        po->stats.tp_drops++;
 754        spin_unlock(&sk->sk_receive_queue.lock);
 755
 756        sk->sk_data_ready(sk, 0);
 757        if (copy_skb)
 758                kfree_skb(copy_skb);
 759        goto drop_n_restore;
 760}
 761
 762#endif
 763
 764
 765static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 766                          struct msghdr *msg, size_t len)
 767{
 768        struct sock *sk = sock->sk;
 769        struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
 770        struct sk_buff *skb;
 771        struct net_device *dev;
 772        __be16 proto;
 773        unsigned char *addr;
 774        int ifindex, err, reserve = 0;
 775
 776        /*
 777         *      Get and verify the address.
 778         */
 779
 780        if (saddr == NULL) {
 781                struct packet_sock *po = pkt_sk(sk);
 782
 783                ifindex = po->ifindex;
 784                proto   = po->num;
 785                addr    = NULL;
 786        } else {
 787                err = -EINVAL;
 788                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
 789                        goto out;
 790                if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
 791                        goto out;
 792                ifindex = saddr->sll_ifindex;
 793                proto   = saddr->sll_protocol;
 794                addr    = saddr->sll_addr;
 795        }
 796
 797
 798        dev = dev_get_by_index(sock_net(sk), ifindex);
 799        err = -ENXIO;
 800        if (dev == NULL)
 801                goto out_unlock;
 802        if (sock->type == SOCK_RAW)
 803                reserve = dev->hard_header_len;
 804
 805        err = -ENETDOWN;
 806        if (!(dev->flags & IFF_UP))
 807                goto out_unlock;
 808
 809        err = -EMSGSIZE;
 810        if (len > dev->mtu+reserve)
 811                goto out_unlock;
 812
 813        skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
 814                                msg->msg_flags & MSG_DONTWAIT, &err);
 815        if (skb==NULL)
 816                goto out_unlock;
 817
 818        skb_reserve(skb, LL_RESERVED_SPACE(dev));
 819        skb_reset_network_header(skb);
 820
 821        err = -EINVAL;
 822        if (sock->type == SOCK_DGRAM &&
 823            dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
 824                goto out_free;
 825
 826        /* Returns -EFAULT on error */
 827        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
 828        if (err)
 829                goto out_free;
 830
 831        skb->protocol = proto;
 832        skb->dev = dev;
 833        skb->priority = sk->sk_priority;
 834
 835        /*
 836         *      Now send it
 837         */
 838
 839        err = dev_queue_xmit(skb);
 840        if (err > 0 && (err = net_xmit_errno(err)) != 0)
 841                goto out_unlock;
 842
 843        dev_put(dev);
 844
 845        return(len);
 846
 847out_free:
 848        kfree_skb(skb);
 849out_unlock:
 850        if (dev)
 851                dev_put(dev);
 852out:
 853        return err;
 854}
 855
 856/*
 857 *      Close a PACKET socket. This is fairly simple. We immediately go
 858 *      to 'closed' state and remove our protocol entry in the device list.
 859 */
 860
 861static int packet_release(struct socket *sock)
 862{
 863        struct sock *sk = sock->sk;
 864        struct packet_sock *po;
 865        struct net *net;
 866
 867        if (!sk)
 868                return 0;
 869
 870        net = sock_net(sk);
 871        po = pkt_sk(sk);
 872
 873        write_lock_bh(&net->packet.sklist_lock);
 874        sk_del_node_init(sk);
 875        write_unlock_bh(&net->packet.sklist_lock);
 876
 877        /*
 878         *      Unhook packet receive handler.
 879         */
 880
 881        if (po->running) {
 882                /*
 883                 *      Remove the protocol hook
 884                 */
 885                dev_remove_pack(&po->prot_hook);
 886                po->running = 0;
 887                po->num = 0;
 888                __sock_put(sk);
 889        }
 890
 891        packet_flush_mclist(sk);
 892
 893#ifdef CONFIG_PACKET_MMAP
 894        if (po->pg_vec) {
 895                struct tpacket_req req;
 896                memset(&req, 0, sizeof(req));
 897                packet_set_ring(sk, &req, 1);
 898        }
 899#endif
 900
 901        /*
 902         *      Now the socket is dead. No more input will appear.
 903         */
 904
 905        sock_orphan(sk);
 906        sock->sk = NULL;
 907
 908        /* Purge queues */
 909
 910        skb_queue_purge(&sk->sk_receive_queue);
 911        sk_refcnt_debug_release(sk);
 912
 913        sock_put(sk);
 914        return 0;
 915}
 916
 917/*
 918 *      Attach a packet hook.
 919 */
 920
 921static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
 922{
 923        struct packet_sock *po = pkt_sk(sk);
 924        /*
 925         *      Detach an existing hook if present.
 926         */
 927
 928        lock_sock(sk);
 929
 930        spin_lock(&po->bind_lock);
 931        if (po->running) {
 932                __sock_put(sk);
 933                po->running = 0;
 934                po->num = 0;
 935                spin_unlock(&po->bind_lock);
 936                dev_remove_pack(&po->prot_hook);
 937                spin_lock(&po->bind_lock);
 938        }
 939
 940        po->num = protocol;
 941        po->prot_hook.type = protocol;
 942        po->prot_hook.dev = dev;
 943
 944        po->ifindex = dev ? dev->ifindex : 0;
 945
 946        if (protocol == 0)
 947                goto out_unlock;
 948
 949        if (!dev || (dev->flags & IFF_UP)) {
 950                dev_add_pack(&po->prot_hook);
 951                sock_hold(sk);
 952                po->running = 1;
 953        } else {
 954                sk->sk_err = ENETDOWN;
 955                if (!sock_flag(sk, SOCK_DEAD))
 956                        sk->sk_error_report(sk);
 957        }
 958
 959out_unlock:
 960        spin_unlock(&po->bind_lock);
 961        release_sock(sk);
 962        return 0;
 963}
 964
 965/*
 966 *      Bind a packet socket to a device
 967 */
 968
 969static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 970{
 971        struct sock *sk=sock->sk;
 972        char name[15];
 973        struct net_device *dev;
 974        int err = -ENODEV;
 975
 976        /*
 977         *      Check legality
 978         */
 979
 980        if (addr_len != sizeof(struct sockaddr))
 981                return -EINVAL;
 982        strlcpy(name,uaddr->sa_data,sizeof(name));
 983
 984        dev = dev_get_by_name(sock_net(sk), name);
 985        if (dev) {
 986                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
 987                dev_put(dev);
 988        }
 989        return err;
 990}
 991
 992static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 993{
 994        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
 995        struct sock *sk=sock->sk;
 996        struct net_device *dev = NULL;
 997        int err;
 998
 999
1000        /*
1001         *      Check legality
1002         */
1003
1004        if (addr_len < sizeof(struct sockaddr_ll))
1005                return -EINVAL;
1006        if (sll->sll_family != AF_PACKET)
1007                return -EINVAL;
1008
1009        if (sll->sll_ifindex) {
1010                err = -ENODEV;
1011                dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1012                if (dev == NULL)
1013                        goto out;
1014        }
1015        err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1016        if (dev)
1017                dev_put(dev);
1018
1019out:
1020        return err;
1021}
1022
1023static struct proto packet_proto = {
1024        .name     = "PACKET",
1025        .owner    = THIS_MODULE,
1026        .obj_size = sizeof(struct packet_sock),
1027};
1028
1029/*
1030 *      Create a packet of type SOCK_PACKET.
1031 */
1032
1033static int packet_create(struct net *net, struct socket *sock, int protocol)
1034{
1035        struct sock *sk;
1036        struct packet_sock *po;
1037        __be16 proto = (__force __be16)protocol; /* weird, but documented */
1038        int err;
1039
1040        if (!capable(CAP_NET_RAW))
1041                return -EPERM;
1042        if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1043            sock->type != SOCK_PACKET)
1044                return -ESOCKTNOSUPPORT;
1045
1046        sock->state = SS_UNCONNECTED;
1047
1048        err = -ENOBUFS;
1049        sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1050        if (sk == NULL)
1051                goto out;
1052
1053        sock->ops = &packet_ops;
1054        if (sock->type == SOCK_PACKET)
1055                sock->ops = &packet_ops_spkt;
1056
1057        sock_init_data(sock, sk);
1058
1059        po = pkt_sk(sk);
1060        sk->sk_family = PF_PACKET;
1061        po->num = proto;
1062
1063        sk->sk_destruct = packet_sock_destruct;
1064        sk_refcnt_debug_inc(sk);
1065
1066        /*
1067         *      Attach a protocol block
1068         */
1069
1070        spin_lock_init(&po->bind_lock);
1071        po->prot_hook.func = packet_rcv;
1072
1073        if (sock->type == SOCK_PACKET)
1074                po->prot_hook.func = packet_rcv_spkt;
1075
1076        po->prot_hook.af_packet_priv = sk;
1077
1078        if (proto) {
1079                po->prot_hook.type = proto;
1080                dev_add_pack(&po->prot_hook);
1081                sock_hold(sk);
1082                po->running = 1;
1083        }
1084
1085        write_lock_bh(&net->packet.sklist_lock);
1086        sk_add_node(sk, &net->packet.sklist);
1087        write_unlock_bh(&net->packet.sklist_lock);
1088        return(0);
1089out:
1090        return err;
1091}
1092
1093/*
1094 *      Pull a packet from our receive queue and hand it to the user.
1095 *      If necessary we block.
1096 */
1097
1098static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1099                          struct msghdr *msg, size_t len, int flags)
1100{
1101        struct sock *sk = sock->sk;
1102        struct sk_buff *skb;
1103        int copied, err;
1104        struct sockaddr_ll *sll;
1105
1106        err = -EINVAL;
1107        if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1108                goto out;
1109
1110#if 0
1111        /* What error should we return now? EUNATTACH? */
1112        if (pkt_sk(sk)->ifindex < 0)
1113                return -ENODEV;
1114#endif
1115
1116        /*
1117         *      Call the generic datagram receiver. This handles all sorts
1118         *      of horrible races and re-entrancy so we can forget about it
1119         *      in the protocol layers.
1120         *
1121         *      Now it will return ENETDOWN, if device have just gone down,
1122         *      but then it will block.
1123         */
1124
1125        skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1126
1127        /*
1128         *      An error occurred so return it. Because skb_recv_datagram()
1129         *      handles the blocking we don't see and worry about blocking
1130         *      retries.
1131         */
1132
1133        if (skb == NULL)
1134                goto out;
1135
1136        /*
1137         *      If the address length field is there to be filled in, we fill
1138         *      it in now.
1139         */
1140
1141        sll = &PACKET_SKB_CB(skb)->sa.ll;
1142        if (sock->type == SOCK_PACKET)
1143                msg->msg_namelen = sizeof(struct sockaddr_pkt);
1144        else
1145                msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1146
1147        /*
1148         *      You lose any data beyond the buffer you gave. If it worries a
1149         *      user program they can ask the device for its MTU anyway.
1150         */
1151
1152        copied = skb->len;
1153        if (copied > len)
1154        {
1155                copied=len;
1156                msg->msg_flags|=MSG_TRUNC;
1157        }
1158
1159        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1160        if (err)
1161                goto out_free;
1162
1163        sock_recv_timestamp(msg, sk, skb);
1164
1165        if (msg->msg_name)
1166                memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1167                       msg->msg_namelen);
1168
1169        if (pkt_sk(sk)->auxdata) {
1170                struct tpacket_auxdata aux;
1171
1172                aux.tp_status = TP_STATUS_USER;
1173                if (skb->ip_summed == CHECKSUM_PARTIAL)
1174                        aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1175                aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1176                aux.tp_snaplen = skb->len;
1177                aux.tp_mac = 0;
1178                aux.tp_net = skb_network_offset(skb);
1179                aux.tp_vlan_tci = skb->vlan_tci;
1180
1181                put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1182        }
1183
1184        /*
1185         *      Free or return the buffer as appropriate. Again this
1186         *      hides all the races and re-entrancy issues from us.
1187         */
1188        err = (flags&MSG_TRUNC) ? skb->len : copied;
1189
1190out_free:
1191        skb_free_datagram(sk, skb);
1192out:
1193        return err;
1194}
1195
1196static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1197                               int *uaddr_len, int peer)
1198{
1199        struct net_device *dev;
1200        struct sock *sk = sock->sk;
1201
1202        if (peer)
1203                return -EOPNOTSUPP;
1204
1205        uaddr->sa_family = AF_PACKET;
1206        dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
1207        if (dev) {
1208                strlcpy(uaddr->sa_data, dev->name, 15);
1209                dev_put(dev);
1210        } else
1211                memset(uaddr->sa_data, 0, 14);
1212        *uaddr_len = sizeof(*uaddr);
1213
1214        return 0;
1215}
1216
1217static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1218                          int *uaddr_len, int peer)
1219{
1220        struct net_device *dev;
1221        struct sock *sk = sock->sk;
1222        struct packet_sock *po = pkt_sk(sk);
1223        struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1224
1225        if (peer)
1226                return -EOPNOTSUPP;
1227
1228        sll->sll_family = AF_PACKET;
1229        sll->sll_ifindex = po->ifindex;
1230        sll->sll_protocol = po->num;
1231        dev = dev_get_by_index(sock_net(sk), po->ifindex);
1232        if (dev) {
1233                sll->sll_hatype = dev->type;
1234                sll->sll_halen = dev->addr_len;
1235                memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1236                dev_put(dev);
1237        } else {
1238                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1239                sll->sll_halen = 0;
1240        }
1241        *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1242
1243        return 0;
1244}
1245
1246static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1247                         int what)
1248{
1249        switch (i->type) {
1250        case PACKET_MR_MULTICAST:
1251                if (what > 0)
1252                        dev_mc_add(dev, i->addr, i->alen, 0);
1253                else
1254                        dev_mc_delete(dev, i->addr, i->alen, 0);
1255                break;
1256        case PACKET_MR_PROMISC:
1257                return dev_set_promiscuity(dev, what);
1258                break;
1259        case PACKET_MR_ALLMULTI:
1260                return dev_set_allmulti(dev, what);
1261                break;
1262        default:;
1263        }
1264        return 0;
1265}
1266
1267static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1268{
1269        for ( ; i; i=i->next) {
1270                if (i->ifindex == dev->ifindex)
1271                        packet_dev_mc(dev, i, what);
1272        }
1273}
1274
1275static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1276{
1277        struct packet_sock *po = pkt_sk(sk);
1278        struct packet_mclist *ml, *i;
1279        struct net_device *dev;
1280        int err;
1281
1282        rtnl_lock();
1283
1284        err = -ENODEV;
1285        dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1286        if (!dev)
1287                goto done;
1288
1289        err = -EINVAL;
1290        if (mreq->mr_alen > dev->addr_len)
1291                goto done;
1292
1293        err = -ENOBUFS;
1294        i = kmalloc(sizeof(*i), GFP_KERNEL);
1295        if (i == NULL)
1296                goto done;
1297
1298        err = 0;
1299        for (ml = po->mclist; ml; ml = ml->next) {
1300                if (ml->ifindex == mreq->mr_ifindex &&
1301                    ml->type == mreq->mr_type &&
1302                    ml->alen == mreq->mr_alen &&
1303                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1304                        ml->count++;
1305                        /* Free the new element ... */
1306                        kfree(i);
1307                        goto done;
1308                }
1309        }
1310
1311        i->type = mreq->mr_type;
1312        i->ifindex = mreq->mr_ifindex;
1313        i->alen = mreq->mr_alen;
1314        memcpy(i->addr, mreq->mr_address, i->alen);
1315        i->count = 1;
1316        i->next = po->mclist;
1317        po->mclist = i;
1318        err = packet_dev_mc(dev, i, 1);
1319        if (err) {
1320                po->mclist = i->next;
1321                kfree(i);
1322        }
1323
1324done:
1325        rtnl_unlock();
1326        return err;
1327}
1328
1329static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1330{
1331        struct packet_mclist *ml, **mlp;
1332
1333        rtnl_lock();
1334
1335        for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1336                if (ml->ifindex == mreq->mr_ifindex &&
1337                    ml->type == mreq->mr_type &&
1338                    ml->alen == mreq->mr_alen &&
1339                    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1340                        if (--ml->count == 0) {
1341                                struct net_device *dev;
1342                                *mlp = ml->next;
1343                                dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1344                                if (dev) {
1345                                        packet_dev_mc(dev, ml, -1);
1346                                        dev_put(dev);
1347                                }
1348                                kfree(ml);
1349                        }
1350                        rtnl_unlock();
1351                        return 0;
1352                }
1353        }
1354        rtnl_unlock();
1355        return -EADDRNOTAVAIL;
1356}
1357
1358static void packet_flush_mclist(struct sock *sk)
1359{
1360        struct packet_sock *po = pkt_sk(sk);
1361        struct packet_mclist *ml;
1362
1363        if (!po->mclist)
1364                return;
1365
1366        rtnl_lock();
1367        while ((ml = po->mclist) != NULL) {
1368                struct net_device *dev;
1369
1370                po->mclist = ml->next;
1371                if ((dev = dev_get_by_index(sock_net(sk), ml->ifindex)) != NULL) {
1372                        packet_dev_mc(dev, ml, -1);
1373                        dev_put(dev);
1374                }
1375                kfree(ml);
1376        }
1377        rtnl_unlock();
1378}
1379
1380static int
1381packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1382{
1383        struct sock *sk = sock->sk;
1384        struct packet_sock *po = pkt_sk(sk);
1385        int ret;
1386
1387        if (level != SOL_PACKET)
1388                return -ENOPROTOOPT;
1389
1390        switch(optname) {
1391        case PACKET_ADD_MEMBERSHIP:
1392        case PACKET_DROP_MEMBERSHIP:
1393        {
1394                struct packet_mreq_max mreq;
1395                int len = optlen;
1396                memset(&mreq, 0, sizeof(mreq));
1397                if (len < sizeof(struct packet_mreq))
1398                        return -EINVAL;
1399                if (len > sizeof(mreq))
1400                        len = sizeof(mreq);
1401                if (copy_from_user(&mreq,optval,len))
1402                        return -EFAULT;
1403                if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1404                        return -EINVAL;
1405                if (optname == PACKET_ADD_MEMBERSHIP)
1406                        ret = packet_mc_add(sk, &mreq);
1407                else
1408                        ret = packet_mc_drop(sk, &mreq);
1409                return ret;
1410        }
1411
1412#ifdef CONFIG_PACKET_MMAP
1413        case PACKET_RX_RING:
1414        {
1415                struct tpacket_req req;
1416
1417                if (optlen<sizeof(req))
1418                        return -EINVAL;
1419                if (copy_from_user(&req,optval,sizeof(req)))
1420                        return -EFAULT;
1421                return packet_set_ring(sk, &req, 0);
1422        }
1423        case PACKET_COPY_THRESH:
1424        {
1425                int val;
1426
1427                if (optlen!=sizeof(val))
1428                        return -EINVAL;
1429                if (copy_from_user(&val,optval,sizeof(val)))
1430                        return -EFAULT;
1431
1432                pkt_sk(sk)->copy_thresh = val;
1433                return 0;
1434        }
1435        case PACKET_VERSION:
1436        {
1437                int val;
1438
1439                if (optlen != sizeof(val))
1440                        return -EINVAL;
1441                if (po->pg_vec)
1442                        return -EBUSY;
1443                if (copy_from_user(&val, optval, sizeof(val)))
1444                        return -EFAULT;
1445                switch (val) {
1446                case TPACKET_V1:
1447                case TPACKET_V2:
1448                        po->tp_version = val;
1449                        return 0;
1450                default:
1451                        return -EINVAL;
1452                }
1453        }
1454        case PACKET_RESERVE:
1455        {
1456                unsigned int val;
1457
1458                if (optlen != sizeof(val))
1459                        return -EINVAL;
1460                if (po->pg_vec)
1461                        return -EBUSY;
1462                if (copy_from_user(&val, optval, sizeof(val)))
1463                        return -EFAULT;
1464                po->tp_reserve = val;
1465                return 0;
1466        }
1467#endif
1468        case PACKET_AUXDATA:
1469        {
1470                int val;
1471
1472                if (optlen < sizeof(val))
1473                        return -EINVAL;
1474                if (copy_from_user(&val, optval, sizeof(val)))
1475                        return -EFAULT;
1476
1477                po->auxdata = !!val;
1478                return 0;
1479        }
1480        case PACKET_ORIGDEV:
1481        {
1482                int val;
1483
1484                if (optlen < sizeof(val))
1485                        return -EINVAL;
1486                if (copy_from_user(&val, optval, sizeof(val)))
1487                        return -EFAULT;
1488
1489                po->origdev = !!val;
1490                return 0;
1491        }
1492        default:
1493                return -ENOPROTOOPT;
1494        }
1495}
1496
1497static int packet_getsockopt(struct socket *sock, int level, int optname,
1498                             char __user *optval, int __user *optlen)
1499{
1500        int len;
1501        int val;
1502        struct sock *sk = sock->sk;
1503        struct packet_sock *po = pkt_sk(sk);
1504        void *data;
1505        struct tpacket_stats st;
1506
1507        if (level != SOL_PACKET)
1508                return -ENOPROTOOPT;
1509
1510        if (get_user(len, optlen))
1511                return -EFAULT;
1512
1513        if (len < 0)
1514                return -EINVAL;
1515
1516        switch(optname) {
1517        case PACKET_STATISTICS:
1518                if (len > sizeof(struct tpacket_stats))
1519                        len = sizeof(struct tpacket_stats);
1520                spin_lock_bh(&sk->sk_receive_queue.lock);
1521                st = po->stats;
1522                memset(&po->stats, 0, sizeof(st));
1523                spin_unlock_bh(&sk->sk_receive_queue.lock);
1524                st.tp_packets += st.tp_drops;
1525
1526                data = &st;
1527                break;
1528        case PACKET_AUXDATA:
1529                if (len > sizeof(int))
1530                        len = sizeof(int);
1531                val = po->auxdata;
1532
1533                data = &val;
1534                break;
1535        case PACKET_ORIGDEV:
1536                if (len > sizeof(int))
1537                        len = sizeof(int);
1538                val = po->origdev;
1539
1540                data = &val;
1541                break;
1542#ifdef CONFIG_PACKET_MMAP
1543        case PACKET_VERSION:
1544                if (len > sizeof(int))
1545                        len = sizeof(int);
1546                val = po->tp_version;
1547                data = &val;
1548                break;
1549        case PACKET_HDRLEN:
1550                if (len > sizeof(int))
1551                        len = sizeof(int);
1552                if (copy_from_user(&val, optval, len))
1553                        return -EFAULT;
1554                switch (val) {
1555                case TPACKET_V1:
1556                        val = sizeof(struct tpacket_hdr);
1557                        break;
1558                case TPACKET_V2:
1559                        val = sizeof(struct tpacket2_hdr);
1560                        break;
1561                default:
1562                        return -EINVAL;
1563                }
1564                data = &val;
1565                break;
1566        case PACKET_RESERVE:
1567                if (len > sizeof(unsigned int))
1568                        len = sizeof(unsigned int);
1569                val = po->tp_reserve;
1570                data = &val;
1571                break;
1572#endif
1573        default:
1574                return -ENOPROTOOPT;
1575        }
1576
1577        if (put_user(len, optlen))
1578                return -EFAULT;
1579        if (copy_to_user(optval, data, len))
1580                return -EFAULT;
1581        return 0;
1582}
1583
1584
1585static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1586{
1587        struct sock *sk;
1588        struct hlist_node *node;
1589        struct net_device *dev = data;
1590        struct net *net = dev_net(dev);
1591
1592        read_lock(&net->packet.sklist_lock);
1593        sk_for_each(sk, node, &net->packet.sklist) {
1594                struct packet_sock *po = pkt_sk(sk);
1595
1596                switch (msg) {
1597                case NETDEV_UNREGISTER:
1598                        if (po->mclist)
1599                                packet_dev_mclist(dev, po->mclist, -1);
1600                        /* fallthrough */
1601
1602                case NETDEV_DOWN:
1603                        if (dev->ifindex == po->ifindex) {
1604                                spin_lock(&po->bind_lock);
1605                                if (po->running) {
1606                                        __dev_remove_pack(&po->prot_hook);
1607                                        __sock_put(sk);
1608                                        po->running = 0;
1609                                        sk->sk_err = ENETDOWN;
1610                                        if (!sock_flag(sk, SOCK_DEAD))
1611                                                sk->sk_error_report(sk);
1612                                }
1613                                if (msg == NETDEV_UNREGISTER) {
1614                                        po->ifindex = -1;
1615                                        po->prot_hook.dev = NULL;
1616                                }
1617                                spin_unlock(&po->bind_lock);
1618                        }
1619                        break;
1620                case NETDEV_UP:
1621                        spin_lock(&po->bind_lock);
1622                        if (dev->ifindex == po->ifindex && po->num &&
1623                            !po->running) {
1624                                dev_add_pack(&po->prot_hook);
1625                                sock_hold(sk);
1626                                po->running = 1;
1627                        }
1628                        spin_unlock(&po->bind_lock);
1629                        break;
1630                }
1631        }
1632        read_unlock(&net->packet.sklist_lock);
1633        return NOTIFY_DONE;
1634}
1635
1636
1637static int packet_ioctl(struct socket *sock, unsigned int cmd,
1638                        unsigned long arg)
1639{
1640        struct sock *sk = sock->sk;
1641
1642        switch(cmd) {
1643                case SIOCOUTQ:
1644                {
1645                        int amount = atomic_read(&sk->sk_wmem_alloc);
1646                        return put_user(amount, (int __user *)arg);
1647                }
1648                case SIOCINQ:
1649                {
1650                        struct sk_buff *skb;
1651                        int amount = 0;
1652
1653                        spin_lock_bh(&sk->sk_receive_queue.lock);
1654                        skb = skb_peek(&sk->sk_receive_queue);
1655                        if (skb)
1656                                amount = skb->len;
1657                        spin_unlock_bh(&sk->sk_receive_queue.lock);
1658                        return put_user(amount, (int __user *)arg);
1659                }
1660                case SIOCGSTAMP:
1661                        return sock_get_timestamp(sk, (struct timeval __user *)arg);
1662                case SIOCGSTAMPNS:
1663                        return sock_get_timestampns(sk, (struct timespec __user *)arg);
1664
1665#ifdef CONFIG_INET
1666                case SIOCADDRT:
1667                case SIOCDELRT:
1668                case SIOCDARP:
1669                case SIOCGARP:
1670                case SIOCSARP:
1671                case SIOCGIFADDR:
1672                case SIOCSIFADDR:
1673                case SIOCGIFBRDADDR:
1674                case SIOCSIFBRDADDR:
1675                case SIOCGIFNETMASK:
1676                case SIOCSIFNETMASK:
1677                case SIOCGIFDSTADDR:
1678                case SIOCSIFDSTADDR:
1679                case SIOCSIFFLAGS:
1680                        if (!net_eq(sock_net(sk), &init_net))
1681                                return -ENOIOCTLCMD;
1682                        return inet_dgram_ops.ioctl(sock, cmd, arg);
1683#endif
1684
1685                default:
1686                        return -ENOIOCTLCMD;
1687        }
1688        return 0;
1689}
1690
1691#ifndef CONFIG_PACKET_MMAP
1692#define packet_mmap sock_no_mmap
1693#define packet_poll datagram_poll
1694#else
1695
1696static unsigned int packet_poll(struct file * file, struct socket *sock,
1697                                poll_table *wait)
1698{
1699        struct sock *sk = sock->sk;
1700        struct packet_sock *po = pkt_sk(sk);
1701        unsigned int mask = datagram_poll(file, sock, wait);
1702
1703        spin_lock_bh(&sk->sk_receive_queue.lock);
1704        if (po->pg_vec) {
1705                unsigned last = po->head ? po->head-1 : po->frame_max;
1706
1707                if (packet_lookup_frame(po, last, TP_STATUS_USER))
1708                        mask |= POLLIN | POLLRDNORM;
1709        }
1710        spin_unlock_bh(&sk->sk_receive_queue.lock);
1711        return mask;
1712}
1713
1714
1715/* Dirty? Well, I still did not learn better way to account
1716 * for user mmaps.
1717 */
1718
1719static void packet_mm_open(struct vm_area_struct *vma)
1720{
1721        struct file *file = vma->vm_file;
1722        struct socket * sock = file->private_data;
1723        struct sock *sk = sock->sk;
1724
1725        if (sk)
1726                atomic_inc(&pkt_sk(sk)->mapped);
1727}
1728
1729static void packet_mm_close(struct vm_area_struct *vma)
1730{
1731        struct file *file = vma->vm_file;
1732        struct socket * sock = file->private_data;
1733        struct sock *sk = sock->sk;
1734
1735        if (sk)
1736                atomic_dec(&pkt_sk(sk)->mapped);
1737}
1738
1739static struct vm_operations_struct packet_mmap_ops = {
1740        .open = packet_mm_open,
1741        .close =packet_mm_close,
1742};
1743
/*
 * Release a block vector: every non-NULL entry among the first 'len' is
 * freed as a page allocation of the given order, then the vector itself.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++) {
		/* Slots may be empty after a partially failed allocation. */
		if (unlikely(!pg_vec[idx]))
			continue;
		free_pages((unsigned long)pg_vec[idx], order);
	}
	kfree(pg_vec);
}
1754
1755static inline char *alloc_one_pg_vec_page(unsigned long order)
1756{
1757        return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1758                                         order);
1759}
1760
1761static char **alloc_pg_vec(struct tpacket_req *req, int order)
1762{
1763        unsigned int block_nr = req->tp_block_nr;
1764        char **pg_vec;
1765        int i;
1766
1767        pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1768        if (unlikely(!pg_vec))
1769                goto out;
1770
1771        for (i = 0; i < block_nr; i++) {
1772                pg_vec[i] = alloc_one_pg_vec_page(order);
1773                if (unlikely(!pg_vec[i]))
1774                        goto out_free_pgvec;
1775        }
1776
1777out:
1778        return pg_vec;
1779
1780out_free_pgvec:
1781        free_pg_vec(pg_vec, order, block_nr);
1782        pg_vec = NULL;
1783        goto out;
1784}
1785
1786static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1787{
1788        char **pg_vec = NULL;
1789        struct packet_sock *po = pkt_sk(sk);
1790        int was_running, order = 0;
1791        __be16 num;
1792        int err = 0;
1793
1794        if (req->tp_block_nr) {
1795                int i;
1796
1797                /* Sanity tests and some calculations */
1798
1799                if (unlikely(po->pg_vec))
1800                        return -EBUSY;
1801
1802                switch (po->tp_version) {
1803                case TPACKET_V1:
1804                        po->tp_hdrlen = TPACKET_HDRLEN;
1805                        break;
1806                case TPACKET_V2:
1807                        po->tp_hdrlen = TPACKET2_HDRLEN;
1808                        break;
1809                }
1810
1811                if (unlikely((int)req->tp_block_size <= 0))
1812                        return -EINVAL;
1813                if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1814                        return -EINVAL;
1815                if (unlikely(req->tp_frame_size < po->tp_hdrlen +
1816                                                  po->tp_reserve))
1817                        return -EINVAL;
1818                if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1819                        return -EINVAL;
1820
1821                po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1822                if (unlikely(po->frames_per_block <= 0))
1823                        return -EINVAL;
1824                if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1825                             req->tp_frame_nr))
1826                        return -EINVAL;
1827
1828                err = -ENOMEM;
1829                order = get_order(req->tp_block_size);
1830                pg_vec = alloc_pg_vec(req, order);
1831                if (unlikely(!pg_vec))
1832                        goto out;
1833
1834                for (i = 0; i < req->tp_block_nr; i++) {
1835                        void *ptr = pg_vec[i];
1836                        int k;
1837
1838                        for (k = 0; k < po->frames_per_block; k++) {
1839                                __packet_set_status(po, ptr, TP_STATUS_KERNEL);
1840                                ptr += req->tp_frame_size;
1841                        }
1842                }
1843                /* Done */
1844        } else {
1845                if (unlikely(req->tp_frame_nr))
1846                        return -EINVAL;
1847        }
1848
1849        lock_sock(sk);
1850
1851        /* Detach socket from network */
1852        spin_lock(&po->bind_lock);
1853        was_running = po->running;
1854        num = po->num;
1855        if (was_running) {
1856                __dev_remove_pack(&po->prot_hook);
1857                po->num = 0;
1858                po->running = 0;
1859                __sock_put(sk);
1860        }
1861        spin_unlock(&po->bind_lock);
1862
1863        synchronize_net();
1864
1865        err = -EBUSY;
1866        if (closing || atomic_read(&po->mapped) == 0) {
1867                err = 0;
1868#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1869
1870                spin_lock_bh(&sk->sk_receive_queue.lock);
1871                pg_vec = XC(po->pg_vec, pg_vec);
1872                po->frame_max = (req->tp_frame_nr - 1);
1873                po->head = 0;
1874                po->frame_size = req->tp_frame_size;
1875                spin_unlock_bh(&sk->sk_receive_queue.lock);
1876
1877                order = XC(po->pg_vec_order, order);
1878                req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1879
1880                po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1881                po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1882                skb_queue_purge(&sk->sk_receive_queue);
1883#undef XC
1884                if (atomic_read(&po->mapped))
1885                        printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1886        }
1887
1888        spin_lock(&po->bind_lock);
1889        if (was_running && !po->running) {
1890                sock_hold(sk);
1891                po->running = 1;
1892                po->num = num;
1893                dev_add_pack(&po->prot_hook);
1894        }
1895        spin_unlock(&po->bind_lock);
1896
1897        release_sock(sk);
1898
1899        if (pg_vec)
1900                free_pg_vec(pg_vec, order, req->tp_block_nr);
1901out:
1902        return err;
1903}
1904
1905static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1906{
1907        struct sock *sk = sock->sk;
1908        struct packet_sock *po = pkt_sk(sk);
1909        unsigned long size;
1910        unsigned long start;
1911        int err = -EINVAL;
1912        int i;
1913
1914        if (vma->vm_pgoff)
1915                return -EINVAL;
1916
1917        size = vma->vm_end - vma->vm_start;
1918
1919        lock_sock(sk);
1920        if (po->pg_vec == NULL)
1921                goto out;
1922        if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1923                goto out;
1924
1925        start = vma->vm_start;
1926        for (i = 0; i < po->pg_vec_len; i++) {
1927                struct page *page = virt_to_page(po->pg_vec[i]);
1928                int pg_num;
1929
1930                for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1931                        err = vm_insert_page(vma, start, page);
1932                        if (unlikely(err))
1933                                goto out;
1934                        start += PAGE_SIZE;
1935                }
1936        }
1937        atomic_inc(&po->mapped);
1938        vma->vm_ops = &packet_mmap_ops;
1939        err = 0;
1940
1941out:
1942        release_sock(sk);
1943        return err;
1944}
1945#endif
1946
1947
1948static const struct proto_ops packet_ops_spkt = {
1949        .family =       PF_PACKET,
1950        .owner =        THIS_MODULE,
1951        .release =      packet_release,
1952        .bind =         packet_bind_spkt,
1953        .connect =      sock_no_connect,
1954        .socketpair =   sock_no_socketpair,
1955        .accept =       sock_no_accept,
1956        .getname =      packet_getname_spkt,
1957        .poll =         datagram_poll,
1958        .ioctl =        packet_ioctl,
1959        .listen =       sock_no_listen,
1960        .shutdown =     sock_no_shutdown,
1961        .setsockopt =   sock_no_setsockopt,
1962        .getsockopt =   sock_no_getsockopt,
1963        .sendmsg =      packet_sendmsg_spkt,
1964        .recvmsg =      packet_recvmsg,
1965        .mmap =         sock_no_mmap,
1966        .sendpage =     sock_no_sendpage,
1967};
1968
1969static const struct proto_ops packet_ops = {
1970        .family =       PF_PACKET,
1971        .owner =        THIS_MODULE,
1972        .release =      packet_release,
1973        .bind =         packet_bind,
1974        .connect =      sock_no_connect,
1975        .socketpair =   sock_no_socketpair,
1976        .accept =       sock_no_accept,
1977        .getname =      packet_getname,
1978        .poll =         packet_poll,
1979        .ioctl =        packet_ioctl,
1980        .listen =       sock_no_listen,
1981        .shutdown =     sock_no_shutdown,
1982        .setsockopt =   packet_setsockopt,
1983        .getsockopt =   packet_getsockopt,
1984        .sendmsg =      packet_sendmsg,
1985        .recvmsg =      packet_recvmsg,
1986        .mmap =         packet_mmap,
1987        .sendpage =     sock_no_sendpage,
1988};
1989
1990static struct net_proto_family packet_family_ops = {
1991        .family =       PF_PACKET,
1992        .create =       packet_create,
1993        .owner  =       THIS_MODULE,
1994};
1995
1996static struct notifier_block packet_netdev_notifier = {
1997        .notifier_call =packet_notifier,
1998};
1999
2000#ifdef CONFIG_PROC_FS
2001static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2002{
2003        struct sock *s;
2004        struct hlist_node *node;
2005
2006        sk_for_each(s, node, &net->packet.sklist) {
2007                if (!off--)
2008                        return s;
2009        }
2010        return NULL;
2011}
2012
2013static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2014        __acquires(seq_file_net(seq)->packet.sklist_lock)
2015{
2016        struct net *net = seq_file_net(seq);
2017        read_lock(&net->packet.sklist_lock);
2018        return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2019}
2020
2021static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2022{
2023        struct net *net = seq_file_net(seq);
2024        ++*pos;
2025        return  (v == SEQ_START_TOKEN)
2026                ? sk_head(&net->packet.sklist)
2027                : sk_next((struct sock*)v) ;
2028}
2029
2030static void packet_seq_stop(struct seq_file *seq, void *v)
2031        __releases(seq_file_net(seq)->packet.sklist_lock)
2032{
2033        struct net *net = seq_file_net(seq);
2034        read_unlock(&net->packet.sklist_lock);
2035}
2036
2037static int packet_seq_show(struct seq_file *seq, void *v)
2038{
2039        if (v == SEQ_START_TOKEN)
2040                seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2041        else {
2042                struct sock *s = v;
2043                const struct packet_sock *po = pkt_sk(s);
2044
2045                seq_printf(seq,
2046                           "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2047                           s,
2048                           atomic_read(&s->sk_refcnt),
2049                           s->sk_type,
2050                           ntohs(po->num),
2051                           po->ifindex,
2052                           po->running,
2053                           atomic_read(&s->sk_rmem_alloc),
2054                           sock_i_uid(s),
2055                           sock_i_ino(s) );
2056        }
2057
2058        return 0;
2059}
2060
2061static const struct seq_operations packet_seq_ops = {
2062        .start  = packet_seq_start,
2063        .next   = packet_seq_next,
2064        .stop   = packet_seq_stop,
2065        .show   = packet_seq_show,
2066};
2067
2068static int packet_seq_open(struct inode *inode, struct file *file)
2069{
2070        return seq_open_net(inode, file, &packet_seq_ops,
2071                            sizeof(struct seq_net_private));
2072}
2073
2074static const struct file_operations packet_seq_fops = {
2075        .owner          = THIS_MODULE,
2076        .open           = packet_seq_open,
2077        .read           = seq_read,
2078        .llseek         = seq_lseek,
2079        .release        = seq_release_net,
2080};
2081
2082#endif
2083
2084static int packet_net_init(struct net *net)
2085{
2086        rwlock_init(&net->packet.sklist_lock);
2087        INIT_HLIST_HEAD(&net->packet.sklist);
2088
2089        if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2090                return -ENOMEM;
2091
2092        return 0;
2093}
2094
/* Per-namespace teardown: remove the "packet" proc entry. */
static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2099
2100static struct pernet_operations packet_net_ops = {
2101        .init = packet_net_init,
2102        .exit = packet_net_exit,
2103};
2104
2105
2106static void __exit packet_exit(void)
2107{
2108        unregister_netdevice_notifier(&packet_netdev_notifier);
2109        unregister_pernet_subsys(&packet_net_ops);
2110        sock_unregister(PF_PACKET);
2111        proto_unregister(&packet_proto);
2112}
2113
2114static int __init packet_init(void)
2115{
2116        int rc = proto_register(&packet_proto, 0);
2117
2118        if (rc != 0)
2119                goto out;
2120
2121        sock_register(&packet_family_ops);
2122        register_pernet_subsys(&packet_net_ops);
2123        register_netdevice_notifier(&packet_netdev_notifier);
2124out:
2125        return rc;
2126}
2127
2128module_init(packet_init);
2129module_exit(packet_exit);
2130MODULE_LICENSE("GPL");
2131MODULE_ALIAS_NETPROTO(PF_PACKET);
2132
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.