/*
 * PF_PACKET protocol family: raw (SOCK_RAW), cooked (SOCK_DGRAM) and legacy
 * SOCK_PACKET packet sockets.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has a longer address field */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

/* kbdq - kernel block descriptor queue (one per TPACKET_V3 rx ring) */
struct tpacket_kbdq_core {
	struct pgv	*pkbdq;
	unsigned int	feature_req_word;
	unsigned int	hdrlen;
	unsigned char	reset_pending_on_curr_blk;
	unsigned char	delete_blk_timer;
	unsigned short	kactive_blk_num;
	unsigned short	blk_sizeof_priv;

	/* The block number the retire timer saw on its previous run; if it is
	 * unchanged when the timer fires, the block has been open for a full
	 * timeout period.
	 */
	unsigned short	last_kactive_blk_num;

	char		*pkblk_start;
	char		*pkblk_end;
	int		kblk_size;
	unsigned int	knum_blocks;
	uint64_t	knxt_seq_num;
	char		*prev;
	char		*nxt_offset;
	struct sk_buff	*skb;

	atomic_t	blk_fill_in_prog;

	/* Default retire-block timeout, in msecs */
#define DEFAULT_PRB_RETIRE_TOV	(8)

	unsigned short	retire_blk_tov;
	unsigned short	version;
	unsigned long	tov_in_jiffies;

	/* timer to retire an outstanding block */
	struct timer_list retire_blk_timer;
};
216
217#define PGV_FROM_VMALLOC 1
218struct pgv {
219 char *buffer;
220};
221
222struct packet_ring_buffer {
223 struct pgv *pg_vec;
224 unsigned int head;
225 unsigned int frames_per_block;
226 unsigned int frame_size;
227 unsigned int frame_max;
228
229 unsigned int pg_vec_order;
230 unsigned int pg_vec_pages;
231 unsigned int pg_vec_len;
232
233 struct tpacket_kbdq_core prb_bdqc;
234 atomic_t pending;
235};
236
237#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
238#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
239#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
240#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
241#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
242#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
243#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
244
245struct packet_sock;
246static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
247
248static void *packet_previous_frame(struct packet_sock *po,
249 struct packet_ring_buffer *rb,
250 int status);
251static void packet_increment_head(struct packet_ring_buffer *buff);
252static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
253 struct tpacket_block_desc *);
254static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
255 struct packet_sock *);
256static void prb_retire_current_block(struct tpacket_kbdq_core *,
257 struct packet_sock *, unsigned int status);
258static int prb_queue_frozen(struct tpacket_kbdq_core *);
259static void prb_open_block(struct tpacket_kbdq_core *,
260 struct tpacket_block_desc *);
261static void prb_retire_rx_blk_timer_expired(unsigned long);
262static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
263static void prb_init_blk_timer(struct packet_sock *,
264 struct tpacket_kbdq_core *,
265 void (*func) (unsigned long));
266static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
267static void prb_clear_rxhash(struct tpacket_kbdq_core *,
268 struct tpacket3_hdr *);
269static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
270 struct tpacket3_hdr *);
271static void packet_flush_mclist(struct sock *sk);
272
struct packet_fanout;
struct packet_sock {
	/* struct sock has to be the first member of packet_sock:
	 * pkt_sk() below simply casts a struct sock * to a struct packet_sock *.
	 */
	struct sock		sk;
	struct packet_fanout	*fanout;
	struct tpacket_stats	stats;
	union  tpacket_stats_u	stats_u;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
	unsigned int		tp_tstamp;
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
300
301#define PACKET_FANOUT_MAX 256
302
303struct packet_fanout {
304#ifdef CONFIG_NET_NS
305 struct net *net;
306#endif
307 unsigned int num_members;
308 u16 id;
309 u8 type;
310 u8 defrag;
311 atomic_t rr_cur;
312 struct list_head list;
313 struct sock *arr[PACKET_FANOUT_MAX];
314 spinlock_t lock;
315 atomic_t sk_ref;
316 struct packet_type prot_hook ____cacheline_aligned_in_smp;
317};
318
319struct packet_skb_cb {
320 unsigned int origlen;
321 union {
322 struct sockaddr_pkt pkt;
323 struct sockaddr_ll ll;
324 } sa;
325};
326
327#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
328
329#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
330#define GET_PBLOCK_DESC(x, bid) \
331 ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
332#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
333 ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
334#define GET_NEXT_PRB_BLK_NUM(x) \
335 (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
336 ((x)->kactive_blk_num+1) : 0)
337
338static struct packet_sock *pkt_sk(struct sock *sk)
339{
340 return (struct packet_sock *)sk;
341}
342
343static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
344static void __fanout_link(struct sock *sk, struct packet_sock *po);
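
/* Attach the socket's protocol hook, or link it into its fanout group.
 * Runs with po->bind_lock held (or on a socket that is not yet visible to
 * other threads); takes a socket reference and marks the socket as running.
 */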
350static void register_prot_hook(struct sock *sk)
351{
352 struct packet_sock *po = pkt_sk(sk);
353 if (!po->running) {
354 if (po->fanout)
355 __fanout_link(sk, po);
356 else
357 dev_add_pack(&po->prot_hook);
358 sock_hold(sk);
359 po->running = 1;
360 }
361}
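
/* Detach the protocol hook (or unlink from the fanout group) and drop the
 * reference taken by register_prot_hook().  When @sync is true, bind_lock is
 * dropped around synchronize_net() so that no receive path is still running
 * through the old hook when we return.
 */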
370static void __unregister_prot_hook(struct sock *sk, bool sync)
371{
372 struct packet_sock *po = pkt_sk(sk);
373
374 po->running = 0;
375 if (po->fanout)
376 __fanout_unlink(sk, po);
377 else
378 __dev_remove_pack(&po->prot_hook);
379 __sock_put(sk);
380
381 if (sync) {
382 spin_unlock(&po->bind_lock);
383 synchronize_net();
384 spin_lock(&po->bind_lock);
385 }
386}
387
388static void unregister_prot_hook(struct sock *sk, bool sync)
389{
390 struct packet_sock *po = pkt_sk(sk);
391
392 if (po->running)
393 __unregister_prot_hook(sk, sync);
394}
395
396static inline __pure struct page *pgv_to_page(void *addr)
397{
398 if (is_vmalloc_addr(addr))
399 return vmalloc_to_page(addr);
400 return virt_to_page(addr);
401}
402
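/* The tp_status word of each ring frame is shared with userspace through the
 * mmap()ed ring; the barriers below order the status update relative to the
 * frame contents.
 */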
403static void __packet_set_status(struct packet_sock *po, void *frame, int status)
404{
405 union {
406 struct tpacket_hdr *h1;
407 struct tpacket2_hdr *h2;
408 void *raw;
409 } h;
410
411 h.raw = frame;
412 switch (po->tp_version) {
413 case TPACKET_V1:
414 h.h1->tp_status = status;
415 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
416 break;
417 case TPACKET_V2:
418 h.h2->tp_status = status;
419 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
420 break;
421 case TPACKET_V3:
422 default:
423 WARN(1, "TPACKET version not supported.\n");
424 BUG();
425 }
426
427 smp_wmb();
428}
429
430static int __packet_get_status(struct packet_sock *po, void *frame)
431{
432 union {
433 struct tpacket_hdr *h1;
434 struct tpacket2_hdr *h2;
435 void *raw;
436 } h;
437
438 smp_rmb();
439
440 h.raw = frame;
441 switch (po->tp_version) {
442 case TPACKET_V1:
443 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
444 return h.h1->tp_status;
445 case TPACKET_V2:
446 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
447 return h.h2->tp_status;
448 case TPACKET_V3:
449 default:
450 WARN(1, "TPACKET version not supported.\n");
451 BUG();
452 return 0;
453 }
454}
455
456static void *packet_lookup_frame(struct packet_sock *po,
457 struct packet_ring_buffer *rb,
458 unsigned int position,
459 int status)
460{
461 unsigned int pg_vec_pos, frame_offset;
462 union {
463 struct tpacket_hdr *h1;
464 struct tpacket2_hdr *h2;
465 void *raw;
466 } h;
467
468 pg_vec_pos = position / rb->frames_per_block;
469 frame_offset = position % rb->frames_per_block;
470
471 h.raw = rb->pg_vec[pg_vec_pos].buffer +
472 (frame_offset * rb->frame_size);
473
474 if (status != __packet_get_status(po, h.raw))
475 return NULL;
476
477 return h.raw;
478}
479
480static void *packet_current_frame(struct packet_sock *po,
481 struct packet_ring_buffer *rb,
482 int status)
483{
484 return packet_lookup_frame(po, rb, rb->head, status);
485}
486
487static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
488{
489 del_timer_sync(&pkc->retire_blk_timer);
490}
491
492static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
493 int tx_ring,
494 struct sk_buff_head *rb_queue)
495{
496 struct tpacket_kbdq_core *pkc;
497
498 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
499
500 spin_lock(&rb_queue->lock);
501 pkc->delete_blk_timer = 1;
502 spin_unlock(&rb_queue->lock);
503
504 prb_del_retire_blk_timer(pkc);
505}
506
507static void prb_init_blk_timer(struct packet_sock *po,
508 struct tpacket_kbdq_core *pkc,
509 void (*func) (unsigned long))
510{
511 init_timer(&pkc->retire_blk_timer);
512 pkc->retire_blk_timer.data = (long)po;
513 pkc->retire_blk_timer.function = func;
514 pkc->retire_blk_timer.expires = jiffies;
515}
516
517static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
518{
519 struct tpacket_kbdq_core *pkc;
520
521 if (tx_ring)
522 BUG();
523
524 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
525 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
526}
527
528static int prb_calc_retire_blk_tmo(struct packet_sock *po,
529 int blk_size_in_bytes)
530{
531 struct net_device *dev;
532 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
533 struct ethtool_cmd ecmd;
534 int err;
535
536 rtnl_lock();
537 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
538 if (unlikely(!dev)) {
539 rtnl_unlock();
540 return DEFAULT_PRB_RETIRE_TOV;
541 }
542 err = __ethtool_get_settings(dev, &ecmd);
543 rtnl_unlock();
544 if (!err) {
545 switch (ecmd.speed) {
546 case SPEED_10000:
547 msec = 1;
548 div = 10000/1000;
549 break;
550 case SPEED_1000:
551 msec = 1;
552 div = 1000/1000;
553 break;
554
555
556
557
558 case SPEED_100:
559 case SPEED_10:
560 default:
561 return DEFAULT_PRB_RETIRE_TOV;
562 }
563 }
564
565 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
566
567 if (div)
568 mbits /= div;
569
570 tmo = mbits * msec;
571
572 if (div)
573 return tmo+1;
574 return tmo;
575}
576
577static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
578 union tpacket_req_u *req_u)
579{
580 p1->feature_req_word = req_u->req3.tp_feature_req_word;
581}
582
583static void init_prb_bdqc(struct packet_sock *po,
584 struct packet_ring_buffer *rb,
585 struct pgv *pg_vec,
586 union tpacket_req_u *req_u, int tx_ring)
587{
588 struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
589 struct tpacket_block_desc *pbd;
590
591 memset(p1, 0x0, sizeof(*p1));
592
593 p1->knxt_seq_num = 1;
594 p1->pkbdq = pg_vec;
595 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
596 p1->pkblk_start = (char *)pg_vec[0].buffer;
597 p1->kblk_size = req_u->req3.tp_block_size;
598 p1->knum_blocks = req_u->req3.tp_block_nr;
599 p1->hdrlen = po->tp_hdrlen;
600 p1->version = po->tp_version;
601 p1->last_kactive_blk_num = 0;
602 po->stats_u.stats3.tp_freeze_q_cnt = 0;
603 if (req_u->req3.tp_retire_blk_tov)
604 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
605 else
606 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
607 req_u->req3.tp_block_size);
608 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
609 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
610
611 prb_init_ft_ops(p1, req_u);
612 prb_setup_retire_blk_timer(po, tx_ring);
613 prb_open_block(p1, pbd);
614}
615
616
617
618
619static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
620{
621 mod_timer(&pkc->retire_blk_timer,
622 jiffies + pkc->tov_in_jiffies);
623 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
624}
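
/* Retire-block timer for TPACKET_V3 rx rings.  If the active block has not
 * changed since the previous run (it has been open for a full timeout),
 * close it, hand it to userspace and dispatch the next block.  If the queue
 * is frozen, either re-arm the timer (the block is still owned by userspace)
 * or reopen the block once it has been released.
 */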
649static void prb_retire_rx_blk_timer_expired(unsigned long data)
650{
651 struct packet_sock *po = (struct packet_sock *)data;
652 struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
653 unsigned int frozen;
654 struct tpacket_block_desc *pbd;
655
656 spin_lock(&po->sk.sk_receive_queue.lock);
657
658 frozen = prb_queue_frozen(pkc);
659 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
660
661 if (unlikely(pkc->delete_blk_timer))
662 goto out;
663
	if (BLOCK_NUM_PKTS(pbd)) {
		while (atomic_read(&pkc->blk_fill_in_prog)) {
			/* Waiting for skb_copy_bits to finish... */
			cpu_relax();
		}
	}
679
680 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
681 if (!frozen) {
682 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
683 if (!prb_dispatch_next_block(pkc, po))
684 goto refresh_timer;
685 else
686 goto out;
		} else {
			/* The queue is frozen: userspace still holds blocks. */
			if (prb_curr_blk_in_use(pkc, pbd)) {
				/* Current block is still in use - just re-arm
				 * the timer and check again later.
				 */
				goto refresh_timer;
			} else {
				/* Userspace released the block while the queue
				 * was frozen: reopen it (which also thaws the
				 * queue) and carry on.
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
709 }
710
711refresh_timer:
712 _prb_refresh_rx_retire_blk_timer(pkc);
713
714out:
715 spin_unlock(&po->sk.sk_receive_queue.lock);
716}
717
718static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
719 struct tpacket_block_desc *pbd1, __u32 status)
720{
721
722
723#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
724 u8 *start, *end;
725
726 start = (u8 *)pbd1;
727
728
729 start += PAGE_SIZE;
730
731 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
732 for (; start < end; start += PAGE_SIZE)
733 flush_dcache_page(pgv_to_page(start));
734
735 smp_wmb();
736#endif
737
738
739
740 BLOCK_STATUS(pbd1) = status;
741
742
743
744#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
745 start = (u8 *)pbd1;
746 flush_dcache_page(pgv_to_page(start));
747
748 smp_wmb();
749#endif
750}
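
/* Close the currently open block: record the last-packet (or wall-clock)
 * timestamp, mark the block TP_STATUS_USER, flush it so userspace sees a
 * consistent view, and advance kactive_blk_num to the next block.
 */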
761static void prb_close_block(struct tpacket_kbdq_core *pkc1,
762 struct tpacket_block_desc *pbd1,
763 struct packet_sock *po, unsigned int stat)
764{
765 __u32 status = TP_STATUS_USER | stat;
766
767 struct tpacket3_hdr *last_pkt;
768 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
769
770 if (po->stats.tp_drops)
771 status |= TP_STATUS_LOSING;
772
773 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
774 last_pkt->tp_next_offset = 0;
775
776
777 if (BLOCK_NUM_PKTS(pbd1)) {
778 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
779 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
780 } else {
781
782 struct timespec ts;
783 getnstimeofday(&ts);
784 h1->ts_last_pkt.ts_sec = ts.tv_sec;
785 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
786 }
787
788 smp_wmb();
789
790
791 prb_flush_block(pkc1, pbd1, status);
792
793 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
794}
795
796static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
797{
798 pkc->reset_pending_on_curr_blk = 0;
799}
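
/* Open a block that the kernel owns: assign its sequence number, reset the
 * packet count and length, stamp the first-packet time, point nxt_offset
 * just past the block header and private area, and re-arm the retire timer.
 * Opening a block also thaws a frozen queue.
 */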
808static void prb_open_block(struct tpacket_kbdq_core *pkc1,
809 struct tpacket_block_desc *pbd1)
810{
811 struct timespec ts;
812 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
813
814 smp_rmb();
815
816 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
817
818
819
820
821 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
822 BLOCK_NUM_PKTS(pbd1) = 0;
823 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
824 getnstimeofday(&ts);
825 h1->ts_first_pkt.ts_sec = ts.tv_sec;
826 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
827 pkc1->pkblk_start = (char *)pbd1;
828 pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
829 BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
830 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
831 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
832 pbd1->version = pkc1->version;
833 pkc1->prev = pkc1->nxt_offset;
834 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
835 prb_thaw_queue(pkc1);
836 _prb_refresh_rx_retire_blk_timer(pkc1);
837
838 smp_wmb();
839
840 return;
841 }
842
843 WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
844 pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
845 dump_stack();
846 BUG();
847}
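
/* Freeze the queue: the block we need next is still owned by userspace, so
 * stop filling the ring (and count the event) until it is released.
 */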
872static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
873 struct packet_sock *po)
874{
875 pkc->reset_pending_on_curr_blk = 1;
876 po->stats_u.stats3.tp_freeze_q_cnt++;
877}
878
879#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
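
/* Open the new current block and return a pointer to its first free byte;
 * if userspace still owns that block, freeze the queue and return NULL.
 */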
887static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
888 struct packet_sock *po)
889{
890 struct tpacket_block_desc *pbd;
891
892 smp_rmb();
893
894
895 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
896
897
898 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
899 prb_freeze_queue(pkc, po);
900 return NULL;
901 }
902
908 prb_open_block(pkc, pbd);
909 return (void *)pkc->nxt_offset;
910}
911
static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Retiring is only valid while the block is still owned by the kernel. */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/* If this is not a timer retirement, wait for any in-flight
		 * copy into the block to finish before closing it.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			while (atomic_read(&pkc->blk_fill_in_prog)) {
				/* Waiting for skb_copy_bits to finish... */
				cpu_relax();
			}
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}

	WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
	dump_stack();
	BUG();
}
942
943static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
944 struct tpacket_block_desc *pbd)
945{
946 return TP_STATUS_USER & BLOCK_STATUS(pbd);
947}
948
949static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
950{
951 return pkc->reset_pending_on_curr_blk;
952}
953
954static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
955{
956 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
957 atomic_dec(&pkc->blk_fill_in_prog);
958}
959
960static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
961 struct tpacket3_hdr *ppd)
962{
963 ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
964}
965
966static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
967 struct tpacket3_hdr *ppd)
968{
969 ppd->hv1.tp_rxhash = 0;
970}
971
972static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
973 struct tpacket3_hdr *ppd)
974{
975 if (vlan_tx_tag_present(pkc->skb)) {
976 ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
977 ppd->tp_status = TP_STATUS_VLAN_VALID;
978 } else {
979 ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
980 }
981}
982
983static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
984 struct tpacket3_hdr *ppd)
985{
986 prb_fill_vlan_info(pkc, ppd);
987
988 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
989 prb_fill_rxhash(pkc, ppd);
990 else
991 prb_clear_rxhash(pkc, ppd);
992}
993
994static void prb_fill_curr_block(char *curr,
995 struct tpacket_kbdq_core *pkc,
996 struct tpacket_block_desc *pbd,
997 unsigned int len)
998{
999 struct tpacket3_hdr *ppd;
1000
1001 ppd = (struct tpacket3_hdr *)curr;
1002 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
1003 pkc->prev = curr;
1004 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
1005 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
1006 BLOCK_NUM_PKTS(pbd) += 1;
1007 atomic_inc(&pkc->blk_fill_in_prog);
1008 prb_run_all_ft_ops(pkc, ppd);
1009}
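
/* TPACKET_V3 receive fast path, called with sk_receive_queue.lock held:
 * return room for a @len byte packet in the current block, retiring it and
 * dispatching the next one when it fills up.  Returns NULL if no block space
 * is available (ring frozen or fully owned by userspace).
 */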
1012static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1013 struct sk_buff *skb,
1014 int status,
1015 unsigned int len
1016 )
1017{
1018 struct tpacket_kbdq_core *pkc;
1019 struct tpacket_block_desc *pbd;
1020 char *curr, *end;
1021
1022 pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring));
1023 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1024
1025
1026 if (prb_queue_frozen(pkc)) {
1027
1028
1029
1030
1031 if (prb_curr_blk_in_use(pkc, pbd)) {
1032
1033 return NULL;
1034 } else {
1035
1036
1037
1038
1039
1040
1041 prb_open_block(pkc, pbd);
1042 }
1043 }
1044
1045 smp_mb();
1046 curr = pkc->nxt_offset;
1047 pkc->skb = skb;
1048 end = (char *) ((char *)pbd + pkc->kblk_size);
1049
1050
1051 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1052 prb_fill_curr_block(curr, pkc, pbd, len);
1053 return (void *)curr;
1054 }
1055
1056
1057 prb_retire_current_block(pkc, po, 0);
1058
1059
1060 curr = (char *)prb_dispatch_next_block(pkc, po);
1061 if (curr) {
1062 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1063 prb_fill_curr_block(curr, pkc, pbd, len);
1064 return (void *)curr;
1065 }
1066
1067
1068
1069
1070
1071 return NULL;
1072}
1073
1074static void *packet_current_rx_frame(struct packet_sock *po,
1075 struct sk_buff *skb,
1076 int status, unsigned int len)
1077{
1078 char *curr = NULL;
1079 switch (po->tp_version) {
1080 case TPACKET_V1:
1081 case TPACKET_V2:
1082 curr = packet_lookup_frame(po, &po->rx_ring,
1083 po->rx_ring.head, status);
1084 return curr;
1085 case TPACKET_V3:
1086 return __packet_lookup_frame_in_block(po, skb, status, len);
1087 default:
1088 WARN(1, "TPACKET version not supported\n");
1089 BUG();
1090 return 0;
1091 }
1092}
1093
1094static void *prb_lookup_block(struct packet_sock *po,
1095 struct packet_ring_buffer *rb,
1096 unsigned int previous,
1097 int status)
1098{
1099 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1100 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
1101
1102 if (status != BLOCK_STATUS(pbd))
1103 return NULL;
1104 return pbd;
1105}
1106
1107static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1108{
1109 unsigned int prev;
1110 if (rb->prb_bdqc.kactive_blk_num)
1111 prev = rb->prb_bdqc.kactive_blk_num-1;
1112 else
1113 prev = rb->prb_bdqc.knum_blocks-1;
1114 return prev;
1115}
1116
1117
1118static void *__prb_previous_block(struct packet_sock *po,
1119 struct packet_ring_buffer *rb,
1120 int status)
1121{
1122 unsigned int previous = prb_previous_blk_num(rb);
1123 return prb_lookup_block(po, rb, previous, status);
1124}
1125
1126static void *packet_previous_rx_frame(struct packet_sock *po,
1127 struct packet_ring_buffer *rb,
1128 int status)
1129{
1130 if (po->tp_version <= TPACKET_V2)
1131 return packet_previous_frame(po, rb, status);
1132
1133 return __prb_previous_block(po, rb, status);
1134}
1135
1136static void packet_increment_rx_head(struct packet_sock *po,
1137 struct packet_ring_buffer *rb)
1138{
1139 switch (po->tp_version) {
1140 case TPACKET_V1:
1141 case TPACKET_V2:
1142 return packet_increment_head(rb);
1143 case TPACKET_V3:
1144 default:
1145 WARN(1, "TPACKET version not supported.\n");
1146 BUG();
1147 return;
1148 }
1149}
1150
1151static void *packet_previous_frame(struct packet_sock *po,
1152 struct packet_ring_buffer *rb,
1153 int status)
1154{
1155 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1156 return packet_lookup_frame(po, rb, previous, status);
1157}
1158
1159static void packet_increment_head(struct packet_ring_buffer *buff)
1160{
1161 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1162}
1163
1164static void packet_sock_destruct(struct sock *sk)
1165{
1166 skb_queue_purge(&sk->sk_error_queue);
1167
1168 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1169 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
1170
1171 if (!sock_flag(sk, SOCK_DEAD)) {
1172 pr_err("Attempt to release alive packet socket: %p\n", sk);
1173 return;
1174 }
1175
1176 sk_refcnt_debug_dec(sk);
1177}
1178
1179static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1180{
1181 int x = atomic_read(&f->rr_cur) + 1;
1182
1183 if (x >= num)
1184 x = 0;
1185
1186 return x;
1187}
1188
1189static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
1190{
1191 u32 idx, hash = skb->rxhash;
1192
1193 idx = ((u64)hash * num) >> 32;
1194
1195 return f->arr[idx];
1196}
1197
1198static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
1199{
1200 int cur, old;
1201
1202 cur = atomic_read(&f->rr_cur);
1203 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1204 fanout_rr_next(f, num))) != cur)
1205 cur = old;
1206 return f->arr[cur];
1207}
1208
1209static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
1210{
1211 unsigned int cpu = smp_processor_id();
1212
1213 return f->arr[cpu % num];
1214}
1215
1216static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1217 struct packet_type *pt, struct net_device *orig_dev)
1218{
1219 struct packet_fanout *f = pt->af_packet_priv;
1220 unsigned int num = f->num_members;
1221 struct packet_sock *po;
1222 struct sock *sk;
1223
1224 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1225 !num) {
1226 kfree_skb(skb);
1227 return 0;
1228 }
1229
1230 switch (f->type) {
1231 case PACKET_FANOUT_HASH:
1232 default:
1233 if (f->defrag) {
1234 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
1235 if (!skb)
1236 return 0;
1237 }
1238 skb_get_rxhash(skb);
1239 sk = fanout_demux_hash(f, skb, num);
1240 break;
1241 case PACKET_FANOUT_LB:
1242 sk = fanout_demux_lb(f, skb, num);
1243 break;
1244 case PACKET_FANOUT_CPU:
1245 sk = fanout_demux_cpu(f, skb, num);
1246 break;
1247 }
1248
1249 po = pkt_sk(sk);
1250
1251 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1252}
1253
1254static DEFINE_MUTEX(fanout_mutex);
1255static LIST_HEAD(fanout_list);
1256
1257static void __fanout_link(struct sock *sk, struct packet_sock *po)
1258{
1259 struct packet_fanout *f = po->fanout;
1260
1261 spin_lock(&f->lock);
1262 f->arr[f->num_members] = sk;
1263 smp_wmb();
1264 f->num_members++;
1265 spin_unlock(&f->lock);
1266}
1267
1268static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1269{
1270 struct packet_fanout *f = po->fanout;
1271 int i;
1272
1273 spin_lock(&f->lock);
1274 for (i = 0; i < f->num_members; i++) {
1275 if (f->arr[i] == sk)
1276 break;
1277 }
1278 BUG_ON(i >= f->num_members);
1279 f->arr[i] = f->arr[f->num_members - 1];
1280 f->num_members--;
1281 spin_unlock(&f->lock);
1282}
1283
1284static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1285{
1286 struct packet_sock *po = pkt_sk(sk);
1287 struct packet_fanout *f, *match;
1288 u8 type = type_flags & 0xff;
1289 u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
1290 int err;
1291
1292 switch (type) {
1293 case PACKET_FANOUT_HASH:
1294 case PACKET_FANOUT_LB:
1295 case PACKET_FANOUT_CPU:
1296 break;
1297 default:
1298 return -EINVAL;
1299 }
1300
1301 if (!po->running)
1302 return -EINVAL;
1303
1304 if (po->fanout)
1305 return -EALREADY;
1306
1307 mutex_lock(&fanout_mutex);
1308 match = NULL;
1309 list_for_each_entry(f, &fanout_list, list) {
1310 if (f->id == id &&
1311 read_pnet(&f->net) == sock_net(sk)) {
1312 match = f;
1313 break;
1314 }
1315 }
1316 err = -EINVAL;
1317 if (match && match->defrag != defrag)
1318 goto out;
1319 if (!match) {
1320 err = -ENOMEM;
1321 match = kzalloc(sizeof(*match), GFP_KERNEL);
1322 if (!match)
1323 goto out;
1324 write_pnet(&match->net, sock_net(sk));
1325 match->id = id;
1326 match->type = type;
1327 match->defrag = defrag;
1328 atomic_set(&match->rr_cur, 0);
1329 INIT_LIST_HEAD(&match->list);
1330 spin_lock_init(&match->lock);
1331 atomic_set(&match->sk_ref, 0);
1332 match->prot_hook.type = po->prot_hook.type;
1333 match->prot_hook.dev = po->prot_hook.dev;
1334 match->prot_hook.func = packet_rcv_fanout;
1335 match->prot_hook.af_packet_priv = match;
1336 dev_add_pack(&match->prot_hook);
1337 list_add(&match->list, &fanout_list);
1338 }
1339 err = -EINVAL;
1340 if (match->type == type &&
1341 match->prot_hook.type == po->prot_hook.type &&
1342 match->prot_hook.dev == po->prot_hook.dev) {
1343 err = -ENOSPC;
1344 if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1345 __dev_remove_pack(&po->prot_hook);
1346 po->fanout = match;
1347 atomic_inc(&match->sk_ref);
1348 __fanout_link(sk, po);
1349 err = 0;
1350 }
1351 }
1352out:
1353 mutex_unlock(&fanout_mutex);
1354 return err;
1355}
1356
1357static void fanout_release(struct sock *sk)
1358{
1359 struct packet_sock *po = pkt_sk(sk);
1360 struct packet_fanout *f;
1361
1362 f = po->fanout;
1363 if (!f)
1364 return;
1365
1366 po->fanout = NULL;
1367
1368 mutex_lock(&fanout_mutex);
1369 if (atomic_dec_and_test(&f->sk_ref)) {
1370 list_del(&f->list);
1371 dev_remove_pack(&f->prot_hook);
1372 kfree(f);
1373 }
1374 mutex_unlock(&fanout_mutex);
1375}
1376
1377static const struct proto_ops packet_ops;
1378
1379static const struct proto_ops packet_ops_spkt;
1380
1381static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1382 struct packet_type *pt, struct net_device *orig_dev)
1383{
1384 struct sock *sk;
1385 struct sockaddr_pkt *spkt;

	/* The af_packet_priv stored when this hook was registered is the
	 * SOCK_PACKET socket itself.
	 */
	sk = pt->af_packet_priv;

1405 if (skb->pkt_type == PACKET_LOOPBACK)
1406 goto out;
1407
1408 if (!net_eq(dev_net(dev), sock_net(sk)))
1409 goto out;
1410
1411 skb = skb_share_check(skb, GFP_ATOMIC);
1412 if (skb == NULL)
1413 goto oom;
1414
1415
1416 skb_dst_drop(skb);
1417
1418
1419 nf_reset(skb);
1420
1421 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1422
1423 skb_push(skb, skb->data - skb_mac_header(skb));
1424
1425
1426
1427
1428
1429 spkt->spkt_family = dev->type;
1430 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1431 spkt->spkt_protocol = skb->protocol;
1432
1433
1434
1435
1436
1437
1438 if (sock_queue_rcv_skb(sk, skb) == 0)
1439 return 0;
1440
1441out:
1442 kfree_skb(skb);
1443oom:
1444 return 0;
1445}
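
/* SOCK_PACKET transmit path: look the device up by name from the
 * sockaddr_pkt, build an skb around the user data and queue it for
 * transmission unchanged.
 */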
1453static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
1454 struct msghdr *msg, size_t len)
1455{
1456 struct sock *sk = sock->sk;
1457 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
1458 struct sk_buff *skb = NULL;
1459 struct net_device *dev;
1460 __be16 proto = 0;
1461 int err;
1462
1463
1464
1465
1466
1467 if (saddr) {
1468 if (msg->msg_namelen < sizeof(struct sockaddr))
1469 return -EINVAL;
1470 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1471 proto = saddr->spkt_protocol;
1472 } else
1473 return -ENOTCONN;
1474
1475
1476
1477
1478
1479 saddr->spkt_device[13] = 0;
1480retry:
1481 rcu_read_lock();
1482 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1483 err = -ENODEV;
1484 if (dev == NULL)
1485 goto out_unlock;
1486
1487 err = -ENETDOWN;
1488 if (!(dev->flags & IFF_UP))
1489 goto out_unlock;
1490
1491
1492
1493
1494
1495
1496 err = -EMSGSIZE;
1497 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
1498 goto out_unlock;
1499
1500 if (!skb) {
1501 size_t reserved = LL_RESERVED_SPACE(dev);
1502 int tlen = dev->needed_tailroom;
1503 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1504
1505 rcu_read_unlock();
1506 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1507 if (skb == NULL)
1508 return -ENOBUFS;
1509
1510
1511
1512
1513 skb_reserve(skb, reserved);
1514 skb_reset_network_header(skb);
1515
1516
1517 if (hhlen) {
1518 skb->data -= hhlen;
1519 skb->tail -= hhlen;
1520 if (len < hhlen)
1521 skb_reset_network_header(skb);
1522 }
1523 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1524 if (err)
1525 goto out_free;
1526 goto retry;
1527 }
1528
1529 if (len > (dev->mtu + dev->hard_header_len)) {
1530
1531
1532
1533
1534 struct ethhdr *ehdr;
1535 skb_reset_mac_header(skb);
1536 ehdr = eth_hdr(skb);
1537 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1538 err = -EMSGSIZE;
1539 goto out_unlock;
1540 }
1541 }
1542
1543 skb->protocol = proto;
1544 skb->dev = dev;
1545 skb->priority = sk->sk_priority;
1546 skb->mark = sk->sk_mark;
1547 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1548 if (err < 0)
1549 goto out_unlock;
1550
1551 dev_queue_xmit(skb);
1552 rcu_read_unlock();
1553 return len;
1554
1555out_unlock:
1556 rcu_read_unlock();
1557out_free:
1558 kfree_skb(skb);
1559 return err;
1560}
1561
1562static unsigned int run_filter(const struct sk_buff *skb,
1563 const struct sock *sk,
1564 unsigned int res)
1565{
1566 struct sk_filter *filter;
1567
1568 rcu_read_lock();
1569 filter = rcu_dereference(sk->sk_filter);
1570 if (filter != NULL)
1571 res = SK_RUN_FILTER(filter, skb);
1572 rcu_read_unlock();
1573
1574 return res;
1575}
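
/* Receive hook for normal (non-mmap()ed) AF_PACKET sockets: run the socket
 * filter, clone shared skbs, fill in a sockaddr_ll in skb->cb and queue the
 * (possibly truncated) packet on the socket receive queue.
 */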
1589static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
1590 struct packet_type *pt, struct net_device *orig_dev)
1591{
1592 struct sock *sk;
1593 struct sockaddr_ll *sll;
1594 struct packet_sock *po;
1595 u8 *skb_head = skb->data;
1596 int skb_len = skb->len;
1597 unsigned int snaplen, res;
1598
1599 if (skb->pkt_type == PACKET_LOOPBACK)
1600 goto drop;
1601
1602 sk = pt->af_packet_priv;
1603 po = pkt_sk(sk);
1604
1605 if (!net_eq(dev_net(dev), sock_net(sk)))
1606 goto drop;
1607
1608 skb->dev = dev;
1609
1610 if (dev->header_ops) {
1611
1612
1613
1614
1615
1616
1617
1618 if (sk->sk_type != SOCK_DGRAM)
1619 skb_push(skb, skb->data - skb_mac_header(skb));
1620 else if (skb->pkt_type == PACKET_OUTGOING) {
1621
1622 skb_pull(skb, skb_network_offset(skb));
1623 }
1624 }
1625
1626 snaplen = skb->len;
1627
1628 res = run_filter(skb, sk, snaplen);
1629 if (!res)
1630 goto drop_n_restore;
1631 if (snaplen > res)
1632 snaplen = res;
1633
1634 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
1635 goto drop_n_acct;
1636
1637 if (skb_shared(skb)) {
1638 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
1639 if (nskb == NULL)
1640 goto drop_n_acct;
1641
1642 if (skb_head != skb->data) {
1643 skb->data = skb_head;
1644 skb->len = skb_len;
1645 }
1646 kfree_skb(skb);
1647 skb = nskb;
1648 }
1649
1650 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
1651 sizeof(skb->cb));
1652
1653 sll = &PACKET_SKB_CB(skb)->sa.ll;
1654 sll->sll_family = AF_PACKET;
1655 sll->sll_hatype = dev->type;
1656 sll->sll_protocol = skb->protocol;
1657 sll->sll_pkttype = skb->pkt_type;
1658 if (unlikely(po->origdev))
1659 sll->sll_ifindex = orig_dev->ifindex;
1660 else
1661 sll->sll_ifindex = dev->ifindex;
1662
1663 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1664
1665 PACKET_SKB_CB(skb)->origlen = skb->len;
1666
1667 if (pskb_trim(skb, snaplen))
1668 goto drop_n_acct;
1669
1670 skb_set_owner_r(skb, sk);
1671 skb->dev = NULL;
1672 skb_dst_drop(skb);
1673
1674
1675 nf_reset(skb);
1676
1677 spin_lock(&sk->sk_receive_queue.lock);
1678 po->stats.tp_packets++;
1679 skb->dropcount = atomic_read(&sk->sk_drops);
1680 __skb_queue_tail(&sk->sk_receive_queue, skb);
1681 spin_unlock(&sk->sk_receive_queue.lock);
1682 sk->sk_data_ready(sk, skb->len);
1683 return 0;
1684
1685drop_n_acct:
1686 spin_lock(&sk->sk_receive_queue.lock);
1687 po->stats.tp_drops++;
1688 atomic_inc(&sk->sk_drops);
1689 spin_unlock(&sk->sk_receive_queue.lock);
1690
1691drop_n_restore:
1692 if (skb_head != skb->data && skb_shared(skb)) {
1693 skb->data = skb_head;
1694 skb->len = skb_len;
1695 }
1696drop:
1697 consume_skb(skb);
1698 return 0;
1699}
1700
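/* Receive hook for sockets with an mmap()ed rx ring (TPACKET_V1/V2/V3):
 * copy the filtered, possibly truncated packet straight into the ring and
 * fill in the ring frame header and sockaddr_ll for userspace.
 */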
1701static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1702 struct packet_type *pt, struct net_device *orig_dev)
1703{
1704 struct sock *sk;
1705 struct packet_sock *po;
1706 struct sockaddr_ll *sll;
1707 union {
1708 struct tpacket_hdr *h1;
1709 struct tpacket2_hdr *h2;
1710 struct tpacket3_hdr *h3;
1711 void *raw;
1712 } h;
1713 u8 *skb_head = skb->data;
1714 int skb_len = skb->len;
1715 unsigned int snaplen, res;
1716 unsigned long status = TP_STATUS_USER;
1717 unsigned short macoff, netoff, hdrlen;
1718 struct sk_buff *copy_skb = NULL;
1719 struct timeval tv;
1720 struct timespec ts;
1721 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
1722
1723 if (skb->pkt_type == PACKET_LOOPBACK)
1724 goto drop;
1725
1726 sk = pt->af_packet_priv;
1727 po = pkt_sk(sk);
1728
1729 if (!net_eq(dev_net(dev), sock_net(sk)))
1730 goto drop;
1731
1732 if (dev->header_ops) {
1733 if (sk->sk_type != SOCK_DGRAM)
1734 skb_push(skb, skb->data - skb_mac_header(skb));
1735 else if (skb->pkt_type == PACKET_OUTGOING) {
1736
1737 skb_pull(skb, skb_network_offset(skb));
1738 }
1739 }
1740
1741 if (skb->ip_summed == CHECKSUM_PARTIAL)
1742 status |= TP_STATUS_CSUMNOTREADY;
1743
1744 snaplen = skb->len;
1745
1746 res = run_filter(skb, sk, snaplen);
1747 if (!res)
1748 goto drop_n_restore;
1749 if (snaplen > res)
1750 snaplen = res;
1751
1752 if (sk->sk_type == SOCK_DGRAM) {
1753 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
1754 po->tp_reserve;
1755 } else {
1756 unsigned maclen = skb_network_offset(skb);
1757 netoff = TPACKET_ALIGN(po->tp_hdrlen +
1758 (maclen < 16 ? 16 : maclen)) +
1759 po->tp_reserve;
1760 macoff = netoff - maclen;
1761 }
1762 if (po->tp_version <= TPACKET_V2) {
1763 if (macoff + snaplen > po->rx_ring.frame_size) {
1764 if (po->copy_thresh &&
1765 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1766 if (skb_shared(skb)) {
1767 copy_skb = skb_clone(skb, GFP_ATOMIC);
1768 } else {
1769 copy_skb = skb_get(skb);
1770 skb_head = skb->data;
1771 }
1772 if (copy_skb)
1773 skb_set_owner_r(copy_skb, sk);
1774 }
1775 snaplen = po->rx_ring.frame_size - macoff;
1776 if ((int)snaplen < 0)
1777 snaplen = 0;
1778 }
1779 }
1780 spin_lock(&sk->sk_receive_queue.lock);
1781 h.raw = packet_current_rx_frame(po, skb,
1782 TP_STATUS_KERNEL, (macoff+snaplen));
1783 if (!h.raw)
1784 goto ring_is_full;
1785 if (po->tp_version <= TPACKET_V2) {
1786 packet_increment_rx_head(po, &po->rx_ring);
1787
1788
1789
1790
1791
1792
1793 if (po->stats.tp_drops)
1794 status |= TP_STATUS_LOSING;
1795 }
1796 po->stats.tp_packets++;
1797 if (copy_skb) {
1798 status |= TP_STATUS_COPY;
1799 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
1800 }
1801 spin_unlock(&sk->sk_receive_queue.lock);
1802
1803 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
1804
1805 switch (po->tp_version) {
1806 case TPACKET_V1:
1807 h.h1->tp_len = skb->len;
1808 h.h1->tp_snaplen = snaplen;
1809 h.h1->tp_mac = macoff;
1810 h.h1->tp_net = netoff;
1811 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1812 && shhwtstamps->syststamp.tv64)
1813 tv = ktime_to_timeval(shhwtstamps->syststamp);
1814 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1815 && shhwtstamps->hwtstamp.tv64)
1816 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
1817 else if (skb->tstamp.tv64)
1818 tv = ktime_to_timeval(skb->tstamp);
1819 else
1820 do_gettimeofday(&tv);
1821 h.h1->tp_sec = tv.tv_sec;
1822 h.h1->tp_usec = tv.tv_usec;
1823 hdrlen = sizeof(*h.h1);
1824 break;
1825 case TPACKET_V2:
1826 h.h2->tp_len = skb->len;
1827 h.h2->tp_snaplen = snaplen;
1828 h.h2->tp_mac = macoff;
1829 h.h2->tp_net = netoff;
1830 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1831 && shhwtstamps->syststamp.tv64)
1832 ts = ktime_to_timespec(shhwtstamps->syststamp);
1833 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1834 && shhwtstamps->hwtstamp.tv64)
1835 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1836 else if (skb->tstamp.tv64)
1837 ts = ktime_to_timespec(skb->tstamp);
1838 else
1839 getnstimeofday(&ts);
1840 h.h2->tp_sec = ts.tv_sec;
1841 h.h2->tp_nsec = ts.tv_nsec;
1842 if (vlan_tx_tag_present(skb)) {
1843 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
1844 status |= TP_STATUS_VLAN_VALID;
1845 } else {
1846 h.h2->tp_vlan_tci = 0;
1847 }
1848 h.h2->tp_padding = 0;
1849 hdrlen = sizeof(*h.h2);
1850 break;
1851 case TPACKET_V3:
1852
1853
1854
1855 h.h3->tp_status |= status;
1856 h.h3->tp_len = skb->len;
1857 h.h3->tp_snaplen = snaplen;
1858 h.h3->tp_mac = macoff;
1859 h.h3->tp_net = netoff;
1860 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1861 && shhwtstamps->syststamp.tv64)
1862 ts = ktime_to_timespec(shhwtstamps->syststamp);
1863 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1864 && shhwtstamps->hwtstamp.tv64)
1865 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1866 else if (skb->tstamp.tv64)
1867 ts = ktime_to_timespec(skb->tstamp);
1868 else
1869 getnstimeofday(&ts);
1870 h.h3->tp_sec = ts.tv_sec;
1871 h.h3->tp_nsec = ts.tv_nsec;
1872 hdrlen = sizeof(*h.h3);
1873 break;
1874 default:
1875 BUG();
1876 }
1877
1878 sll = h.raw + TPACKET_ALIGN(hdrlen);
1879 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1880 sll->sll_family = AF_PACKET;
1881 sll->sll_hatype = dev->type;
1882 sll->sll_protocol = skb->protocol;
1883 sll->sll_pkttype = skb->pkt_type;
1884 if (unlikely(po->origdev))
1885 sll->sll_ifindex = orig_dev->ifindex;
1886 else
1887 sll->sll_ifindex = dev->ifindex;
1888
1889 smp_mb();
1890#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
1891 {
1892 u8 *start, *end;
1893
1894 if (po->tp_version <= TPACKET_V2) {
1895 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
1896 + macoff + snaplen);
1897 for (start = h.raw; start < end; start += PAGE_SIZE)
1898 flush_dcache_page(pgv_to_page(start));
1899 }
1900 smp_wmb();
1901 }
1902#endif
1903 if (po->tp_version <= TPACKET_V2)
1904 __packet_set_status(po, h.raw, status);
1905 else
1906 prb_clear_blk_fill_status(&po->rx_ring);
1907
1908 sk->sk_data_ready(sk, 0);
1909
1910drop_n_restore:
1911 if (skb_head != skb->data && skb_shared(skb)) {
1912 skb->data = skb_head;
1913 skb->len = skb_len;
1914 }
1915drop:
1916 kfree_skb(skb);
1917 return 0;
1918
1919ring_is_full:
1920 po->stats.tp_drops++;
1921 spin_unlock(&sk->sk_receive_queue.lock);
1922
1923 sk->sk_data_ready(sk, 0);
1924 kfree_skb(copy_skb);
1925 goto drop_n_restore;
1926}
1927
1928static void tpacket_destruct_skb(struct sk_buff *skb)
1929{
1930 struct packet_sock *po = pkt_sk(skb->sk);
1931 void *ph;
1932
1933 if (likely(po->tx_ring.pg_vec)) {
1934 ph = skb_shinfo(skb)->destructor_arg;
1935 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
1936 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
1937 atomic_dec(&po->tx_ring.pending);
1938 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
1939 }
1940
1941 sock_wfree(skb);
1942}
1943
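/* Build an skb for one tx-ring frame: the payload is not copied; the skb's
 * page fragments are set up to reference the ring pages that hold the frame
 * data.
 */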
1944static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1945 void *frame, struct net_device *dev, int size_max,
1946 __be16 proto, unsigned char *addr, int hlen)
1947{
1948 union {
1949 struct tpacket_hdr *h1;
1950 struct tpacket2_hdr *h2;
1951 void *raw;
1952 } ph;
1953 int to_write, offset, len, tp_len, nr_frags, len_max;
1954 struct socket *sock = po->sk.sk_socket;
1955 struct page *page;
1956 void *data;
1957 int err;
1958
1959 ph.raw = frame;
1960
1961 skb->protocol = proto;
1962 skb->dev = dev;
1963 skb->priority = po->sk.sk_priority;
1964 skb->mark = po->sk.sk_mark;
1965 skb_shinfo(skb)->destructor_arg = ph.raw;
1966
1967 switch (po->tp_version) {
1968 case TPACKET_V2:
1969 tp_len = ph.h2->tp_len;
1970 break;
1971 default:
1972 tp_len = ph.h1->tp_len;
1973 break;
1974 }
1975 if (unlikely(tp_len > size_max)) {
1976 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
1977 return -EMSGSIZE;
1978 }
1979
1980 skb_reserve(skb, hlen);
1981 skb_reset_network_header(skb);
1982
1983 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
1984 to_write = tp_len;
1985
1986 if (sock->type == SOCK_DGRAM) {
1987 err = dev_hard_header(skb, dev, ntohs(proto), addr,
1988 NULL, tp_len);
1989 if (unlikely(err < 0))
1990 return -EINVAL;
1991 } else if (dev->hard_header_len) {
1992
1993 if (unlikely(tp_len <= dev->hard_header_len)) {
1994 pr_err("packet size is too short (%d < %d)\n",
1995 tp_len, dev->hard_header_len);
1996 return -EINVAL;
1997 }
1998
1999 skb_push(skb, dev->hard_header_len);
2000 err = skb_store_bits(skb, 0, data,
2001 dev->hard_header_len);
2002 if (unlikely(err))
2003 return err;
2004
2005 data += dev->hard_header_len;
2006 to_write -= dev->hard_header_len;
2007 }
2008
2009 err = -EFAULT;
2010 offset = offset_in_page(data);
2011 len_max = PAGE_SIZE - offset;
2012 len = ((to_write > len_max) ? len_max : to_write);
2013
2014 skb->data_len = to_write;
2015 skb->len += to_write;
2016 skb->truesize += to_write;
2017 atomic_add(to_write, &po->sk.sk_wmem_alloc);
2018
2019 while (likely(to_write)) {
2020 nr_frags = skb_shinfo(skb)->nr_frags;
2021
2022 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2023 pr_err("Packet exceed the number of skb frags(%lu)\n",
2024 MAX_SKB_FRAGS);
2025 return -EFAULT;
2026 }
2027
2028 page = pgv_to_page(data);
2029 data += len;
2030 flush_dcache_page(page);
2031 get_page(page);
2032 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2033 to_write -= len;
2034 offset = 0;
2035 len_max = PAGE_SIZE;
2036 len = ((to_write > len_max) ? len_max : to_write);
2037 }
2038
2039 return tp_len;
2040}
2041
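/* TX-ring send loop: walk frames marked TP_STATUS_SEND_REQUEST, mark them
 * TP_STATUS_SENDING, build skbs that reference the ring pages and transmit
 * them; tpacket_destruct_skb() hands each frame back to userspace.
 */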
2042static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2043{
2044 struct sk_buff *skb;
2045 struct net_device *dev;
2046 __be16 proto;
2047 bool need_rls_dev = false;
2048 int err, reserve = 0;
2049 void *ph;
2050 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2051 int tp_len, size_max;
2052 unsigned char *addr;
2053 int len_sum = 0;
2054 int status = 0;
2055 int hlen, tlen;
2056
2057 mutex_lock(&po->pg_vec_lock);
2058
2059 err = -EBUSY;
2060 if (saddr == NULL) {
2061 dev = po->prot_hook.dev;
2062 proto = po->num;
2063 addr = NULL;
2064 } else {
2065 err = -EINVAL;
2066 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2067 goto out;
2068 if (msg->msg_namelen < (saddr->sll_halen
2069 + offsetof(struct sockaddr_ll,
2070 sll_addr)))
2071 goto out;
2072 proto = saddr->sll_protocol;
2073 addr = saddr->sll_addr;
2074 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2075 need_rls_dev = true;
2076 }
2077
2078 err = -ENXIO;
2079 if (unlikely(dev == NULL))
2080 goto out;
2081
2082 reserve = dev->hard_header_len;
2083
2084 err = -ENETDOWN;
2085 if (unlikely(!(dev->flags & IFF_UP)))
2086 goto out_put;
2087
2088 size_max = po->tx_ring.frame_size
2089 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2090
2091 if (size_max > dev->mtu + reserve)
2092 size_max = dev->mtu + reserve;
2093
2094 do {
2095 ph = packet_current_frame(po, &po->tx_ring,
2096 TP_STATUS_SEND_REQUEST);
2097
2098 if (unlikely(ph == NULL)) {
2099 schedule();
2100 continue;
2101 }
2102
2103 status = TP_STATUS_SEND_REQUEST;
2104 hlen = LL_RESERVED_SPACE(dev);
2105 tlen = dev->needed_tailroom;
2106 skb = sock_alloc_send_skb(&po->sk,
2107 hlen + tlen + sizeof(struct sockaddr_ll),
2108 0, &err);
2109
2110 if (unlikely(skb == NULL))
2111 goto out_status;
2112
2113 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2114 addr, hlen);
2115
2116 if (unlikely(tp_len < 0)) {
2117 if (po->tp_loss) {
2118 __packet_set_status(po, ph,
2119 TP_STATUS_AVAILABLE);
2120 packet_increment_head(&po->tx_ring);
2121 kfree_skb(skb);
2122 continue;
2123 } else {
2124 status = TP_STATUS_WRONG_FORMAT;
2125 err = tp_len;
2126 goto out_status;
2127 }
2128 }
2129
2130 skb->destructor = tpacket_destruct_skb;
2131 __packet_set_status(po, ph, TP_STATUS_SENDING);
2132 atomic_inc(&po->tx_ring.pending);
2133
2134 status = TP_STATUS_SEND_REQUEST;
2135 err = dev_queue_xmit(skb);
2136 if (unlikely(err > 0)) {
2137 err = net_xmit_errno(err);
2138 if (err && __packet_get_status(po, ph) ==
2139 TP_STATUS_AVAILABLE) {
2140
2141 skb = NULL;
2142 goto out_status;
2143 }
2144
2145
2146
2147
2148 err = 0;
2149 }
2150 packet_increment_head(&po->tx_ring);
2151 len_sum += tp_len;
2152 } while (likely((ph != NULL) ||
2153 ((!(msg->msg_flags & MSG_DONTWAIT)) &&
2154 (atomic_read(&po->tx_ring.pending))))
2155 );
2156
2157 err = len_sum;
2158 goto out_put;
2159
2160out_status:
2161 __packet_set_status(po, ph, status);
2162 kfree_skb(skb);
2163out_put:
2164 if (need_rls_dev)
2165 dev_put(dev);
2166out:
2167 mutex_unlock(&po->pg_vec_lock);
2168 return err;
2169}
2170
2171static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2172 size_t reserve, size_t len,
2173 size_t linear, int noblock,
2174 int *err)
2175{
2176 struct sk_buff *skb;
2177
2178
2179 if (prepad + len < PAGE_SIZE || !linear)
2180 linear = len;
2181
2182 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2183 err);
2184 if (!skb)
2185 return NULL;
2186
2187 skb_reserve(skb, reserve);
2188 skb_put(skb, linear);
2189 skb->data_len = len - linear;
2190 skb->len += len - linear;
2191
2192 return skb;
2193}
2194
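/* Ordinary sendmsg() path (no tx ring): copy the user data into a freshly
 * allocated skb, honouring an optional leading virtio_net_hdr
 * (PACKET_VNET_HDR) that carries checksum-offload and GSO metadata.
 */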
2195static int packet_snd(struct socket *sock,
2196 struct msghdr *msg, size_t len)
2197{
2198 struct sock *sk = sock->sk;
2199 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2200 struct sk_buff *skb;
2201 struct net_device *dev;
2202 __be16 proto;
2203 bool need_rls_dev = false;
2204 unsigned char *addr;
2205 int err, reserve = 0;
2206 struct virtio_net_hdr vnet_hdr = { 0 };
2207 int offset = 0;
2208 int vnet_hdr_len;
2209 struct packet_sock *po = pkt_sk(sk);
2210 unsigned short gso_type = 0;
2211 int hlen, tlen;
2212
2213
2214
2215
2216
2217 if (saddr == NULL) {
2218 dev = po->prot_hook.dev;
2219 proto = po->num;
2220 addr = NULL;
2221 } else {
2222 err = -EINVAL;
2223 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2224 goto out;
2225 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2226 goto out;
2227 proto = saddr->sll_protocol;
2228 addr = saddr->sll_addr;
2229 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2230 need_rls_dev = true;
2231 }
2232
2233 err = -ENXIO;
2234 if (dev == NULL)
2235 goto out_unlock;
2236 if (sock->type == SOCK_RAW)
2237 reserve = dev->hard_header_len;
2238
2239 err = -ENETDOWN;
2240 if (!(dev->flags & IFF_UP))
2241 goto out_unlock;
2242
2243 if (po->has_vnet_hdr) {
2244 vnet_hdr_len = sizeof(vnet_hdr);
2245
2246 err = -EINVAL;
2247 if (len < vnet_hdr_len)
2248 goto out_unlock;
2249
2250 len -= vnet_hdr_len;
2251
2252 err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
2253 vnet_hdr_len);
2254 if (err < 0)
2255 goto out_unlock;
2256
2257 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2258 (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
2259 vnet_hdr.hdr_len))
2260 vnet_hdr.hdr_len = vnet_hdr.csum_start +
2261 vnet_hdr.csum_offset + 2;
2262
2263 err = -EINVAL;
2264 if (vnet_hdr.hdr_len > len)
2265 goto out_unlock;
2266
2267 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2268 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2269 case VIRTIO_NET_HDR_GSO_TCPV4:
2270 gso_type = SKB_GSO_TCPV4;
2271 break;
2272 case VIRTIO_NET_HDR_GSO_TCPV6:
2273 gso_type = SKB_GSO_TCPV6;
2274 break;
2275 case VIRTIO_NET_HDR_GSO_UDP:
2276 gso_type = SKB_GSO_UDP;
2277 break;
2278 default:
2279 goto out_unlock;
2280 }
2281
2282 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2283 gso_type |= SKB_GSO_TCP_ECN;
2284
2285 if (vnet_hdr.gso_size == 0)
2286 goto out_unlock;
2287
2288 }
2289 }
2290
2291 err = -EMSGSIZE;
2292 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
2293 goto out_unlock;
2294
2295 err = -ENOBUFS;
2296 hlen = LL_RESERVED_SPACE(dev);
2297 tlen = dev->needed_tailroom;
2298 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
2299 msg->msg_flags & MSG_DONTWAIT, &err);
2300 if (skb == NULL)
2301 goto out_unlock;
2302
2303 skb_set_network_header(skb, reserve);
2304
2305 err = -EINVAL;
2306 if (sock->type == SOCK_DGRAM &&
2307 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
2308 goto out_free;
2309
2310
2311 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
2312 if (err)
2313 goto out_free;
2314 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2315 if (err < 0)
2316 goto out_free;
2317
2318 if (!gso_type && (len > dev->mtu + reserve)) {
2319
2320
2321
2322
2323 struct ethhdr *ehdr;
2324 skb_reset_mac_header(skb);
2325 ehdr = eth_hdr(skb);
2326 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2327 err = -EMSGSIZE;
2328 goto out_free;
2329 }
2330 }
2331
2332 skb->protocol = proto;
2333 skb->dev = dev;
2334 skb->priority = sk->sk_priority;
2335 skb->mark = sk->sk_mark;
2336
2337 if (po->has_vnet_hdr) {
2338 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2339 if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
2340 vnet_hdr.csum_offset)) {
2341 err = -EINVAL;
2342 goto out_free;
2343 }
2344 }
2345
2346 skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
2347 skb_shinfo(skb)->gso_type = gso_type;
2348
2349
2350 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2351 skb_shinfo(skb)->gso_segs = 0;
2352
2353 len += vnet_hdr_len;
2354 }
2355
2356
2357
2358
2359
2360 err = dev_queue_xmit(skb);
2361 if (err > 0 && (err = net_xmit_errno(err)) != 0)
2362 goto out_unlock;
2363
2364 if (need_rls_dev)
2365 dev_put(dev);
2366
2367 return len;
2368
2369out_free:
2370 kfree_skb(skb);
2371out_unlock:
2372 if (dev && need_rls_dev)
2373 dev_put(dev);
2374out:
2375 return err;
2376}
2377
2378static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
2379 struct msghdr *msg, size_t len)
2380{
2381 struct sock *sk = sock->sk;
2382 struct packet_sock *po = pkt_sk(sk);
2383 if (po->tx_ring.pg_vec)
2384 return tpacket_snd(po, msg);
2385 else
2386 return packet_snd(sock, msg, len);
2387}
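
/* Close a packet socket: unlink it from the per-netns socket list,
 * unregister its protocol hook, tear down any rx/tx rings and fanout
 * membership, then release the socket.
 */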
2394static int packet_release(struct socket *sock)
2395{
2396 struct sock *sk = sock->sk;
2397 struct packet_sock *po;
2398 struct net *net;
2399 union tpacket_req_u req_u;
2400
2401 if (!sk)
2402 return 0;
2403
2404 net = sock_net(sk);
2405 po = pkt_sk(sk);
2406
2407 spin_lock_bh(&net->packet.sklist_lock);
2408 sk_del_node_init_rcu(sk);
2409 sock_prot_inuse_add(net, sk->sk_prot, -1);
2410 spin_unlock_bh(&net->packet.sklist_lock);
2411
2412 spin_lock(&po->bind_lock);
2413 unregister_prot_hook(sk, false);
2414 if (po->prot_hook.dev) {
2415 dev_put(po->prot_hook.dev);
2416 po->prot_hook.dev = NULL;
2417 }
2418 spin_unlock(&po->bind_lock);
2419
2420 packet_flush_mclist(sk);
2421
2422 memset(&req_u, 0, sizeof(req_u));
2423
2424 if (po->rx_ring.pg_vec)
2425 packet_set_ring(sk, &req_u, 1, 0);
2426
2427 if (po->tx_ring.pg_vec)
2428 packet_set_ring(sk, &req_u, 1, 1);
2429
2430 fanout_release(sk);
2431
2432 synchronize_net();
2433
2434
2435
2436 sock_orphan(sk);
2437 sock->sk = NULL;
2438
2439
2440
2441 skb_queue_purge(&sk->sk_receive_queue);
2442 sk_refcnt_debug_release(sk);
2443
2444 sock_put(sk);
2445 return 0;
2446}
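
/* Attach the socket to a device/protocol: swap the protocol hook under
 * bind_lock and re-register it if the device is up.  Sockets that are part
 * of a fanout group may not be rebound.
 */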
2452static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
2453{
2454 struct packet_sock *po = pkt_sk(sk);
2455
2456 if (po->fanout) {
2457 if (dev)
2458 dev_put(dev);
2459
2460 return -EINVAL;
2461 }
2462
2463 lock_sock(sk);
2464
2465 spin_lock(&po->bind_lock);
2466 unregister_prot_hook(sk, true);
2467 po->num = protocol;
2468 po->prot_hook.type = protocol;
2469 if (po->prot_hook.dev)
2470 dev_put(po->prot_hook.dev);
2471 po->prot_hook.dev = dev;
2472
2473 po->ifindex = dev ? dev->ifindex : 0;
2474
2475 if (protocol == 0)
2476 goto out_unlock;
2477
2478 if (!dev || (dev->flags & IFF_UP)) {
2479 register_prot_hook(sk);
2480 } else {
2481 sk->sk_err = ENETDOWN;
2482 if (!sock_flag(sk, SOCK_DEAD))
2483 sk->sk_error_report(sk);
2484 }
2485
2486out_unlock:
2487 spin_unlock(&po->bind_lock);
2488 release_sock(sk);
2489 return 0;
2490}
2491
2492
2493
2494
2495
2496static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2497 int addr_len)
2498{
2499 struct sock *sk = sock->sk;
2500 char name[15];
2501 struct net_device *dev;
2502 int err = -ENODEV;
2503
2504
2505
2506
2507
2508 if (addr_len != sizeof(struct sockaddr))
2509 return -EINVAL;
2510 strlcpy(name, uaddr->sa_data, sizeof(name));
2511
2512 dev = dev_get_by_name(sock_net(sk), name);
2513 if (dev)
2514 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
2515 return err;
2516}
2517
2518static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
2519{
2520 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
2521 struct sock *sk = sock->sk;
2522 struct net_device *dev = NULL;
2523 int err;
2524
2525
2526
2527
2528
2529
2530 if (addr_len < sizeof(struct sockaddr_ll))
2531 return -EINVAL;
2532 if (sll->sll_family != AF_PACKET)
2533 return -EINVAL;
2534
2535 if (sll->sll_ifindex) {
2536 err = -ENODEV;
2537 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
2538 if (dev == NULL)
2539 goto out;
2540 }
2541 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
2542
2543out:
2544 return err;
2545}
2546
2547static struct proto packet_proto = {
2548 .name = "PACKET",
2549 .owner = THIS_MODULE,
2550 .obj_size = sizeof(struct packet_sock),
2551};
2552
2553
2554
2555
2556
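/*
 * Create a packet socket.  Requires CAP_NET_RAW; supports SOCK_DGRAM,
 * SOCK_RAW and the legacy SOCK_PACKET type.
 */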
2557static int packet_create(struct net *net, struct socket *sock, int protocol,
2558 int kern)
2559{
2560 struct sock *sk;
2561 struct packet_sock *po;
2562 __be16 proto = (__force __be16)protocol;
2563 int err;
2564
2565 if (!capable(CAP_NET_RAW))
2566 return -EPERM;
2567 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
2568 sock->type != SOCK_PACKET)
2569 return -ESOCKTNOSUPPORT;
2570
2571 sock->state = SS_UNCONNECTED;
2572
2573 err = -ENOBUFS;
2574 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
2575 if (sk == NULL)
2576 goto out;
2577
2578 sock->ops = &packet_ops;
2579 if (sock->type == SOCK_PACKET)
2580 sock->ops = &packet_ops_spkt;
2581
2582 sock_init_data(sock, sk);
2583
2584 po = pkt_sk(sk);
2585 sk->sk_family = PF_PACKET;
2586 po->num = proto;
2587
2588 sk->sk_destruct = packet_sock_destruct;
2589 sk_refcnt_debug_inc(sk);
2590
2591
2592
2593
2594
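	/*
	 * Attach the protocol hook.  SOCK_PACKET sockets get the legacy
	 * receive handler; the hook is only registered if a protocol was
	 * requested at socket() time.
	 */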
2595 spin_lock_init(&po->bind_lock);
2596 mutex_init(&po->pg_vec_lock);
2597 po->prot_hook.func = packet_rcv;
2598
2599 if (sock->type == SOCK_PACKET)
2600 po->prot_hook.func = packet_rcv_spkt;
2601
2602 po->prot_hook.af_packet_priv = sk;
2603
2604 if (proto) {
2605 po->prot_hook.type = proto;
2606 register_prot_hook(sk);
2607 }
2608
2609 spin_lock_bh(&net->packet.sklist_lock);
2610 sk_add_node_rcu(sk, &net->packet.sklist);
2611 sock_prot_inuse_add(net, &packet_proto, 1);
2612 spin_unlock_bh(&net->packet.sklist_lock);
2613
2614 return 0;
2615out:
2616 return err;
2617}
2618
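/*
 * MSG_ERRQUEUE receive path: dequeue one skb from the error queue and hand
 * its metadata (e.g. a TX timestamp) to the caller as a PACKET_TX_TIMESTAMP
 * control message.
 */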
2619static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2620{
2621 struct sock_exterr_skb *serr;
2622 struct sk_buff *skb, *skb2;
2623 int copied, err;
2624
2625 err = -EAGAIN;
2626 skb = skb_dequeue(&sk->sk_error_queue);
2627 if (skb == NULL)
2628 goto out;
2629
2630 copied = skb->len;
2631 if (copied > len) {
2632 msg->msg_flags |= MSG_TRUNC;
2633 copied = len;
2634 }
2635 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2636 if (err)
2637 goto out_free_skb;
2638
2639 sock_recv_timestamp(msg, sk, skb);
2640
2641 serr = SKB_EXT_ERR(skb);
2642 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2643 sizeof(serr->ee), &serr->ee);
2644
2645 msg->msg_flags |= MSG_ERRQUEUE;
2646 err = copied;
2647
2648
2649 spin_lock_bh(&sk->sk_error_queue.lock);
2650 sk->sk_err = 0;
2651 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2652 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2653 spin_unlock_bh(&sk->sk_error_queue.lock);
2654 sk->sk_error_report(sk);
2655 } else
2656 spin_unlock_bh(&sk->sk_error_queue.lock);
2657
2658out_free_skb:
2659 kfree_skb(skb);
2660out:
2661 return err;
2662}
2663
2664
2665
2666
2667
2668
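/*
 * Pull a packet from our receive queue and hand it to the user.  If
 * necessary we block.
 */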
2669static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
2670 struct msghdr *msg, size_t len, int flags)
2671{
2672 struct sock *sk = sock->sk;
2673 struct sk_buff *skb;
2674 int copied, err;
2675 struct sockaddr_ll *sll;
2676 int vnet_hdr_len = 0;
2677
2678 err = -EINVAL;
2679 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
2680 goto out;
2681
2682#if 0
2683
2684 if (pkt_sk(sk)->ifindex < 0)
2685 return -ENODEV;
2686#endif
2687
2688 if (flags & MSG_ERRQUEUE) {
2689 err = packet_recv_error(sk, msg, len);
2690 goto out;
2691 }
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
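	/*
	 * Call the generic datagram receiver; it handles blocking,
	 * non-blocking and the associated races for us.
	 */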
2702 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
2703
2704
2705
2706
2707
2708
2709
2710 if (skb == NULL)
2711 goto out;
2712
2713 if (pkt_sk(sk)->has_vnet_hdr) {
2714 struct virtio_net_hdr vnet_hdr = { 0 };
2715
2716 err = -EINVAL;
2717 vnet_hdr_len = sizeof(vnet_hdr);
2718 if (len < vnet_hdr_len)
2719 goto out_free;
2720
2721 len -= vnet_hdr_len;
2722
2723 if (skb_is_gso(skb)) {
2724 struct skb_shared_info *sinfo = skb_shinfo(skb);
2725
2726
2727 vnet_hdr.hdr_len = skb_headlen(skb);
2728 vnet_hdr.gso_size = sinfo->gso_size;
2729 if (sinfo->gso_type & SKB_GSO_TCPV4)
2730 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2731 else if (sinfo->gso_type & SKB_GSO_TCPV6)
2732 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2733 else if (sinfo->gso_type & SKB_GSO_UDP)
2734 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
2735 else if (sinfo->gso_type & SKB_GSO_FCOE)
2736 goto out_free;
2737 else
2738 BUG();
2739 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
2740 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2741 } else
2742 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
2743
2744 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2745 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
2746 vnet_hdr.csum_start = skb_checksum_start_offset(skb);
2747 vnet_hdr.csum_offset = skb->csum_offset;
2748 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
2749 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
2750 }
2751
2752 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
2753 vnet_hdr_len);
2754 if (err < 0)
2755 goto out_free;
2756 }
2757
2758
2759
2760
2761
2762
2763 sll = &PACKET_SKB_CB(skb)->sa.ll;
2764 if (sock->type == SOCK_PACKET)
2765 msg->msg_namelen = sizeof(struct sockaddr_pkt);
2766 else
2767 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
2768
2769
2770
2771
2772
2773
2774 copied = skb->len;
2775 if (copied > len) {
2776 copied = len;
2777 msg->msg_flags |= MSG_TRUNC;
2778 }
2779
2780 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2781 if (err)
2782 goto out_free;
2783
2784 sock_recv_ts_and_drops(msg, sk, skb);
2785
2786 if (msg->msg_name)
2787 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
2788 msg->msg_namelen);
2789
2790 if (pkt_sk(sk)->auxdata) {
2791 struct tpacket_auxdata aux;
2792
2793 aux.tp_status = TP_STATUS_USER;
2794 if (skb->ip_summed == CHECKSUM_PARTIAL)
2795 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
2796 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
2797 aux.tp_snaplen = skb->len;
2798 aux.tp_mac = 0;
2799 aux.tp_net = skb_network_offset(skb);
2800 if (vlan_tx_tag_present(skb)) {
2801 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
2802 aux.tp_status |= TP_STATUS_VLAN_VALID;
2803 } else {
2804 aux.tp_vlan_tci = 0;
2805 }
2806 aux.tp_padding = 0;
2807 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
2808 }
2809
2810
2811
2812
2813
2814 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
2815
2816out_free:
2817 skb_free_datagram(sk, skb);
2818out:
2819 return err;
2820}
2821
2822static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
2823 int *uaddr_len, int peer)
2824{
2825 struct net_device *dev;
2826 struct sock *sk = sock->sk;
2827
2828 if (peer)
2829 return -EOPNOTSUPP;
2830
2831 uaddr->sa_family = AF_PACKET;
2832 rcu_read_lock();
2833 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
2834 if (dev)
2835 strncpy(uaddr->sa_data, dev->name, 14);
2836 else
2837 memset(uaddr->sa_data, 0, 14);
2838 rcu_read_unlock();
2839 *uaddr_len = sizeof(*uaddr);
2840
2841 return 0;
2842}
2843
2844static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
2845 int *uaddr_len, int peer)
2846{
2847 struct net_device *dev;
2848 struct sock *sk = sock->sk;
2849 struct packet_sock *po = pkt_sk(sk);
2850 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
2851
2852 if (peer)
2853 return -EOPNOTSUPP;
2854
2855 sll->sll_family = AF_PACKET;
2856 sll->sll_ifindex = po->ifindex;
2857 sll->sll_protocol = po->num;
2858 sll->sll_pkttype = 0;
2859 rcu_read_lock();
2860 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
2861 if (dev) {
2862 sll->sll_hatype = dev->type;
2863 sll->sll_halen = dev->addr_len;
2864 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
2865 } else {
2866 sll->sll_hatype = 0;
2867 sll->sll_halen = 0;
2868 }
2869 rcu_read_unlock();
2870 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
2871
2872 return 0;
2873}
2874
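/*
 * Apply (what > 0) or remove (what < 0) one membership entry on a device:
 * a multicast/unicast address, promiscuous mode or all-multicast mode.
 */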
2875static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
2876 int what)
2877{
2878 switch (i->type) {
2879 case PACKET_MR_MULTICAST:
2880 if (i->alen != dev->addr_len)
2881 return -EINVAL;
2882 if (what > 0)
2883 return dev_mc_add(dev, i->addr);
2884 else
2885 return dev_mc_del(dev, i->addr);
2886 break;
2887 case PACKET_MR_PROMISC:
2888 return dev_set_promiscuity(dev, what);
2889 break;
2890 case PACKET_MR_ALLMULTI:
2891 return dev_set_allmulti(dev, what);
2892 break;
2893 case PACKET_MR_UNICAST:
2894 if (i->alen != dev->addr_len)
2895 return -EINVAL;
2896 if (what > 0)
2897 return dev_uc_add(dev, i->addr);
2898 else
2899 return dev_uc_del(dev, i->addr);
2900 break;
2901 default:
2902 break;
2903 }
2904 return 0;
2905}
2906
2907static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
2908{
2909 for ( ; i; i = i->next) {
2910 if (i->ifindex == dev->ifindex)
2911 packet_dev_mc(dev, i, what);
2912 }
2913}
2914
2915static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
2916{
2917 struct packet_sock *po = pkt_sk(sk);
2918 struct packet_mclist *ml, *i;
2919 struct net_device *dev;
2920 int err;
2921
2922 rtnl_lock();
2923
2924 err = -ENODEV;
2925 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
2926 if (!dev)
2927 goto done;
2928
2929 err = -EINVAL;
2930 if (mreq->mr_alen > dev->addr_len)
2931 goto done;
2932
2933 err = -ENOBUFS;
2934 i = kmalloc(sizeof(*i), GFP_KERNEL);
2935 if (i == NULL)
2936 goto done;
2937
2938 err = 0;
2939 for (ml = po->mclist; ml; ml = ml->next) {
2940 if (ml->ifindex == mreq->mr_ifindex &&
2941 ml->type == mreq->mr_type &&
2942 ml->alen == mreq->mr_alen &&
2943 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
2944 ml->count++;
2945
2946 kfree(i);
2947 goto done;
2948 }
2949 }
2950
2951 i->type = mreq->mr_type;
2952 i->ifindex = mreq->mr_ifindex;
2953 i->alen = mreq->mr_alen;
2954 memcpy(i->addr, mreq->mr_address, i->alen);
2955 i->count = 1;
2956 i->next = po->mclist;
2957 po->mclist = i;
2958 err = packet_dev_mc(dev, i, 1);
2959 if (err) {
2960 po->mclist = i->next;
2961 kfree(i);
2962 }
2963
2964done:
2965 rtnl_unlock();
2966 return err;
2967}
2968
2969static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
2970{
2971 struct packet_mclist *ml, **mlp;
2972
2973 rtnl_lock();
2974
2975 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
2976 if (ml->ifindex == mreq->mr_ifindex &&
2977 ml->type == mreq->mr_type &&
2978 ml->alen == mreq->mr_alen &&
2979 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
2980 if (--ml->count == 0) {
2981 struct net_device *dev;
2982 *mlp = ml->next;
2983 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
2984 if (dev)
2985 packet_dev_mc(dev, ml, -1);
2986 kfree(ml);
2987 }
2988 rtnl_unlock();
2989 return 0;
2990 }
2991 }
2992 rtnl_unlock();
2993 return -EADDRNOTAVAIL;
2994}
2995
2996static void packet_flush_mclist(struct sock *sk)
2997{
2998 struct packet_sock *po = pkt_sk(sk);
2999 struct packet_mclist *ml;
3000
3001 if (!po->mclist)
3002 return;
3003
3004 rtnl_lock();
3005 while ((ml = po->mclist) != NULL) {
3006 struct net_device *dev;
3007
3008 po->mclist = ml->next;
3009 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3010 if (dev != NULL)
3011 packet_dev_mc(dev, ml, -1);
3012 kfree(ml);
3013 }
3014 rtnl_unlock();
3015}
3016
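/*
 * SOL_PACKET setsockopt handler.  Ring-related options (PACKET_VERSION,
 * PACKET_RESERVE, PACKET_LOSS, PACKET_VNET_HDR) must be issued before a
 * ring buffer has been set up (-EBUSY otherwise).  A hedged, illustrative
 * user-space sketch of configuring a TPACKET_V2 receive ring (not part of
 * this file; sizes are arbitrary, error handling omitted):
 *
 *	int ver = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_frame_size = 2048,
 *		.tp_block_nr   = 64,
 *		.tp_frame_nr   = 128,	/* frames_per_block * block_nr */
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */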
3017static int
3018packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3019{
3020 struct sock *sk = sock->sk;
3021 struct packet_sock *po = pkt_sk(sk);
3022 int ret;
3023
3024 if (level != SOL_PACKET)
3025 return -ENOPROTOOPT;
3026
3027 switch (optname) {
3028 case PACKET_ADD_MEMBERSHIP:
3029 case PACKET_DROP_MEMBERSHIP:
3030 {
3031 struct packet_mreq_max mreq;
3032 int len = optlen;
3033 memset(&mreq, 0, sizeof(mreq));
3034 if (len < sizeof(struct packet_mreq))
3035 return -EINVAL;
3036 if (len > sizeof(mreq))
3037 len = sizeof(mreq);
3038 if (copy_from_user(&mreq, optval, len))
3039 return -EFAULT;
3040 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3041 return -EINVAL;
3042 if (optname == PACKET_ADD_MEMBERSHIP)
3043 ret = packet_mc_add(sk, &mreq);
3044 else
3045 ret = packet_mc_drop(sk, &mreq);
3046 return ret;
3047 }
3048
3049 case PACKET_RX_RING:
3050 case PACKET_TX_RING:
3051 {
3052 union tpacket_req_u req_u;
3053 int len;
3054
3055 switch (po->tp_version) {
3056 case TPACKET_V1:
3057 case TPACKET_V2:
3058 len = sizeof(req_u.req);
3059 break;
3060 case TPACKET_V3:
3061 default:
3062 len = sizeof(req_u.req3);
3063 break;
3064 }
3065 if (optlen < len)
3066 return -EINVAL;
3067 if (pkt_sk(sk)->has_vnet_hdr)
3068 return -EINVAL;
3069 if (copy_from_user(&req_u.req, optval, len))
3070 return -EFAULT;
3071 return packet_set_ring(sk, &req_u, 0,
3072 optname == PACKET_TX_RING);
3073 }
3074 case PACKET_COPY_THRESH:
3075 {
3076 int val;
3077
3078 if (optlen != sizeof(val))
3079 return -EINVAL;
3080 if (copy_from_user(&val, optval, sizeof(val)))
3081 return -EFAULT;
3082
3083 pkt_sk(sk)->copy_thresh = val;
3084 return 0;
3085 }
3086 case PACKET_VERSION:
3087 {
3088 int val;
3089
3090 if (optlen != sizeof(val))
3091 return -EINVAL;
3092 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3093 return -EBUSY;
3094 if (copy_from_user(&val, optval, sizeof(val)))
3095 return -EFAULT;
3096 switch (val) {
3097 case TPACKET_V1:
3098 case TPACKET_V2:
3099 case TPACKET_V3:
3100 po->tp_version = val;
3101 return 0;
3102 default:
3103 return -EINVAL;
3104 }
3105 }
3106 case PACKET_RESERVE:
3107 {
3108 unsigned int val;
3109
3110 if (optlen != sizeof(val))
3111 return -EINVAL;
3112 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3113 return -EBUSY;
3114 if (copy_from_user(&val, optval, sizeof(val)))
3115 return -EFAULT;
3116 po->tp_reserve = val;
3117 return 0;
3118 }
3119 case PACKET_LOSS:
3120 {
3121 unsigned int val;
3122
3123 if (optlen != sizeof(val))
3124 return -EINVAL;
3125 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3126 return -EBUSY;
3127 if (copy_from_user(&val, optval, sizeof(val)))
3128 return -EFAULT;
3129 po->tp_loss = !!val;
3130 return 0;
3131 }
3132 case PACKET_AUXDATA:
3133 {
3134 int val;
3135
3136 if (optlen < sizeof(val))
3137 return -EINVAL;
3138 if (copy_from_user(&val, optval, sizeof(val)))
3139 return -EFAULT;
3140
3141 po->auxdata = !!val;
3142 return 0;
3143 }
3144 case PACKET_ORIGDEV:
3145 {
3146 int val;
3147
3148 if (optlen < sizeof(val))
3149 return -EINVAL;
3150 if (copy_from_user(&val, optval, sizeof(val)))
3151 return -EFAULT;
3152
3153 po->origdev = !!val;
3154 return 0;
3155 }
3156 case PACKET_VNET_HDR:
3157 {
3158 int val;
3159
3160 if (sock->type != SOCK_RAW)
3161 return -EINVAL;
3162 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3163 return -EBUSY;
3164 if (optlen < sizeof(val))
3165 return -EINVAL;
3166 if (copy_from_user(&val, optval, sizeof(val)))
3167 return -EFAULT;
3168
3169 po->has_vnet_hdr = !!val;
3170 return 0;
3171 }
3172 case PACKET_TIMESTAMP:
3173 {
3174 int val;
3175
3176 if (optlen != sizeof(val))
3177 return -EINVAL;
3178 if (copy_from_user(&val, optval, sizeof(val)))
3179 return -EFAULT;
3180
3181 po->tp_tstamp = val;
3182 return 0;
3183 }
3184 case PACKET_FANOUT:
3185 {
3186 int val;
3187
3188 if (optlen != sizeof(val))
3189 return -EINVAL;
3190 if (copy_from_user(&val, optval, sizeof(val)))
3191 return -EFAULT;
3192
3193 return fanout_add(sk, val & 0xffff, val >> 16);
3194 }
3195 default:
3196 return -ENOPROTOOPT;
3197 }
3198}
3199
3200static int packet_getsockopt(struct socket *sock, int level, int optname,
3201 char __user *optval, int __user *optlen)
3202{
3203 int len;
3204 int val;
3205 struct sock *sk = sock->sk;
3206 struct packet_sock *po = pkt_sk(sk);
3207 void *data;
3208 struct tpacket_stats st;
3209 union tpacket_stats_u st_u;
3210
3211 if (level != SOL_PACKET)
3212 return -ENOPROTOOPT;
3213
3214 if (get_user(len, optlen))
3215 return -EFAULT;
3216
3217 if (len < 0)
3218 return -EINVAL;
3219
3220 switch (optname) {
3221 case PACKET_STATISTICS:
3222 if (po->tp_version == TPACKET_V3) {
3223 len = sizeof(struct tpacket_stats_v3);
3224 } else {
3225 if (len > sizeof(struct tpacket_stats))
3226 len = sizeof(struct tpacket_stats);
3227 }
3228 spin_lock_bh(&sk->sk_receive_queue.lock);
3229 if (po->tp_version == TPACKET_V3) {
3230 memcpy(&st_u.stats3, &po->stats,
3231 sizeof(struct tpacket_stats));
3232 st_u.stats3.tp_freeze_q_cnt =
3233 po->stats_u.stats3.tp_freeze_q_cnt;
3234 st_u.stats3.tp_packets += po->stats.tp_drops;
3235 data = &st_u.stats3;
3236 } else {
3237 st = po->stats;
3238 st.tp_packets += st.tp_drops;
3239 data = &st;
3240 }
3241 memset(&po->stats, 0, sizeof(st));
3242 spin_unlock_bh(&sk->sk_receive_queue.lock);
3243 break;
3244 case PACKET_AUXDATA:
3245 if (len > sizeof(int))
3246 len = sizeof(int);
3247 val = po->auxdata;
3248
3249 data = &val;
3250 break;
3251 case PACKET_ORIGDEV:
3252 if (len > sizeof(int))
3253 len = sizeof(int);
3254 val = po->origdev;
3255
3256 data = &val;
3257 break;
3258 case PACKET_VNET_HDR:
3259 if (len > sizeof(int))
3260 len = sizeof(int);
3261 val = po->has_vnet_hdr;
3262
3263 data = &val;
3264 break;
3265 case PACKET_VERSION:
3266 if (len > sizeof(int))
3267 len = sizeof(int);
3268 val = po->tp_version;
3269 data = &val;
3270 break;
3271 case PACKET_HDRLEN:
3272 if (len > sizeof(int))
3273 len = sizeof(int);
3274 if (copy_from_user(&val, optval, len))
3275 return -EFAULT;
3276 switch (val) {
3277 case TPACKET_V1:
3278 val = sizeof(struct tpacket_hdr);
3279 break;
3280 case TPACKET_V2:
3281 val = sizeof(struct tpacket2_hdr);
3282 break;
3283 case TPACKET_V3:
3284 val = sizeof(struct tpacket3_hdr);
3285 break;
3286 default:
3287 return -EINVAL;
3288 }
3289 data = &val;
3290 break;
3291 case PACKET_RESERVE:
3292 if (len > sizeof(unsigned int))
3293 len = sizeof(unsigned int);
3294 val = po->tp_reserve;
3295 data = &val;
3296 break;
3297 case PACKET_LOSS:
3298 if (len > sizeof(unsigned int))
3299 len = sizeof(unsigned int);
3300 val = po->tp_loss;
3301 data = &val;
3302 break;
3303 case PACKET_TIMESTAMP:
3304 if (len > sizeof(int))
3305 len = sizeof(int);
3306 val = po->tp_tstamp;
3307 data = &val;
3308 break;
3309 case PACKET_FANOUT:
3310 if (len > sizeof(int))
3311 len = sizeof(int);
3312 val = (po->fanout ?
3313 ((u32)po->fanout->id |
3314 ((u32)po->fanout->type << 16)) :
3315 0);
3316 data = &val;
3317 break;
3318 default:
3319 return -ENOPROTOOPT;
3320 }
3321
3322 if (put_user(len, optlen))
3323 return -EFAULT;
3324 if (copy_to_user(optval, data, len))
3325 return -EFAULT;
3326 return 0;
3327}
3328
3329
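/*
 * Netdevice notifier: drop multicast references and unregister the protocol
 * hook when a bound device goes down or is unregistered, and re-register the
 * hook when the device comes back up.
 */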
3330static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
3331{
3332 struct sock *sk;
3333 struct hlist_node *node;
3334 struct net_device *dev = data;
3335 struct net *net = dev_net(dev);
3336
3337 rcu_read_lock();
3338 sk_for_each_rcu(sk, node, &net->packet.sklist) {
3339 struct packet_sock *po = pkt_sk(sk);
3340
3341 switch (msg) {
3342 case NETDEV_UNREGISTER:
3343 if (po->mclist)
3344 packet_dev_mclist(dev, po->mclist, -1);
3345
3346
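			/* fall through: an unregistered device is also down */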
3347 case NETDEV_DOWN:
3348 if (dev->ifindex == po->ifindex) {
3349 spin_lock(&po->bind_lock);
3350 if (po->running) {
3351 __unregister_prot_hook(sk, false);
3352 sk->sk_err = ENETDOWN;
3353 if (!sock_flag(sk, SOCK_DEAD))
3354 sk->sk_error_report(sk);
3355 }
3356 if (msg == NETDEV_UNREGISTER) {
3357 po->ifindex = -1;
3358 if (po->prot_hook.dev)
3359 dev_put(po->prot_hook.dev);
3360 po->prot_hook.dev = NULL;
3361 }
3362 spin_unlock(&po->bind_lock);
3363 }
3364 break;
3365 case NETDEV_UP:
3366 if (dev->ifindex == po->ifindex) {
3367 spin_lock(&po->bind_lock);
3368 if (po->num)
3369 register_prot_hook(sk);
3370 spin_unlock(&po->bind_lock);
3371 }
3372 break;
3373 }
3374 }
3375 rcu_read_unlock();
3376 return NOTIFY_DONE;
3377}
3378
3379
3380static int packet_ioctl(struct socket *sock, unsigned int cmd,
3381 unsigned long arg)
3382{
3383 struct sock *sk = sock->sk;
3384
3385 switch (cmd) {
3386 case SIOCOUTQ:
3387 {
3388 int amount = sk_wmem_alloc_get(sk);
3389
3390 return put_user(amount, (int __user *)arg);
3391 }
3392 case SIOCINQ:
3393 {
3394 struct sk_buff *skb;
3395 int amount = 0;
3396
3397 spin_lock_bh(&sk->sk_receive_queue.lock);
3398 skb = skb_peek(&sk->sk_receive_queue);
3399 if (skb)
3400 amount = skb->len;
3401 spin_unlock_bh(&sk->sk_receive_queue.lock);
3402 return put_user(amount, (int __user *)arg);
3403 }
3404 case SIOCGSTAMP:
3405 return sock_get_timestamp(sk, (struct timeval __user *)arg);
3406 case SIOCGSTAMPNS:
3407 return sock_get_timestampns(sk, (struct timespec __user *)arg);
3408
3409#ifdef CONFIG_INET
3410 case SIOCADDRT:
3411 case SIOCDELRT:
3412 case SIOCDARP:
3413 case SIOCGARP:
3414 case SIOCSARP:
3415 case SIOCGIFADDR:
3416 case SIOCSIFADDR:
3417 case SIOCGIFBRDADDR:
3418 case SIOCSIFBRDADDR:
3419 case SIOCGIFNETMASK:
3420 case SIOCSIFNETMASK:
3421 case SIOCGIFDSTADDR:
3422 case SIOCSIFDSTADDR:
3423 case SIOCSIFFLAGS:
3424 return inet_dgram_ops.ioctl(sock, cmd, arg);
3425#endif
3426
3427 default:
3428 return -ENOIOCTLCMD;
3429 }
3430 return 0;
3431}
3432
3433static unsigned int packet_poll(struct file *file, struct socket *sock,
3434 poll_table *wait)
3435{
3436 struct sock *sk = sock->sk;
3437 struct packet_sock *po = pkt_sk(sk);
3438 unsigned int mask = datagram_poll(file, sock, wait);
3439
3440 spin_lock_bh(&sk->sk_receive_queue.lock);
3441 if (po->rx_ring.pg_vec) {
3442 if (!packet_previous_rx_frame(po, &po->rx_ring,
3443 TP_STATUS_KERNEL))
3444 mask |= POLLIN | POLLRDNORM;
3445 }
3446 spin_unlock_bh(&sk->sk_receive_queue.lock);
3447 spin_lock_bh(&sk->sk_write_queue.lock);
3448 if (po->tx_ring.pg_vec) {
3449 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
3450 mask |= POLLOUT | POLLWRNORM;
3451 }
3452 spin_unlock_bh(&sk->sk_write_queue.lock);
3453 return mask;
3454}
3455
3456
3457
3458
3459
3460
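/*
 * The mmap open/close callbacks keep po->mapped in step with the number of
 * live mappings, so the ring cannot be torn down or resized while user
 * space still has it mapped.
 */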
3461static void packet_mm_open(struct vm_area_struct *vma)
3462{
3463 struct file *file = vma->vm_file;
3464 struct socket *sock = file->private_data;
3465 struct sock *sk = sock->sk;
3466
3467 if (sk)
3468 atomic_inc(&pkt_sk(sk)->mapped);
3469}
3470
3471static void packet_mm_close(struct vm_area_struct *vma)
3472{
3473 struct file *file = vma->vm_file;
3474 struct socket *sock = file->private_data;
3475 struct sock *sk = sock->sk;
3476
3477 if (sk)
3478 atomic_dec(&pkt_sk(sk)->mapped);
3479}
3480
3481static const struct vm_operations_struct packet_mmap_ops = {
3482 .open = packet_mm_open,
3483 .close = packet_mm_close,
3484};
3485
3486static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
3487 unsigned int len)
3488{
3489 int i;
3490
3491 for (i = 0; i < len; i++) {
3492 if (likely(pg_vec[i].buffer)) {
3493 if (is_vmalloc_addr(pg_vec[i].buffer))
3494 vfree(pg_vec[i].buffer);
3495 else
3496 free_pages((unsigned long)pg_vec[i].buffer,
3497 order);
3498 pg_vec[i].buffer = NULL;
3499 }
3500 }
3501 kfree(pg_vec);
3502}
3503
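/*
 * Allocate one ring block: try __get_free_pages() without retrying first,
 * fall back to vzalloc(), and as a last resort retry the page allocator
 * without __GFP_NORETRY.
 */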
3504static char *alloc_one_pg_vec_page(unsigned long order)
3505{
3506 char *buffer = NULL;
3507 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
3508 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
3509
3510 buffer = (char *) __get_free_pages(gfp_flags, order);
3511
3512 if (buffer)
3513 return buffer;
3514
3515
3516
3517
3518 buffer = vzalloc((1 << order) * PAGE_SIZE);
3519
3520 if (buffer)
3521 return buffer;
3522
3523
3524
3525
3526 gfp_flags &= ~__GFP_NORETRY;
3527 buffer = (char *)__get_free_pages(gfp_flags, order);
3528 if (buffer)
3529 return buffer;
3530
3531
3532
3533
3534 return NULL;
3535}
3536
3537static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
3538{
3539 unsigned int block_nr = req->tp_block_nr;
3540 struct pgv *pg_vec;
3541 int i;
3542
3543 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
3544 if (unlikely(!pg_vec))
3545 goto out;
3546
3547 for (i = 0; i < block_nr; i++) {
3548 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
3549 if (unlikely(!pg_vec[i].buffer))
3550 goto out_free_pgvec;
3551 }
3552
3553out:
3554 return pg_vec;
3555
3556out_free_pgvec:
3557 free_pg_vec(pg_vec, order, block_nr);
3558 pg_vec = NULL;
3559 goto out;
3560}
3561
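/*
 * Allocate or free (when closing) an rx/tx ring.  The protocol hook is
 * unregistered and the pg_vec swapped under pg_vec_lock, then the hook is
 * restored; the swap is refused while the ring is still mmap()ed.
 */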
3562static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
3563 int closing, int tx_ring)
3564{
3565 struct pgv *pg_vec = NULL;
3566 struct packet_sock *po = pkt_sk(sk);
3567 int was_running, order = 0;
3568 struct packet_ring_buffer *rb;
3569 struct sk_buff_head *rb_queue;
3570 __be16 num;
3571 int err = -EINVAL;
3572
3573 struct tpacket_req *req = &req_u->req;
3574
3575
3576 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
3577 WARN(1, "Tx-ring is not supported.\n");
3578 goto out;
3579 }
3580
3581 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
3582 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
3583
3584 err = -EBUSY;
3585 if (!closing) {
3586 if (atomic_read(&po->mapped))
3587 goto out;
3588 if (atomic_read(&rb->pending))
3589 goto out;
3590 }
3591
3592 if (req->tp_block_nr) {
3593
3594 err = -EBUSY;
3595 if (unlikely(rb->pg_vec))
3596 goto out;
3597
3598 switch (po->tp_version) {
3599 case TPACKET_V1:
3600 po->tp_hdrlen = TPACKET_HDRLEN;
3601 break;
3602 case TPACKET_V2:
3603 po->tp_hdrlen = TPACKET2_HDRLEN;
3604 break;
3605 case TPACKET_V3:
3606 po->tp_hdrlen = TPACKET3_HDRLEN;
3607 break;
3608 }
3609
3610 err = -EINVAL;
3611 if (unlikely((int)req->tp_block_size <= 0))
3612 goto out;
3613 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
3614 goto out;
3615 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
3616 po->tp_reserve))
3617 goto out;
3618 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
3619 goto out;
3620
3621 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
3622 if (unlikely(rb->frames_per_block <= 0))
3623 goto out;
3624 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
3625 req->tp_frame_nr))
3626 goto out;
3627
3628 err = -ENOMEM;
3629 order = get_order(req->tp_block_size);
3630 pg_vec = alloc_pg_vec(req, order);
3631 if (unlikely(!pg_vec))
3632 goto out;
3633 switch (po->tp_version) {
3634 case TPACKET_V3:
3635
3636
3637
3638 if (!tx_ring)
3639 init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
3640 break;
3641 default:
3642 break;
3643 }
3644 }
3645
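	/* A request with tp_block_nr == 0 tears the ring down. */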
3646 else {
3647 err = -EINVAL;
3648 if (unlikely(req->tp_frame_nr))
3649 goto out;
3650 }
3651
3652 lock_sock(sk);
3653
3654
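	/* Detach the socket from the network while we swap the ring. */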
3655 spin_lock(&po->bind_lock);
3656 was_running = po->running;
3657 num = po->num;
3658 if (was_running) {
3659 po->num = 0;
3660 __unregister_prot_hook(sk, false);
3661 }
3662 spin_unlock(&po->bind_lock);
3663
3664 synchronize_net();
3665
3666 err = -EBUSY;
3667 mutex_lock(&po->pg_vec_lock);
3668 if (closing || atomic_read(&po->mapped) == 0) {
3669 err = 0;
3670 spin_lock_bh(&rb_queue->lock);
3671 swap(rb->pg_vec, pg_vec);
3672 rb->frame_max = (req->tp_frame_nr - 1);
3673 rb->head = 0;
3674 rb->frame_size = req->tp_frame_size;
3675 spin_unlock_bh(&rb_queue->lock);
3676
3677 swap(rb->pg_vec_order, order);
3678 swap(rb->pg_vec_len, req->tp_block_nr);
3679
3680 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
3681 po->prot_hook.func = (po->rx_ring.pg_vec) ?
3682 tpacket_rcv : packet_rcv;
3683 skb_queue_purge(rb_queue);
3684 if (atomic_read(&po->mapped))
3685 pr_err("packet_mmap: vma is busy: %d\n",
3686 atomic_read(&po->mapped));
3687 }
3688 mutex_unlock(&po->pg_vec_lock);
3689
3690 spin_lock(&po->bind_lock);
3691 if (was_running) {
3692 po->num = num;
3693 register_prot_hook(sk);
3694 }
3695 spin_unlock(&po->bind_lock);
3696 if (closing && (po->tp_version > TPACKET_V2)) {
3697
3698 if (!tx_ring)
3699 prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
3700 }
3701 release_sock(sk);
3702
3703 if (pg_vec)
3704 free_pg_vec(pg_vec, order, req->tp_block_nr);
3705out:
3706 return err;
3707}
3708
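/*
 * Map the rx and/or tx ring into user space.  The requested length must
 * cover every configured ring exactly and the offset must be zero.  A
 * hedged, illustrative user-space sketch (assumes the PACKET_RX_RING
 * request "req" from the sketch above; error handling omitted):
 *
 *	size_t len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 */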
3709static int packet_mmap(struct file *file, struct socket *sock,
3710 struct vm_area_struct *vma)
3711{
3712 struct sock *sk = sock->sk;
3713 struct packet_sock *po = pkt_sk(sk);
3714 unsigned long size, expected_size;
3715 struct packet_ring_buffer *rb;
3716 unsigned long start;
3717 int err = -EINVAL;
3718 int i;
3719
3720 if (vma->vm_pgoff)
3721 return -EINVAL;
3722
3723 mutex_lock(&po->pg_vec_lock);
3724
3725 expected_size = 0;
3726 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3727 if (rb->pg_vec) {
3728 expected_size += rb->pg_vec_len
3729 * rb->pg_vec_pages
3730 * PAGE_SIZE;
3731 }
3732 }
3733
3734 if (expected_size == 0)
3735 goto out;
3736
3737 size = vma->vm_end - vma->vm_start;
3738 if (size != expected_size)
3739 goto out;
3740
3741 start = vma->vm_start;
3742 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3743 if (rb->pg_vec == NULL)
3744 continue;
3745
3746 for (i = 0; i < rb->pg_vec_len; i++) {
3747 struct page *page;
3748 void *kaddr = rb->pg_vec[i].buffer;
3749 int pg_num;
3750
3751 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
3752 page = pgv_to_page(kaddr);
3753 err = vm_insert_page(vma, start, page);
3754 if (unlikely(err))
3755 goto out;
3756 start += PAGE_SIZE;
3757 kaddr += PAGE_SIZE;
3758 }
3759 }
3760 }
3761
3762 atomic_inc(&po->mapped);
3763 vma->vm_ops = &packet_mmap_ops;
3764 err = 0;
3765
3766out:
3767 mutex_unlock(&po->pg_vec_lock);
3768 return err;
3769}
3770
3771static const struct proto_ops packet_ops_spkt = {
3772 .family = PF_PACKET,
3773 .owner = THIS_MODULE,
3774 .release = packet_release,
3775 .bind = packet_bind_spkt,
3776 .connect = sock_no_connect,
3777 .socketpair = sock_no_socketpair,
3778 .accept = sock_no_accept,
3779 .getname = packet_getname_spkt,
3780 .poll = datagram_poll,
3781 .ioctl = packet_ioctl,
3782 .listen = sock_no_listen,
3783 .shutdown = sock_no_shutdown,
3784 .setsockopt = sock_no_setsockopt,
3785 .getsockopt = sock_no_getsockopt,
3786 .sendmsg = packet_sendmsg_spkt,
3787 .recvmsg = packet_recvmsg,
3788 .mmap = sock_no_mmap,
3789 .sendpage = sock_no_sendpage,
3790};
3791
3792static const struct proto_ops packet_ops = {
3793 .family = PF_PACKET,
3794 .owner = THIS_MODULE,
3795 .release = packet_release,
3796 .bind = packet_bind,
3797 .connect = sock_no_connect,
3798 .socketpair = sock_no_socketpair,
3799 .accept = sock_no_accept,
3800 .getname = packet_getname,
3801 .poll = packet_poll,
3802 .ioctl = packet_ioctl,
3803 .listen = sock_no_listen,
3804 .shutdown = sock_no_shutdown,
3805 .setsockopt = packet_setsockopt,
3806 .getsockopt = packet_getsockopt,
3807 .sendmsg = packet_sendmsg,
3808 .recvmsg = packet_recvmsg,
3809 .mmap = packet_mmap,
3810 .sendpage = sock_no_sendpage,
3811};
3812
3813static const struct net_proto_family packet_family_ops = {
3814 .family = PF_PACKET,
3815 .create = packet_create,
3816 .owner = THIS_MODULE,
3817};
3818
3819static struct notifier_block packet_netdev_notifier = {
3820 .notifier_call = packet_notifier,
3821};
3822
3823#ifdef CONFIG_PROC_FS
3824
3825static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
3826 __acquires(RCU)
3827{
3828 struct net *net = seq_file_net(seq);
3829
3830 rcu_read_lock();
3831 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
3832}
3833
3834static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3835{
3836 struct net *net = seq_file_net(seq);
3837 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
3838}
3839
3840static void packet_seq_stop(struct seq_file *seq, void *v)
3841 __releases(RCU)
3842{
3843 rcu_read_unlock();
3844}
3845
3846static int packet_seq_show(struct seq_file *seq, void *v)
3847{
3848 if (v == SEQ_START_TOKEN)
3849 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
3850 else {
3851 struct sock *s = sk_entry(v);
3852 const struct packet_sock *po = pkt_sk(s);
3853
3854 seq_printf(seq,
3855 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
3856 s,
3857 atomic_read(&s->sk_refcnt),
3858 s->sk_type,
3859 ntohs(po->num),
3860 po->ifindex,
3861 po->running,
3862 atomic_read(&s->sk_rmem_alloc),
3863 sock_i_uid(s),
3864 sock_i_ino(s));
3865 }
3866
3867 return 0;
3868}
3869
3870static const struct seq_operations packet_seq_ops = {
3871 .start = packet_seq_start,
3872 .next = packet_seq_next,
3873 .stop = packet_seq_stop,
3874 .show = packet_seq_show,
3875};
3876
3877static int packet_seq_open(struct inode *inode, struct file *file)
3878{
3879 return seq_open_net(inode, file, &packet_seq_ops,
3880 sizeof(struct seq_net_private));
3881}
3882
3883static const struct file_operations packet_seq_fops = {
3884 .owner = THIS_MODULE,
3885 .open = packet_seq_open,
3886 .read = seq_read,
3887 .llseek = seq_lseek,
3888 .release = seq_release_net,
3889};
3890
3891#endif
3892
3893static int __net_init packet_net_init(struct net *net)
3894{
3895 spin_lock_init(&net->packet.sklist_lock);
3896 INIT_HLIST_HEAD(&net->packet.sklist);
3897
3898 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
3899 return -ENOMEM;
3900
3901 return 0;
3902}
3903
3904static void __net_exit packet_net_exit(struct net *net)
3905{
3906 proc_net_remove(net, "packet");
3907}
3908
3909static struct pernet_operations packet_net_ops = {
3910 .init = packet_net_init,
3911 .exit = packet_net_exit,
3912};
3913
3914
3915static void __exit packet_exit(void)
3916{
3917 unregister_netdevice_notifier(&packet_netdev_notifier);
3918 unregister_pernet_subsys(&packet_net_ops);
3919 sock_unregister(PF_PACKET);
3920 proto_unregister(&packet_proto);
3921}
3922
3923static int __init packet_init(void)
3924{
3925 int rc = proto_register(&packet_proto, 0);
3926
3927 if (rc != 0)
3928 goto out;
3929
3930 sock_register(&packet_family_ops);
3931 register_pernet_subsys(&packet_net_ops);
3932 register_netdevice_notifier(&packet_netdev_notifier);
3933out:
3934 return rc;
3935}
3936
3937module_init(packet_init);
3938module_exit(packet_exit);
3939MODULE_LICENSE("GPL");
3940MODULE_ALIAS_NETPROTO(PF_PACKET);
3941