1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
51#include <linux/ethtool.h>
52#include <linux/filter.h>
53#include <linux/types.h>
54#include <linux/mm.h>
55#include <linux/capability.h>
56#include <linux/fcntl.h>
57#include <linux/socket.h>
58#include <linux/in.h>
59#include <linux/inet.h>
60#include <linux/netdevice.h>
61#include <linux/if_packet.h>
62#include <linux/wireless.h>
63#include <linux/kernel.h>
64#include <linux/kmod.h>
65#include <linux/slab.h>
66#include <linux/vmalloc.h>
67#include <net/net_namespace.h>
68#include <net/ip.h>
69#include <net/protocol.h>
70#include <linux/skbuff.h>
71#include <net/sock.h>
72#include <linux/errno.h>
73#include <linux/timer.h>
74#include <linux/uaccess.h>
75#include <asm/ioctls.h>
76#include <asm/page.h>
77#include <asm/cacheflush.h>
78#include <asm/io.h>
79#include <linux/proc_fs.h>
80#include <linux/seq_file.h>
81#include <linux/poll.h>
82#include <linux/module.h>
83#include <linux/init.h>
84#include <linux/mutex.h>
85#include <linux/if_vlan.h>
86#include <linux/virtio_net.h>
87#include <linux/errqueue.h>
88#include <linux/net_tstamp.h>
89#include <linux/percpu.h>
90#ifdef CONFIG_INET
91#include <net/inet_common.h>
92#endif
93#include <linux/bpf.h>
94#include <net/compat.h>
95#include <linux/netfilter_netdev.h>
96
97#include "internal.h"
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/*
 * Membership request carrying an address buffer of MAX_ADDR_LEN bytes.
 * NOTE(review): presumably the kernel-side, maximum-size counterpart of the
 * user-visible packet membership request -- confirm against the setsockopt
 * path, which is not part of this section.
 */
struct packet_mreq_max {
	int mr_ifindex;				/* interface index */
	unsigned short mr_type;			/* request type */
	unsigned short mr_alen;			/* number of valid bytes in mr_address */
	unsigned char mr_address[MAX_ADDR_LEN];	/* address bytes */
};
165
/*
 * One frame pointer viewed as each of the supported TPACKET header layouts.
 * Code stores the frame address in @raw and then dereferences the member
 * matching po->tp_version.
 */
union tpacket_uhdr {
	struct tpacket_hdr *h1;		/* TPACKET_V1 view */
	struct tpacket2_hdr *h2;	/* TPACKET_V2 view */
	struct tpacket3_hdr *h3;	/* TPACKET_V3 view */
	void *raw;			/* untyped frame address */
};
172
/* Forward declaration; the definition is not in this part of the file. */
static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

/* Alignment, in bytes, applied to TPACKET_V3 block headers and packet slots. */
#define V3_ALIGNMENT (8)

/* Size of the block descriptor, rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

/* Block header plus a private area of @sz_of_priv bytes, both aligned. */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

/* Field accessors for a struct tpacket_block_desc pointer @x. */
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
189
/*
 * Forward declarations for helpers that are referenced before their
 * definitions appear in this file.
 */
struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
		struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
static u16 packet_pick_tx_queue(struct sk_buff *skb);
215
/*
 * Per-skb private data, accessed through PACKET_SKB_CB() which overlays it
 * on skb->cb.
 */
struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/*
			 * origlen and ll are members of the same anonymous
			 * union, so they occupy the same storage.
			 * NOTE(review): presumably origlen aliases leading
			 * sockaddr_ll fields that are filled in later --
			 * confirm against the receive path.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};
229
/* Shorthand for virtio_legacy_is_little_endian(). */
#define vio_le() virtio_legacy_is_little_endian()

/* View the control buffer of @__skb as a struct packet_skb_cb. */
#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))

/* Block-descriptor-queue core embedded in ring buffer @x. */
#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
/* Descriptor of block number @bid of core @x. */
#define GET_PBLOCK_DESC(x, bid) \
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
/* Descriptor of the block core @x is currently filling. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
/* Index following kactive_blk_num, wrapping to 0 after knum_blocks-1. */
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)

/* Forward declarations for the fanout link/unlink helpers. */
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
245
246#ifdef CONFIG_NETFILTER_EGRESS
247static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
248{
249 struct sk_buff *next, *head = NULL, *tail;
250 int rc;
251
252 rcu_read_lock();
253 for (; skb != NULL; skb = next) {
254 next = skb->next;
255 skb_mark_not_on_list(skb);
256
257 if (!nf_hook_egress(skb, &rc, skb->dev))
258 continue;
259
260 if (!head)
261 head = skb;
262 else
263 tail->next = skb;
264
265 tail = skb;
266 }
267 rcu_read_unlock();
268
269 return head;
270}
271#endif
272
273static int packet_direct_xmit(struct sk_buff *skb)
274{
275#ifdef CONFIG_NETFILTER_EGRESS
276 if (nf_hook_egress_active()) {
277 skb = nf_hook_direct_egress(skb);
278 if (!skb)
279 return NET_XMIT_DROP;
280 }
281#endif
282 return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
283}
284
285static struct net_device *packet_cached_dev_get(struct packet_sock *po)
286{
287 struct net_device *dev;
288
289 rcu_read_lock();
290 dev = rcu_dereference(po->cached_dev);
291 dev_hold(dev);
292 rcu_read_unlock();
293
294 return dev;
295}
296
/* Publish @dev as the cached device of @po via rcu_assign_pointer(). */
static void packet_cached_dev_assign(struct packet_sock *po,
		struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}
302
/* Clear the cached device of @po (sets po->cached_dev to NULL). */
static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}
307
308static bool packet_use_direct_xmit(const struct packet_sock *po)
309{
310 return po->xmit == packet_direct_xmit;
311}
312
313static u16 packet_pick_tx_queue(struct sk_buff *skb)
314{
315 struct net_device *dev = skb->dev;
316 const struct net_device_ops *ops = dev->netdev_ops;
317 int cpu = raw_smp_processor_id();
318 u16 queue_index;
319
320#ifdef CONFIG_XPS
321 skb->sender_cpu = cpu + 1;
322#endif
323 skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
324 if (ops->ndo_select_queue) {
325 queue_index = ops->ndo_select_queue(dev, skb, NULL);
326 queue_index = netdev_cap_txqueue(dev, queue_index);
327 } else {
328 queue_index = netdev_pick_tx(dev, skb, NULL);
329 }
330
331 return queue_index;
332}
333
334
335
336
337
338static void __register_prot_hook(struct sock *sk)
339{
340 struct packet_sock *po = pkt_sk(sk);
341
342 if (!po->running) {
343 if (po->fanout)
344 __fanout_link(sk, po);
345 else
346 dev_add_pack(&po->prot_hook);
347
348 sock_hold(sk);
349 po->running = 1;
350 }
351}
352
353static void register_prot_hook(struct sock *sk)
354{
355 lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
356 __register_prot_hook(sk);
357}
358
359
360
361
362
363
364
365static void __unregister_prot_hook(struct sock *sk, bool sync)
366{
367 struct packet_sock *po = pkt_sk(sk);
368
369 lockdep_assert_held_once(&po->bind_lock);
370
371 po->running = 0;
372
373 if (po->fanout)
374 __fanout_unlink(sk, po);
375 else
376 __dev_remove_pack(&po->prot_hook);
377
378 __sock_put(sk);
379
380 if (sync) {
381 spin_unlock(&po->bind_lock);
382 synchronize_net();
383 spin_lock(&po->bind_lock);
384 }
385}
386
387static void unregister_prot_hook(struct sock *sk, bool sync)
388{
389 struct packet_sock *po = pkt_sk(sk);
390
391 if (po->running)
392 __unregister_prot_hook(sk, sync);
393}
394
395static inline struct page * __pure pgv_to_page(void *addr)
396{
397 if (is_vmalloc_addr(addr))
398 return vmalloc_to_page(addr);
399 return virt_to_page(addr);
400}
401
402static void __packet_set_status(struct packet_sock *po, void *frame, int status)
403{
404 union tpacket_uhdr h;
405
406 h.raw = frame;
407 switch (po->tp_version) {
408 case TPACKET_V1:
409 h.h1->tp_status = status;
410 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
411 break;
412 case TPACKET_V2:
413 h.h2->tp_status = status;
414 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
415 break;
416 case TPACKET_V3:
417 h.h3->tp_status = status;
418 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
419 break;
420 default:
421 WARN(1, "TPACKET version not supported.\n");
422 BUG();
423 }
424
425 smp_wmb();
426}
427
428static int __packet_get_status(const struct packet_sock *po, void *frame)
429{
430 union tpacket_uhdr h;
431
432 smp_rmb();
433
434 h.raw = frame;
435 switch (po->tp_version) {
436 case TPACKET_V1:
437 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
438 return h.h1->tp_status;
439 case TPACKET_V2:
440 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
441 return h.h2->tp_status;
442 case TPACKET_V3:
443 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
444 return h.h3->tp_status;
445 default:
446 WARN(1, "TPACKET version not supported.\n");
447 BUG();
448 return 0;
449 }
450}
451
452static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
453 unsigned int flags)
454{
455 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
456
457 if (shhwtstamps &&
458 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
459 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
460 return TP_STATUS_TS_RAW_HARDWARE;
461
462 if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
463 ktime_to_timespec64_cond(skb_tstamp(skb), ts))
464 return TP_STATUS_TS_SOFTWARE;
465
466 return 0;
467}
468
469static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
470 struct sk_buff *skb)
471{
472 union tpacket_uhdr h;
473 struct timespec64 ts;
474 __u32 ts_status;
475
476 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
477 return 0;
478
479 h.raw = frame;
480
481
482
483
484
485
486
487 switch (po->tp_version) {
488 case TPACKET_V1:
489 h.h1->tp_sec = ts.tv_sec;
490 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
491 break;
492 case TPACKET_V2:
493 h.h2->tp_sec = ts.tv_sec;
494 h.h2->tp_nsec = ts.tv_nsec;
495 break;
496 case TPACKET_V3:
497 h.h3->tp_sec = ts.tv_sec;
498 h.h3->tp_nsec = ts.tv_nsec;
499 break;
500 default:
501 WARN(1, "TPACKET version not supported.\n");
502 BUG();
503 }
504
505
506 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
507 smp_wmb();
508
509 return ts_status;
510}
511
512static void *packet_lookup_frame(const struct packet_sock *po,
513 const struct packet_ring_buffer *rb,
514 unsigned int position,
515 int status)
516{
517 unsigned int pg_vec_pos, frame_offset;
518 union tpacket_uhdr h;
519
520 pg_vec_pos = position / rb->frames_per_block;
521 frame_offset = position % rb->frames_per_block;
522
523 h.raw = rb->pg_vec[pg_vec_pos].buffer +
524 (frame_offset * rb->frame_size);
525
526 if (status != __packet_get_status(po, h.raw))
527 return NULL;
528
529 return h.raw;
530}
531
532static void *packet_current_frame(struct packet_sock *po,
533 struct packet_ring_buffer *rb,
534 int status)
535{
536 return packet_lookup_frame(po, rb, rb->head, status);
537}
538
/* Delete the block-retire timer of @pkc with del_timer_sync(). */
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}
543
544static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
545 struct sk_buff_head *rb_queue)
546{
547 struct tpacket_kbdq_core *pkc;
548
549 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
550
551 spin_lock_bh(&rb_queue->lock);
552 pkc->delete_blk_timer = 1;
553 spin_unlock_bh(&rb_queue->lock);
554
555 prb_del_retire_blk_timer(pkc);
556}
557
558static void prb_setup_retire_blk_timer(struct packet_sock *po)
559{
560 struct tpacket_kbdq_core *pkc;
561
562 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
563 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
564 0);
565 pkc->retire_blk_timer.expires = jiffies;
566}
567
568static int prb_calc_retire_blk_tmo(struct packet_sock *po,
569 int blk_size_in_bytes)
570{
571 struct net_device *dev;
572 unsigned int mbits, div;
573 struct ethtool_link_ksettings ecmd;
574 int err;
575
576 rtnl_lock();
577 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
578 if (unlikely(!dev)) {
579 rtnl_unlock();
580 return DEFAULT_PRB_RETIRE_TOV;
581 }
582 err = __ethtool_get_link_ksettings(dev, &ecmd);
583 rtnl_unlock();
584 if (err)
585 return DEFAULT_PRB_RETIRE_TOV;
586
587
588
589
590 if (ecmd.base.speed < SPEED_1000 ||
591 ecmd.base.speed == SPEED_UNKNOWN)
592 return DEFAULT_PRB_RETIRE_TOV;
593
594 div = ecmd.base.speed / 1000;
595 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
596
597 if (div)
598 mbits /= div;
599
600 if (div)
601 return mbits + 1;
602 return mbits;
603}
604
/* Copy the feature request word of the V3 ring request into @p1. */
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
		union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}
610
611static void init_prb_bdqc(struct packet_sock *po,
612 struct packet_ring_buffer *rb,
613 struct pgv *pg_vec,
614 union tpacket_req_u *req_u)
615{
616 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
617 struct tpacket_block_desc *pbd;
618
619 memset(p1, 0x0, sizeof(*p1));
620
621 p1->knxt_seq_num = 1;
622 p1->pkbdq = pg_vec;
623 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
624 p1->pkblk_start = pg_vec[0].buffer;
625 p1->kblk_size = req_u->req3.tp_block_size;
626 p1->knum_blocks = req_u->req3.tp_block_nr;
627 p1->hdrlen = po->tp_hdrlen;
628 p1->version = po->tp_version;
629 p1->last_kactive_blk_num = 0;
630 po->stats.stats3.tp_freeze_q_cnt = 0;
631 if (req_u->req3.tp_retire_blk_tov)
632 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
633 else
634 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
635 req_u->req3.tp_block_size);
636 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
637 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
638 rwlock_init(&p1->blk_fill_in_prog_lock);
639
640 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
641 prb_init_ft_ops(p1, req_u);
642 prb_setup_retire_blk_timer(po);
643 prb_open_block(p1, pbd);
644}
645
646
647
648
649static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
650{
651 mod_timer(&pkc->retire_blk_timer,
652 jiffies + pkc->tov_in_jiffies);
653 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
654}
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
/*
 * Callback of the rx ring's block-retire timer.
 *
 * The whole body runs under sk_receive_queue.lock.  Nothing is done once
 * delete_blk_timer has been set.  Otherwise, if the active block is still
 * the one recorded when the timer was last armed:
 *  - queue not frozen, block empty: the timer is simply re-armed;
 *  - queue not frozen, block holds packets: the block is retired with
 *    TP_STATUS_BLK_TMO and the next one is dispatched; the timer is re-armed
 *    here only if dispatch fails (prb_open_block() re-arms it on success);
 *  - queue frozen: the timer is re-armed while the current block still has
 *    TP_STATUS_USER set, otherwise the block is reopened.
 * If the active block changed in the meantime the timer is just re-armed.
 */
static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
{
	struct packet_sock *po =
		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	unsigned int frozen;
	struct tpacket_block_desc *pbd;

	spin_lock(&po->sk.sk_receive_queue.lock);

	frozen = prb_queue_frozen(pkc);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* The timer is being shut down: do not touch the ring or re-arm. */
	if (unlikely(pkc->delete_blk_timer))
		goto out;

	/*
	 * Only a block that already holds packets can have a fill in
	 * progress.  Taking and immediately dropping the write side of
	 * blk_fill_in_prog_lock waits for every holder of the read side
	 * (taken in prb_fill_curr_block(), released in
	 * prb_clear_blk_fill_status()) to finish.
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		/* Wait for in-flight fills of this block. */
		write_lock(&pkc->blk_fill_in_prog_lock);
		write_unlock(&pkc->blk_fill_in_prog_lock);
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			if (!BLOCK_NUM_PKTS(pbd)) {
				/* Empty block: nothing to retire. */
				goto refresh_timer;
			}
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/*
			 * The queue was frozen when this callback started.
			 */
			if (prb_curr_blk_in_use(pbd)) {
				/*
				 * The current block still carries
				 * TP_STATUS_USER, so it cannot be reused
				 * yet; check again on the next expiry.
				 */
				goto refresh_timer;
			} else {
				/*
				 * The block no longer carries
				 * TP_STATUS_USER: reopen it.
				 * prb_open_block() thaws the queue and
				 * re-arms the timer itself, hence the jump
				 * straight to out.
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}

refresh_timer:
	_prb_refresh_rx_retire_blk_timer(pkc);

out:
	spin_unlock(&po->sk.sk_receive_queue.lock);
}
751
/*
 * Publish @status in the header of block @pbd1.
 *
 * On architectures where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1, the pages
 * after the first one (up to the page-aligned pkblk_end) are flushed before
 * the status store, and the first page - the one @pbd1 points into - is
 * flushed after it; each flush phase ends with a write barrier.  On other
 * architectures only the status store remains.
 */
static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything except the first page of the block. */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* The first page is flushed separately, after the status store. */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the page holding the block header. */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}
785
786
787
788
789
790
791
792
793
794
795static void prb_close_block(struct tpacket_kbdq_core *pkc1,
796 struct tpacket_block_desc *pbd1,
797 struct packet_sock *po, unsigned int stat)
798{
799 __u32 status = TP_STATUS_USER | stat;
800
801 struct tpacket3_hdr *last_pkt;
802 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
803 struct sock *sk = &po->sk;
804
805 if (atomic_read(&po->tp_drops))
806 status |= TP_STATUS_LOSING;
807
808 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
809 last_pkt->tp_next_offset = 0;
810
811
812 if (BLOCK_NUM_PKTS(pbd1)) {
813 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
814 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
815 } else {
816
817
818
819
820
821 struct timespec64 ts;
822 ktime_get_real_ts64(&ts);
823 h1->ts_last_pkt.ts_sec = ts.tv_sec;
824 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
825 }
826
827 smp_wmb();
828
829
830 prb_flush_block(pkc1, pbd1, status);
831
832 sk->sk_data_ready(sk);
833
834 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
835}
836
/* Clear the frozen flag (reset_pending_on_curr_blk) of @pkc. */
static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
{
	pkc->reset_pending_on_curr_blk = 0;
}
841
842
843
844
845
846
847
848
/*
 * Prepare block @pbd1 to receive packets and make it the block the core
 * writes into.
 *
 * Resets the block header (new sequence number, zero packets, length equal
 * to header plus private area, first-packet timestamp set to the current
 * wall-clock time), points the core's write cursor just past the private
 * area, thaws the queue and re-arms the retire timer.
 */
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1)
{
	struct timespec64 ts;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	smp_rmb();

	/*
	 * NOTE(review): the block status is not checked here; callers in
	 * this file test TP_STATUS_USER before opening a block -- confirm no
	 * other caller relies on a check in this function.
	 */

	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
	BLOCK_NUM_PKTS(pbd1) = 0;
	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	ktime_get_real_ts64(&ts);

	h1->ts_first_pkt.ts_sec = ts.tv_sec;
	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;

	/* First packet slot starts right after header + private area. */
	pkc1->pkblk_start = (char *)pbd1;
	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;

	pbd1->version = pkc1->version;
	/* With no packet yet, "previous packet" is the first slot itself. */
	pkc1->prev = pkc1->nxt_offset;
	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;

	prb_thaw_queue(pkc1);
	_prb_refresh_rx_retire_blk_timer(pkc1);

	smp_wmb();
}
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
/*
 * Mark the queue of @pkc as frozen (reset_pending_on_curr_blk = 1) and count
 * the event in the socket's tp_freeze_q_cnt statistic.
 */
static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po)
{
	pkc->reset_pending_on_curr_blk = 1;
	po->stats.stats3.tp_freeze_q_cnt++;
}
915
/* Space a packet of @length bytes occupies in a block: rounded up to V3_ALIGNMENT. */
#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
917
918
919
920
921
922
923
924static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
925 struct packet_sock *po)
926{
927 struct tpacket_block_desc *pbd;
928
929 smp_rmb();
930
931
932 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
933
934
935 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
936 prb_freeze_queue(pkc, po);
937 return NULL;
938 }
939
940
941
942
943
944
945 prb_open_block(pkc, pbd);
946 return (void *)pkc->nxt_offset;
947}
948
949static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
950 struct packet_sock *po, unsigned int status)
951{
952 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
953
954
955 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
956
957
958
959
960
961
962
963
964
965 if (!(status & TP_STATUS_BLK_TMO)) {
966
967 write_lock(&pkc->blk_fill_in_prog_lock);
968 write_unlock(&pkc->blk_fill_in_prog_lock);
969 }
970 prb_close_block(pkc, pbd, po, status);
971 return;
972 }
973}
974
975static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
976{
977 return TP_STATUS_USER & BLOCK_STATUS(pbd);
978}
979
/* Return: the frozen flag (reset_pending_on_curr_blk) of @pkc. */
static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
{
	return pkc->reset_pending_on_curr_blk;
}
984
/*
 * Drop the read side of blk_fill_in_prog_lock for ring @rb; the matching
 * read_lock() is taken in prb_fill_curr_block().
 */
static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
	__releases(&pkc->blk_fill_in_prog_lock)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);

	read_unlock(&pkc->blk_fill_in_prog_lock);
}
992
993static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
994 struct tpacket3_hdr *ppd)
995{
996 ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
997}
998
/* Zero the rxhash field of @ppd; @pkc is unused. */
static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
		struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = 0;
}
1004
1005static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
1006 struct tpacket3_hdr *ppd)
1007{
1008 if (skb_vlan_tag_present(pkc->skb)) {
1009 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
1010 ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
1011 ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
1012 } else {
1013 ppd->hv1.tp_vlan_tci = 0;
1014 ppd->hv1.tp_vlan_tpid = 0;
1015 ppd->tp_status = TP_STATUS_AVAILABLE;
1016 }
1017}
1018
1019static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
1020 struct tpacket3_hdr *ppd)
1021{
1022 ppd->hv1.tp_padding = 0;
1023 prb_fill_vlan_info(pkc, ppd);
1024
1025 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
1026 prb_fill_rxhash(pkc, ppd);
1027 else
1028 prb_clear_rxhash(pkc, ppd);
1029}
1030
1031static void prb_fill_curr_block(char *curr,
1032 struct tpacket_kbdq_core *pkc,
1033 struct tpacket_block_desc *pbd,
1034 unsigned int len)
1035 __acquires(&pkc->blk_fill_in_prog_lock)
1036{
1037 struct tpacket3_hdr *ppd;
1038
1039 ppd = (struct tpacket3_hdr *)curr;
1040 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
1041 pkc->prev = curr;
1042 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
1043 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
1044 BLOCK_NUM_PKTS(pbd) += 1;
1045 read_lock(&pkc->blk_fill_in_prog_lock);
1046 prb_run_all_ft_ops(pkc, ppd);
1047}
1048
1049
1050static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1051 struct sk_buff *skb,
1052 unsigned int len
1053 )
1054{
1055 struct tpacket_kbdq_core *pkc;
1056 struct tpacket_block_desc *pbd;
1057 char *curr, *end;
1058
1059 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
1060 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1061
1062
1063 if (prb_queue_frozen(pkc)) {
1064
1065
1066
1067
1068 if (prb_curr_blk_in_use(pbd)) {
1069
1070 return NULL;
1071 } else {
1072
1073
1074
1075
1076
1077
1078 prb_open_block(pkc, pbd);
1079 }
1080 }
1081
1082 smp_mb();
1083 curr = pkc->nxt_offset;
1084 pkc->skb = skb;
1085 end = (char *)pbd + pkc->kblk_size;
1086
1087
1088 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1089 prb_fill_curr_block(curr, pkc, pbd, len);
1090 return (void *)curr;
1091 }
1092
1093
1094 prb_retire_current_block(pkc, po, 0);
1095
1096
1097 curr = (char *)prb_dispatch_next_block(pkc, po);
1098 if (curr) {
1099 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1100 prb_fill_curr_block(curr, pkc, pbd, len);
1101 return (void *)curr;
1102 }
1103
1104
1105
1106
1107
1108 return NULL;
1109}
1110
1111static void *packet_current_rx_frame(struct packet_sock *po,
1112 struct sk_buff *skb,
1113 int status, unsigned int len)
1114{
1115 char *curr = NULL;
1116 switch (po->tp_version) {
1117 case TPACKET_V1:
1118 case TPACKET_V2:
1119 curr = packet_lookup_frame(po, &po->rx_ring,
1120 po->rx_ring.head, status);
1121 return curr;
1122 case TPACKET_V3:
1123 return __packet_lookup_frame_in_block(po, skb, len);
1124 default:
1125 WARN(1, "TPACKET version not supported\n");
1126 BUG();
1127 return NULL;
1128 }
1129}
1130
1131static void *prb_lookup_block(const struct packet_sock *po,
1132 const struct packet_ring_buffer *rb,
1133 unsigned int idx,
1134 int status)
1135{
1136 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1137 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1138
1139 if (status != BLOCK_STATUS(pbd))
1140 return NULL;
1141 return pbd;
1142}
1143
1144static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1145{
1146 unsigned int prev;
1147 if (rb->prb_bdqc.kactive_blk_num)
1148 prev = rb->prb_bdqc.kactive_blk_num-1;
1149 else
1150 prev = rb->prb_bdqc.knum_blocks-1;
1151 return prev;
1152}
1153
1154
/*
 * Return the descriptor of the block before the active one if its status
 * equals @status, otherwise NULL.
 */
static void *__prb_previous_block(struct packet_sock *po,
				  struct packet_ring_buffer *rb,
				  int status)
{
	return prb_lookup_block(po, rb, prb_previous_blk_num(rb), status);
}
1162
1163static void *packet_previous_rx_frame(struct packet_sock *po,
1164 struct packet_ring_buffer *rb,
1165 int status)
1166{
1167 if (po->tp_version <= TPACKET_V2)
1168 return packet_previous_frame(po, rb, status);
1169
1170 return __prb_previous_block(po, rb, status);
1171}
1172
1173static void packet_increment_rx_head(struct packet_sock *po,
1174 struct packet_ring_buffer *rb)
1175{
1176 switch (po->tp_version) {
1177 case TPACKET_V1:
1178 case TPACKET_V2:
1179 return packet_increment_head(rb);
1180 case TPACKET_V3:
1181 default:
1182 WARN(1, "TPACKET version not supported.\n");
1183 BUG();
1184 return;
1185 }
1186}
1187
1188static void *packet_previous_frame(struct packet_sock *po,
1189 struct packet_ring_buffer *rb,
1190 int status)
1191{
1192 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1193 return packet_lookup_frame(po, rb, previous, status);
1194}
1195
1196static void packet_increment_head(struct packet_ring_buffer *buff)
1197{
1198 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1199}
1200
/* Increment this CPU's slot of the per-CPU pending counter of @rb. */
static void packet_inc_pending(struct packet_ring_buffer *rb)
{
	this_cpu_inc(*rb->pending_refcnt);
}
1205
/* Decrement this CPU's slot of the per-CPU pending counter of @rb. */
static void packet_dec_pending(struct packet_ring_buffer *rb)
{
	this_cpu_dec(*rb->pending_refcnt);
}
1210
1211static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1212{
1213 unsigned int refcnt = 0;
1214 int cpu;
1215
1216
1217 if (rb->pending_refcnt == NULL)
1218 return 0;
1219
1220 for_each_possible_cpu(cpu)
1221 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1222
1223 return refcnt;
1224}
1225
1226static int packet_alloc_pending(struct packet_sock *po)
1227{
1228 po->rx_ring.pending_refcnt = NULL;
1229
1230 po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1231 if (unlikely(po->tx_ring.pending_refcnt == NULL))
1232 return -ENOBUFS;
1233
1234 return 0;
1235}
1236
/* Free the tx ring's per-CPU pending counter allocated by packet_alloc_pending(). */
static void packet_free_pending(struct packet_sock *po)
{
	free_percpu(po->tx_ring.pending_refcnt);
}
1241
/*
 * Receive-room levels returned by __packet_rcv_has_room().  ROOM_POW_OFF is
 * the right-shift applied to the buffer/ring size to obtain the headroom
 * (size >> 2, i.e. a quarter) required for ROOM_NORMAL.
 */
#define ROOM_POW_OFF 2
#define ROOM_NONE 0x0
#define ROOM_LOW 0x1
#define ROOM_NORMAL 0x2
1246
1247static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
1248{
1249 int idx, len;
1250
1251 len = READ_ONCE(po->rx_ring.frame_max) + 1;
1252 idx = READ_ONCE(po->rx_ring.head);
1253 if (pow_off)
1254 idx += len >> pow_off;
1255 if (idx >= len)
1256 idx -= len;
1257 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1258}
1259
1260static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
1261{
1262 int idx, len;
1263
1264 len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1265 idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
1266 if (pow_off)
1267 idx += len >> pow_off;
1268 if (idx >= len)
1269 idx -= len;
1270 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1271}
1272
1273static int __packet_rcv_has_room(const struct packet_sock *po,
1274 const struct sk_buff *skb)
1275{
1276 const struct sock *sk = &po->sk;
1277 int ret = ROOM_NONE;
1278
1279 if (po->prot_hook.func != tpacket_rcv) {
1280 int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1281 int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1282 - (skb ? skb->truesize : 0);
1283
1284 if (avail > (rcvbuf >> ROOM_POW_OFF))
1285 return ROOM_NORMAL;
1286 else if (avail > 0)
1287 return ROOM_LOW;
1288 else
1289 return ROOM_NONE;
1290 }
1291
1292 if (po->tp_version == TPACKET_V3) {
1293 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1294 ret = ROOM_NORMAL;
1295 else if (__tpacket_v3_has_room(po, 0))
1296 ret = ROOM_LOW;
1297 } else {
1298 if (__tpacket_has_room(po, ROOM_POW_OFF))
1299 ret = ROOM_NORMAL;
1300 else if (__tpacket_has_room(po, 0))
1301 ret = ROOM_LOW;
1302 }
1303
1304 return ret;
1305}
1306
1307static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1308{
1309 int pressure, ret;
1310
1311 ret = __packet_rcv_has_room(po, skb);
1312 pressure = ret != ROOM_NORMAL;
1313
1314 if (READ_ONCE(po->pressure) != pressure)
1315 WRITE_ONCE(po->pressure, pressure);
1316
1317 return ret;
1318}
1319
1320static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1321{
1322 if (READ_ONCE(po->pressure) &&
1323 __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1324 WRITE_ONCE(po->pressure, 0);
1325}
1326
1327static void packet_sock_destruct(struct sock *sk)
1328{
1329 skb_queue_purge(&sk->sk_error_queue);
1330
1331 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1332 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
1333
1334 if (!sock_flag(sk, SOCK_DEAD)) {
1335 pr_err("Attempt to release alive packet socket: %p\n", sk);
1336 return;
1337 }
1338
1339 sk_refcnt_debug_dec(sk);
1340}
1341
1342static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1343{
1344 u32 *history = po->rollover->history;
1345 u32 victim, rxhash;
1346 int i, count = 0;
1347
1348 rxhash = skb_get_hash(skb);
1349 for (i = 0; i < ROLLOVER_HLEN; i++)
1350 if (READ_ONCE(history[i]) == rxhash)
1351 count++;
1352
1353 victim = prandom_u32() % ROLLOVER_HLEN;
1354
1355
1356 if (READ_ONCE(history[victim]) != rxhash)
1357 WRITE_ONCE(history[victim], rxhash);
1358
1359 return count > (ROLLOVER_HLEN >> 1);
1360}
1361
1362static unsigned int fanout_demux_hash(struct packet_fanout *f,
1363 struct sk_buff *skb,
1364 unsigned int num)
1365{
1366 return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1367}
1368
1369static unsigned int fanout_demux_lb(struct packet_fanout *f,
1370 struct sk_buff *skb,
1371 unsigned int num)
1372{
1373 unsigned int val = atomic_inc_return(&f->rr_cur);
1374
1375 return val % num;
1376}
1377
/*
 * Pick a fanout member from the id of the executing CPU, modulo @num.
 * @f and @skb are unused.
 */
static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int cpu = smp_processor_id();

	return cpu % num;
}
1384
/*
 * Pick a fanout member at random via prandom_u32_max(@num).
 * @f and @skb are unused.
 */
static unsigned int fanout_demux_rnd(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int pick = prandom_u32_max(num);

	return pick;
}
1391
1392static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1393 struct sk_buff *skb,
1394 unsigned int idx, bool try_self,
1395 unsigned int num)
1396{
1397 struct packet_sock *po, *po_next, *po_skip = NULL;
1398 unsigned int i, j, room = ROOM_NONE;
1399
1400 po = pkt_sk(rcu_dereference(f->arr[idx]));
1401
1402 if (try_self) {
1403 room = packet_rcv_has_room(po, skb);
1404 if (room == ROOM_NORMAL ||
1405 (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1406 return idx;
1407 po_skip = po;
1408 }
1409
1410 i = j = min_t(int, po->rollover->sock, num - 1);
1411 do {
1412 po_next = pkt_sk(rcu_dereference(f->arr[i]));
1413 if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
1414 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1415 if (i != j)
1416 po->rollover->sock = i;
1417 atomic_long_inc(&po->rollover->num);
1418 if (room == ROOM_LOW)
1419 atomic_long_inc(&po->rollover->num_huge);
1420 return i;
1421 }
1422
1423 if (++i == num)
1424 i = 0;
1425 } while (i != j);
1426
1427 atomic_long_inc(&po->rollover->num_failed);
1428 return idx;
1429}
1430
/*
 * Pick a fanout member from the queue mapping recorded in @skb, modulo
 * @num.  @f is unused.
 */
static unsigned int fanout_demux_qm(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	unsigned int queue = skb_get_queue_mapping(skb);

	return queue % num;
}
1437
1438static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1439 struct sk_buff *skb,
1440 unsigned int num)
1441{
1442 struct bpf_prog *prog;
1443 unsigned int ret = 0;
1444
1445 rcu_read_lock();
1446 prog = rcu_dereference(f->bpf_prog);
1447 if (prog)
1448 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1449 rcu_read_unlock();
1450
1451 return ret;
1452}
1453
1454static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1455{
1456 return f->flags & (flag >> 8);
1457}
1458
1459static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1460 struct packet_type *pt, struct net_device *orig_dev)
1461{
1462 struct packet_fanout *f = pt->af_packet_priv;
1463 unsigned int num = READ_ONCE(f->num_members);
1464 struct net *net = read_pnet(&f->net);
1465 struct packet_sock *po;
1466 unsigned int idx;
1467
1468 if (!net_eq(dev_net(dev), net) || !num) {
1469 kfree_skb(skb);
1470 return 0;
1471 }
1472
1473 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1474 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1475 if (!skb)
1476 return 0;
1477 }
1478 switch (f->type) {
1479 case PACKET_FANOUT_HASH:
1480 default:
1481 idx = fanout_demux_hash(f, skb, num);
1482 break;
1483 case PACKET_FANOUT_LB:
1484 idx = fanout_demux_lb(f, skb, num);
1485 break;
1486 case PACKET_FANOUT_CPU:
1487 idx = fanout_demux_cpu(f, skb, num);
1488 break;
1489 case PACKET_FANOUT_RND:
1490 idx = fanout_demux_rnd(f, skb, num);
1491 break;
1492 case PACKET_FANOUT_QM:
1493 idx = fanout_demux_qm(f, skb, num);
1494 break;
1495 case PACKET_FANOUT_ROLLOVER:
1496 idx = fanout_demux_rollover(f, skb, 0, false, num);
1497 break;
1498 case PACKET_FANOUT_CBPF:
1499 case PACKET_FANOUT_EBPF:
1500 idx = fanout_demux_bpf(f, skb, num);
1501 break;
1502 }
1503
1504 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1505 idx = fanout_demux_rollover(f, skb, idx, true, num);
1506
1507 po = pkt_sk(rcu_dereference(f->arr[idx]));
1508 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1509}
1510
/* Mutex held by fanout_add() and fanout_release() while they inspect or
 * modify fanout_list and a socket's po->fanout; exported GPL-only.
 */
DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
/* All fanout groups; entries are added in fanout_add() and removed in
 * fanout_add()/fanout_release().
 */
static LIST_HEAD(fanout_list);
/* First candidate id that fanout_find_new_id() will try next. */
static u16 fanout_next_id;
1515
/*
 * Append @sk to the member array of the fanout group @po belongs to
 * (po->fanout).  The first member to join registers the group's packet
 * hook with dev_add_pack().  The whole update runs under f->lock.
 */
static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;

	spin_lock(&f->lock);
	rcu_assign_pointer(f->arr[f->num_members], sk);
	/* Order the slot store before the member-count increment below;
	 * packet_rcv_fanout() samples num_members without taking f->lock.
	 */
	smp_wmb();
	f->num_members++;
	if (f->num_members == 1)
		dev_add_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}
1528
/*
 * Remove @sk from the member array of the fanout group @po belongs to
 * (po->fanout).  The vacated slot is refilled with the last member so the
 * populated part of the array stays contiguous.  When the last member
 * leaves, the group's packet hook is removed with __dev_remove_pack().
 * The whole update runs under f->lock.
 *
 * Triggers BUG_ON() if @sk is not found among the members.
 */
static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;
	int i;

	spin_lock(&f->lock);
	/* Locate the slot currently holding @sk. */
	for (i = 0; i < f->num_members; i++) {
		if (rcu_dereference_protected(f->arr[i],
					      lockdep_is_held(&f->lock)) == sk)
			break;
	}
	BUG_ON(i >= f->num_members);
	/* Move the last member into the freed slot, then shrink the count. */
	rcu_assign_pointer(f->arr[i],
			   rcu_dereference_protected(f->arr[f->num_members - 1],
						     lockdep_is_held(&f->lock)));
	f->num_members--;
	if (f->num_members == 0)
		__dev_remove_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}
1549
1550static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1551{
1552 if (sk->sk_family != PF_PACKET)
1553 return false;
1554
1555 return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1556}
1557
1558static void fanout_init_data(struct packet_fanout *f)
1559{
1560 switch (f->type) {
1561 case PACKET_FANOUT_LB:
1562 atomic_set(&f->rr_cur, 0);
1563 break;
1564 case PACKET_FANOUT_CBPF:
1565 case PACKET_FANOUT_EBPF:
1566 RCU_INIT_POINTER(f->bpf_prog, NULL);
1567 break;
1568 }
1569}
1570
1571static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1572{
1573 struct bpf_prog *old;
1574
1575 spin_lock(&f->lock);
1576 old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1577 rcu_assign_pointer(f->bpf_prog, new);
1578 spin_unlock(&f->lock);
1579
1580 if (old) {
1581 synchronize_net();
1582 bpf_prog_destroy(old);
1583 }
1584}
1585
1586static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
1587 unsigned int len)
1588{
1589 struct bpf_prog *new;
1590 struct sock_fprog fprog;
1591 int ret;
1592
1593 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1594 return -EPERM;
1595
1596 ret = copy_bpf_fprog_from_user(&fprog, data, len);
1597 if (ret)
1598 return ret;
1599
1600 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1601 if (ret)
1602 return ret;
1603
1604 __fanout_set_data_bpf(po->fanout, new);
1605 return 0;
1606}
1607
1608static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
1609 unsigned int len)
1610{
1611 struct bpf_prog *new;
1612 u32 fd;
1613
1614 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1615 return -EPERM;
1616 if (len != sizeof(fd))
1617 return -EINVAL;
1618 if (copy_from_sockptr(&fd, data, len))
1619 return -EFAULT;
1620
1621 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
1622 if (IS_ERR(new))
1623 return PTR_ERR(new);
1624
1625 __fanout_set_data_bpf(po->fanout, new);
1626 return 0;
1627}
1628
1629static int fanout_set_data(struct packet_sock *po, sockptr_t data,
1630 unsigned int len)
1631{
1632 switch (po->fanout->type) {
1633 case PACKET_FANOUT_CBPF:
1634 return fanout_set_data_cbpf(po, data, len);
1635 case PACKET_FANOUT_EBPF:
1636 return fanout_set_data_ebpf(po, data, len);
1637 default:
1638 return -EINVAL;
1639 }
1640}
1641
1642static void fanout_release_data(struct packet_fanout *f)
1643{
1644 switch (f->type) {
1645 case PACKET_FANOUT_CBPF:
1646 case PACKET_FANOUT_EBPF:
1647 __fanout_set_data_bpf(f, NULL);
1648 }
1649}
1650
1651static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
1652{
1653 struct packet_fanout *f;
1654
1655 list_for_each_entry(f, &fanout_list, list) {
1656 if (f->id == candidate_id &&
1657 read_pnet(&f->net) == sock_net(sk)) {
1658 return false;
1659 }
1660 }
1661 return true;
1662}
1663
1664static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
1665{
1666 u16 id = fanout_next_id;
1667
1668 do {
1669 if (__fanout_id_is_free(sk, id)) {
1670 *new_id = id;
1671 fanout_next_id = id + 1;
1672 return true;
1673 }
1674
1675 id++;
1676 } while (id != fanout_next_id);
1677
1678 return false;
1679}
1680
/*
 * Join socket @sk to the fanout group described by @args, creating the
 * group when none with that id exists in the socket's network namespace.
 *
 * @args->type_flags carries the fanout type in its low byte and the flag
 * bits in its high byte.
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL    unknown type; ROLLOVER type combined with the ROLLOVER flag;
 *              UNIQUEID flag with a non-zero id; flags or max_num_members
 *              disagreeing with an existing group; max_num_members above
 *              PACKET_FANOUT_MAX; socket not running, or its type/protocol/
 *              device not matching the group
 *   -EALREADY  the socket already belongs to a group
 *   -ENOMEM    allocation failure, or no free id for UNIQUEID
 *   -ENOSPC    the group already has max_num_members sockets
 */
static int fanout_add(struct sock *sk, struct fanout_args *args)
{
	struct packet_rollover *rollover = NULL;
	struct packet_sock *po = pkt_sk(sk);
	u16 type_flags = args->type_flags;
	struct packet_fanout *f, *match;
	u8 type = type_flags & 0xff;
	u8 flags = type_flags >> 8;
	u16 id = args->id;
	int err;

	/* Validate the requested type before taking any lock. */
	switch (type) {
	case PACKET_FANOUT_ROLLOVER:
		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
			return -EINVAL;
		break;
	case PACKET_FANOUT_HASH:
	case PACKET_FANOUT_LB:
	case PACKET_FANOUT_CPU:
	case PACKET_FANOUT_RND:
	case PACKET_FANOUT_QM:
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		break;
	default:
		return -EINVAL;
	}

	mutex_lock(&fanout_mutex);

	err = -EALREADY;
	if (po->fanout)
		goto out;

	/* Rollover state is needed for the ROLLOVER type and for any type
	 * combined with the ROLLOVER flag.
	 */
	if (type == PACKET_FANOUT_ROLLOVER ||
	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
		err = -ENOMEM;
		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
		if (!rollover)
			goto out;
		atomic_long_set(&rollover->num, 0);
		atomic_long_set(&rollover->num_huge, 0);
		atomic_long_set(&rollover->num_failed, 0);
	}

	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
		/* The id is chosen here; the caller must pass 0. */
		if (id != 0) {
			err = -EINVAL;
			goto out;
		}
		if (!fanout_find_new_id(sk, &id)) {
			err = -ENOMEM;
			goto out;
		}
		/* UNIQUEID is not stored in (or compared with) group flags. */
		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
	}

	/* Look for an existing group with this id in the same namespace. */
	match = NULL;
	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			match = f;
			break;
		}
	}
	err = -EINVAL;
	if (match) {
		/* Joining: flags must agree, and so must max_num_members
		 * if the caller specified one.
		 */
		if (match->flags != flags)
			goto out;
		if (args->max_num_members &&
		    args->max_num_members != match->max_num_members)
			goto out;
	} else {
		if (args->max_num_members > PACKET_FANOUT_MAX)
			goto out;
		if (!args->max_num_members)
			/* default group size when the caller gave none */
			args->max_num_members = 256;
		err = -ENOMEM;
		match = kvzalloc(struct_size(match, arr, args->max_num_members),
				 GFP_KERNEL);
		if (!match)
			goto out;
		write_pnet(&match->net, sock_net(sk));
		match->id = id;
		match->type = type;
		match->flags = flags;
		INIT_LIST_HEAD(&match->list);
		spin_lock_init(&match->lock);
		refcount_set(&match->sk_ref, 0);
		fanout_init_data(match);
		/* The group's hook copies protocol and device from the
		 * creating socket and dispatches via packet_rcv_fanout().
		 */
		match->prot_hook.type = po->prot_hook.type;
		match->prot_hook.dev = po->prot_hook.dev;
		match->prot_hook.func = packet_rcv_fanout;
		match->prot_hook.af_packet_priv = match;
		match->prot_hook.af_packet_net = read_pnet(&match->net);
		match->prot_hook.id_match = match_fanout_group;
		match->max_num_members = args->max_num_members;
		list_add(&match->list, &fanout_list);
	}
	err = -EINVAL;

	spin_lock(&po->bind_lock);
	/* The socket must be running and bound like the group. */
	if (po->running &&
	    match->type == type &&
	    match->prot_hook.type == po->prot_hook.type &&
	    match->prot_hook.dev == po->prot_hook.dev) {
		err = -ENOSPC;
		if (refcount_read(&match->sk_ref) < match->max_num_members) {
			/* Replace the socket's own hook by group membership. */
			__dev_remove_pack(&po->prot_hook);

			WRITE_ONCE(po->fanout, match);

			/* Ownership of rollover moves to the socket. */
			po->rollover = rollover;
			rollover = NULL;
			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
			__fanout_link(sk, po);
			err = 0;
		}
	}
	spin_unlock(&po->bind_lock);

	/* A group left without any socket reference is torn down again. */
	if (err && !refcount_read(&match->sk_ref)) {
		list_del(&match->list);
		kvfree(match);
	}

out:
	/* NULL here if ownership was transferred above. */
	kfree(rollover);
	mutex_unlock(&fanout_mutex);
	return err;
}
1815
1816
1817
1818
1819
1820
1821static struct packet_fanout *fanout_release(struct sock *sk)
1822{
1823 struct packet_sock *po = pkt_sk(sk);
1824 struct packet_fanout *f;
1825
1826 mutex_lock(&fanout_mutex);
1827 f = po->fanout;
1828 if (f) {
1829 po->fanout = NULL;
1830
1831 if (refcount_dec_and_test(&f->sk_ref))
1832 list_del(&f->list);
1833 else
1834 f = NULL;
1835 }
1836 mutex_unlock(&fanout_mutex);
1837
1838 return f;
1839}
1840
1841static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1842 struct sk_buff *skb)
1843{
1844
1845
1846
1847
1848 if (unlikely(dev->type != ARPHRD_ETHER))
1849 return false;
1850
1851 skb_reset_mac_header(skb);
1852 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1853}
1854
/* Forward declarations of the two proto_ops tables; their initialisers
 * are not in this part of the file (presumably defined further down).
 */
static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;
1858
/*
 * Receive hook that queues @skb on the socket stored in
 * @pt->af_packet_priv, recording a struct sockaddr_pkt (device type,
 * device name, protocol) in the skb control block.
 *
 * Always returns 0.  The packet is freed when it is a loopback packet,
 * when @dev is in another network namespace than the socket, or when
 * queueing fails.
 */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/* The hook's private pointer is the owning socket. */
	sk = pt->af_packet_priv;

	/* Loopback-type packets are never delivered here. */
	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	/* The control block and data pointer are modified below.  On NULL
	 * there is no skb left to free, hence the separate oom label
	 * (presumably skb_share_check() released the original — confirm).
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset_ct(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	/* Expose the packet from its MAC header onwards. */
	skb_push(skb, skb->data - skb_mac_header(skb));

	/* Fill in the address reported to the receiver. */
	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/* On success the socket queue owns the skb. */
	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
1924
1925static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1926{
1927 int depth;
1928
1929 if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1930 sock->type == SOCK_RAW) {
1931 skb_reset_mac_header(skb);
1932 skb->protocol = dev_parse_header_protocol(skb);
1933 }
1934
1935
1936 if (likely(skb->dev->type == ARPHRD_ETHER) &&
1937 eth_type_vlan(skb->protocol) &&
1938 __vlan_get_protocol(skb, skb->protocol, &depth) != 0) {
1939 if (pskb_may_pull(skb, depth))
1940 skb_set_network_header(skb, depth);
1941 }
1942
1943 skb_probe_transport_header(skb);
1944}
1945
1946
1947
1948
1949
1950
/*
 * sendmsg handler taking a struct sockaddr_pkt destination: transmit @len
 * bytes from @msg on the device named in the address.
 *
 * The function makes two passes over the code after the retry label.  The
 * first (skb == NULL) validates device and length, leaves the RCU section
 * to allocate with GFP_KERNEL and copy in the payload, then jumps back.
 * The second looks the device up again under RCU, repeats the checks and
 * transmits.
 *
 * Returns @len on success or a negative errno: -ENOTCONN (no address),
 * -EINVAL (address too short or header rejected by the device), -ENODEV,
 * -ENETDOWN, -EPROTONOSUPPORT (SOCK_NOFCS on a device without support),
 * -EMSGSIZE, -ENOBUFS, or an error from copying the payload or parsing
 * control messages.
 */
static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	struct sockcm_cookie sockc;
	__be16 proto = 0;
	int err;
	int extra_len = 0;

	/* A destination address is mandatory.  The protocol is taken from it
	 * only when the caller passed a full sockaddr_pkt; otherwise it
	 * stays 0.
	 */
	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;

	/* Force NUL termination of the device name before the lookup. */
	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/* With SOCK_NOFCS the length limits below grow by four bytes and
	 * the skb is marked no_fcs before transmission.
	 */
	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
		if (!netif_supports_nofcs(dev)) {
			err = -EPROTONOSUPPORT;
			goto out_unlock;
		}
		extra_len = 4;
	}

	/* Coarse bound that allows for one VLAN header; a tighter check
	 * follows once the packet contents are available.
	 */
	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
		goto out_unlock;

	if (!skb) {
		/* First pass: sample the device values needed for the
		 * allocation while still inside the RCU section.
		 */
		size_t reserved = LL_RESERVED_SPACE(dev);
		int tlen = dev->needed_tailroom;
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;

		/* Reserve link-layer headroom and mark the network header at
		 * the resulting data pointer.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* For devices with header_ops, move data/tail back by the
		 * hard header length so the user-supplied bytes start that
		 * far ahead of the network header marked above.  A payload
		 * shorter than the header gets the network header reset to
		 * the new data pointer.
		 */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_from_msg(skb_put(skb, len), msg, len);
		if (err)
			goto out_free;
		/* Look the device up again now that the skb is filled. */
		goto retry;
	}

	if (!dev_validate_header(dev, skb->data, len)) {
		err = -EINVAL;
		goto out_unlock;
	}
	/* Above the plain MTU limit only frames accepted by
	 * packet_extra_vlan_len_allowed() may pass.
	 */
	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
	    !packet_extra_vlan_len_allowed(dev, skb)) {
		err = -EMSGSIZE;
		goto out_unlock;
	}

	sockcm_init(&sockc, sk);
	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (unlikely(err))
			goto out_unlock;
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb->tstamp = sockc.transmit_time;

	skb_setup_tx_timestamp(skb, sockc.tsflags);

	if (unlikely(extra_len == 4))
		skb->no_fcs = 1;

	packet_parse_headers(skb, sock);

	/* The return value of dev_queue_xmit() is not checked; @len is
	 * reported to the caller regardless.
	 */
	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	/* skb is still NULL if a first-pass check failed. */
	kfree_skb(skb);
	return err;
}
2077
2078static unsigned int run_filter(struct sk_buff *skb,
2079 const struct sock *sk,
2080 unsigned int res)
2081{
2082 struct sk_filter *filter;
2083
2084 rcu_read_lock();
2085 filter = rcu_dereference(sk->sk_filter);
2086 if (filter != NULL)
2087 res = bpf_prog_run_clear_cb(filter->prog, skb);
2088 rcu_read_unlock();
2089
2090 return res;
2091}
2092
2093static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2094 size_t *len)
2095{
2096 struct virtio_net_hdr vnet_hdr;
2097
2098 if (*len < sizeof(vnet_hdr))
2099 return -EINVAL;
2100 *len -= sizeof(vnet_hdr);
2101
2102 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
2103 return -EINVAL;
2104
2105 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2106}
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
/*
 * Receive hook for packet sockets that use the ordinary socket receive
 * queue.  The packet is filtered, trimmed to the filter's snap length,
 * tagged with a struct sockaddr_ll in its control block and appended to
 * sk->sk_receive_queue.
 *
 * Always returns 0.  Packets rejected by the filter are released with
 * consume_skb(); packets dropped for lack of buffer space or memory are
 * counted in po->tp_drops and sk->sk_drops and released with kfree_skb().
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	/* Saved so data/len can be put back if the skb turns out shared. */
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	bool is_drop_n_account = false;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev_has_header(dev)) {
		/* Non-DGRAM sockets see the packet from its MAC header on.
		 * DGRAM sockets see it from the network header; only
		 * outgoing packets need their data pointer advanced.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	/* A zero filter result rejects the packet; otherwise it caps the
	 * number of bytes kept.
	 */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		/* Work on a private clone; undo the data/len adjustment on
		 * the shared original before releasing our reference to it.
		 */
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		consume_skb(skb);
		skb = nskb;
	}

	sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_hatype = dev->type;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* Record the untrimmed length in the control block.  sll_family and
	 * sll_protocol are not written here (presumably filled in by the
	 * recvmsg path — confirm).
	 */
	PACKET_SKB_CB(skb)->sa.origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset_ct(skb);

	/* Packet counter and queue are updated under the queue lock. */
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.stats1.tp_packets++;
	sock_skb_set_dropcount(sk, skb);
	skb_clear_delivery_time(skb);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk);
	return 0;

drop_n_acct:
	is_drop_n_account = true;
	atomic_inc(&po->tp_drops);
	atomic_inc(&sk->sk_drops);

drop_n_restore:
	/* Other holders of a shared skb must see the original data/len. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	/* consume_skb() for unaccounted exits, kfree_skb() for real drops. */
	if (!is_drop_n_account)
		consume_skb(skb);
	else
		kfree_skb(skb);
	return 0;
}
2235
/*
 * Receive hook for packet sockets with an RX ring: copy the (possibly
 * truncated) packet into the next ring frame, fill in the version-specific
 * frame header plus a struct sockaddr_ll, and publish the frame status.
 *
 * Always returns 0.  Drops caused by a full ring, an oversized header
 * offset, an unavailable frame or a virtio header failure are counted in
 * po->tp_drops.
 */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union tpacket_uhdr h;
	/* Saved so data/len can be put back if the skb turns out shared. */
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_USER;
	unsigned short macoff, hdrlen;
	unsigned int netoff;
	struct sk_buff *copy_skb = NULL;
	struct timespec64 ts;
	__u32 ts_status;
	bool is_drop_n_account = false;
	unsigned int slot_id = 0;
	bool do_vnet = false;

	/* Compile-time check of the aligned v2 and v3 header sizes. */
	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev_has_header(dev)) {
		/* Non-DGRAM sockets see the packet from its MAC header on;
		 * DGRAM sockets from the network header.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	/* A zero filter result rejects the packet without accounting. */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;

	/* No room in the ring: count the drop and give up. */
	if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
		atomic_inc(&po->tp_drops);
		goto drop_n_restore;
	}

	/* Report checksum state through the frame status bits. */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;
	else if (skb->pkt_type != PACKET_OUTGOING &&
		 (skb->ip_summed == CHECKSUM_COMPLETE ||
		  skb_csum_unnecessary(skb)))
		status |= TP_STATUS_CSUM_VALID;

	if (snaplen > res)
		snaplen = res;

	/* Compute where MAC and network headers land inside the frame. */
	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned int maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
				       po->tp_reserve;
		if (po->has_vnet_hdr) {
			/* Room for a virtio_net_hdr just before the packet. */
			netoff += sizeof(struct virtio_net_hdr);
			do_vnet = true;
		}
		macoff = netoff - maclen;
	}
	/* macoff is an unsigned short; reject offsets beyond USHRT_MAX. */
	if (netoff > USHRT_MAX) {
		atomic_inc(&po->tp_drops);
		goto drop_n_restore;
	}
	if (po->tp_version <= TPACKET_V2) {
		if (macoff + snaplen > po->rx_ring.frame_size) {
			/* The packet does not fit the frame.  If copy_thresh
			 * is set and the receive buffer has room, also keep a
			 * reference/clone for the socket receive queue.
			 */
			if (po->copy_thresh &&
			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
				if (skb_shared(skb)) {
					copy_skb = skb_clone(skb, GFP_ATOMIC);
				} else {
					copy_skb = skb_get(skb);
					/* Makes the data/len restore at
					 * drop_n_restore a no-op for this skb.
					 */
					skb_head = skb->data;
				}
				if (copy_skb) {
					memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
					       sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
					skb_set_owner_r(copy_skb, sk);
				}
			}
			/* Truncate to what the frame can hold. */
			snaplen = po->rx_ring.frame_size - macoff;
			if ((int)snaplen < 0) {
				snaplen = 0;
				do_vnet = false;
			}
		}
	} else if (unlikely(macoff + snaplen >
			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
		/* V3: clamp to the block's maximum frame length. */
		u32 nval;

		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
			    snaplen, nval, macoff);
		snaplen = nval;
		if (unlikely((int)snaplen < 0)) {
			snaplen = 0;
			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
			do_vnet = false;
		}
	}
	/* Frame selection and ring bookkeeping run under the queue lock. */
	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_rx_frame(po, skb,
					TP_STATUS_KERNEL, (macoff+snaplen));
	if (!h.raw)
		goto drop_n_account;

	if (po->tp_version <= TPACKET_V2) {
		/* Claim the slot in rx_owner_map while it is being filled
		 * outside the lock; a slot that is still claimed is skipped.
		 */
		slot_id = po->rx_ring.head;
		if (test_bit(slot_id, po->rx_ring.rx_owner_map))
			goto drop_n_account;
		__set_bit(slot_id, po->rx_ring.rx_owner_map);
	}

	/* The virtio header is written directly in front of the packet. */
	if (do_vnet &&
	    virtio_net_hdr_from_skb(skb, h.raw + macoff -
				    sizeof(struct virtio_net_hdr),
				    vio_le(), true, 0)) {
		if (po->tp_version == TPACKET_V3)
			prb_clear_blk_fill_status(&po->rx_ring);
		goto drop_n_account;
	}

	if (po->tp_version <= TPACKET_V2) {
		packet_increment_rx_head(po, &po->rx_ring);
		/* For V1/V2 frames, flag TP_STATUS_LOSING while the drop
		 * counter is non-zero.
		 */
		if (atomic_read(&po->tp_drops))
			status |= TP_STATUS_LOSING;
	}

	po->stats.stats1.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		skb_clear_delivery_time(copy_skb);
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	/* Copy the packet bytes into the frame outside the lock. */
	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	/* Always timestamp: fall back to the current real time when
	 * tpacket_get_timestamp() reports none.
	 */
	ts_status = tpacket_get_timestamp(skb, &ts,
					  po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
	if (!ts_status)
		ktime_get_real_ts64(&ts);

	status |= ts_status;

	/* Fill the version-specific frame header. */
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		h.h1->tp_sec = ts.tv_sec;
		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		if (skb_vlan_tag_present(skb)) {
			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
		} else {
			h.h2->tp_vlan_tci = 0;
			h.h2->tp_vlan_tpid = 0;
		}
		memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
		hdrlen = sizeof(*h.h2);
		break;
	case TPACKET_V3:
		/* V3 status is OR-ed into the existing tp_status here rather
		 * than set through __packet_set_status() further down.
		 */
		h.h3->tp_status |= status;
		h.h3->tp_len = skb->len;
		h.h3->tp_snaplen = snaplen;
		h.h3->tp_mac = macoff;
		h.h3->tp_net = netoff;
		h.h3->tp_sec = ts.tv_sec;
		h.h3->tp_nsec = ts.tv_nsec;
		memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
		hdrlen = sizeof(*h.h3);
		break;
	default:
		BUG();
	}

	/* The sockaddr_ll follows the aligned frame header. */
	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	/* Full barrier between the frame contents written above and the
	 * status update below.
	 */
	smp_mb();

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	if (po->tp_version <= TPACKET_V2) {
		u8 *start, *end;

		end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
					macoff + snaplen);

		/* Flush every page touched by this frame. */
		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
	smp_wmb();
#endif

	if (po->tp_version <= TPACKET_V2) {
		/* Publish the status and release the slot claimed above. */
		spin_lock(&sk->sk_receive_queue.lock);
		__packet_set_status(po, h.raw, status);
		__clear_bit(slot_id, po->rx_ring.rx_owner_map);
		spin_unlock(&sk->sk_receive_queue.lock);
		sk->sk_data_ready(sk);
	} else if (po->tp_version == TPACKET_V3) {
		prb_clear_blk_fill_status(&po->rx_ring);
	}

drop_n_restore:
	/* Other holders of a shared skb must see the original data/len. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	/* consume_skb() for unaccounted exits, kfree_skb() for real drops. */
	if (!is_drop_n_account)
		consume_skb(skb);
	else
		kfree_skb(skb);
	return 0;

drop_n_account:
	/* Reached with the receive queue lock held. */
	spin_unlock(&sk->sk_receive_queue.lock);
	atomic_inc(&po->tp_drops);
	is_drop_n_account = true;

	sk->sk_data_ready(sk);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}
2514
2515static void tpacket_destruct_skb(struct sk_buff *skb)
2516{
2517 struct packet_sock *po = pkt_sk(skb->sk);
2518
2519 if (likely(po->tx_ring.pg_vec)) {
2520 void *ph;
2521 __u32 ts;
2522
2523 ph = skb_zcopy_get_nouarg(skb);
2524 packet_dec_pending(&po->tx_ring);
2525
2526 ts = __packet_set_timestamp(po, ph, skb);
2527 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2528
2529 if (!packet_read_pending(&po->tx_ring))
2530 complete(&po->skb_completion);
2531 }
2532
2533 sock_wfree(skb);
2534}
2535
2536static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2537{
2538 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2539 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2540 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2541 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2542 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2543 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2544 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2545
2546 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2547 return -EINVAL;
2548
2549 return 0;
2550}
2551
2552static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2553 struct virtio_net_hdr *vnet_hdr)
2554{
2555 if (*len < sizeof(*vnet_hdr))
2556 return -EINVAL;
2557 *len -= sizeof(*vnet_hdr);
2558
2559 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2560 return -EFAULT;
2561
2562 return __packet_snd_vnet_parse(vnet_hdr, *len);
2563}
2564
/*
 * Populate @skb from a TX ring frame.
 *
 * @po:      owning packet socket
 * @skb:     freshly allocated skb to fill
 * @frame:   ring frame; its address is stored in the skb for the destructor
 * @dev:     output device
 * @data:    start of the packet bytes inside the frame
 * @tp_len:  packet length taken from the frame header
 * @proto:   protocol to set on the skb
 * @addr:    destination link-layer address (used for SOCK_DGRAM)
 * @hlen:    headroom to reserve
 * @copylen: number of bytes placed in the skb's linear area (non-DGRAM)
 * @sockc:   control-message cookie (transmit time, timestamp flags)
 *
 * For SOCK_DGRAM the link-layer header is built by dev_hard_header().
 * Otherwise, when @copylen is non-zero, the first min(@copylen, @tp_len)
 * bytes are copied into the linear area and validated as a device header.
 * The remaining bytes are attached as page fragments that point at the
 * ring pages.
 *
 * Returns @tp_len on success, -EINVAL when header construction or
 * validation fails, the skb_store_bits() error, or -EFAULT when more than
 * MAX_SKB_FRAGS fragments would be needed.
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, void *data, int tp_len,
		__be16 proto, unsigned char *addr, int hlen, int copylen,
		const struct sockcm_cookie *sockc)
{
	union tpacket_uhdr ph;
	int to_write, offset, len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb->tstamp = sockc->transmit_time;
	skb_setup_tx_timestamp(skb, sockc->tsflags);
	/* Remember the frame; tpacket_destruct_skb() reads it back with
	 * skb_zcopy_get_nouarg().
	 */
	skb_zcopy_set_nouarg(skb, ph.raw);

	skb_reserve(skb, hlen);
	skb_reset_network_header(skb);

	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (copylen) {
		int hdrlen = min_t(int, copylen, tp_len);

		/* Open up copylen bytes of linear space: hard_header_len in
		 * front of the network header, the rest behind it.
		 */
		skb_push(skb, dev->hard_header_len);
		skb_put(skb, copylen - dev->hard_header_len);
		err = skb_store_bits(skb, 0, data, hdrlen);
		if (unlikely(err))
			return err;
		if (!dev_validate_header(dev, skb->data, hdrlen))
			return -EINVAL;

		data += hdrlen;
		to_write -= hdrlen;
	}

	/* The first fragment runs from data to the end of its page. */
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	/* Account for all fragment bytes up front. */
	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	refcount_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		/* Attach the ring page itself, taking a page reference. */
		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		/* Later fragments start at the beginning of their page. */
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	packet_parse_headers(skb, sock);

	return tp_len;
}
2644
2645static int tpacket_parse_header(struct packet_sock *po, void *frame,
2646 int size_max, void **data)
2647{
2648 union tpacket_uhdr ph;
2649 int tp_len, off;
2650
2651 ph.raw = frame;
2652
2653 switch (po->tp_version) {
2654 case TPACKET_V3:
2655 if (ph.h3->tp_next_offset != 0) {
2656 pr_warn_once("variable sized slot not supported");
2657 return -EINVAL;
2658 }
2659 tp_len = ph.h3->tp_len;
2660 break;
2661 case TPACKET_V2:
2662 tp_len = ph.h2->tp_len;
2663 break;
2664 default:
2665 tp_len = ph.h1->tp_len;
2666 break;
2667 }
2668 if (unlikely(tp_len > size_max)) {
2669 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2670 return -EMSGSIZE;
2671 }
2672
2673 if (unlikely(po->tp_tx_has_off)) {
2674 int off_min, off_max;
2675
2676 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2677 off_max = po->tx_ring.frame_size - tp_len;
2678 if (po->sk.sk_type == SOCK_DGRAM) {
2679 switch (po->tp_version) {
2680 case TPACKET_V3:
2681 off = ph.h3->tp_net;
2682 break;
2683 case TPACKET_V2:
2684 off = ph.h2->tp_net;
2685 break;
2686 default:
2687 off = ph.h1->tp_net;
2688 break;
2689 }
2690 } else {
2691 switch (po->tp_version) {
2692 case TPACKET_V3:
2693 off = ph.h3->tp_mac;
2694 break;
2695 case TPACKET_V2:
2696 off = ph.h2->tp_mac;
2697 break;
2698 default:
2699 off = ph.h1->tp_mac;
2700 break;
2701 }
2702 }
2703 if (unlikely((off < off_min) || (off_max < off)))
2704 return -EINVAL;
2705 } else {
2706 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2707 }
2708
2709 *data = frame + off;
2710 return tp_len;
2711}
2712
2713static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2714{
2715 struct sk_buff *skb = NULL;
2716 struct net_device *dev;
2717 struct virtio_net_hdr *vnet_hdr = NULL;
2718 struct sockcm_cookie sockc;
2719 __be16 proto;
2720 int err, reserve = 0;
2721 void *ph;
2722 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2723 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2724 unsigned char *addr = NULL;
2725 int tp_len, size_max;
2726 void *data;
2727 int len_sum = 0;
2728 int status = TP_STATUS_AVAILABLE;
2729 int hlen, tlen, copylen = 0;
2730 long timeo = 0;
2731
2732 mutex_lock(&po->pg_vec_lock);
2733
2734
2735
2736
2737 if (unlikely(!po->tx_ring.pg_vec)) {
2738 err = -EBUSY;
2739 goto out;
2740 }
2741 if (likely(saddr == NULL)) {
2742 dev = packet_cached_dev_get(po);
2743 proto = READ_ONCE(po->num);
2744 } else {
2745 err = -EINVAL;
2746 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2747 goto out;
2748 if (msg->msg_namelen < (saddr->sll_halen
2749 + offsetof(struct sockaddr_ll,
2750 sll_addr)))
2751 goto out;
2752 proto = saddr->sll_protocol;
2753 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2754 if (po->sk.sk_socket->type == SOCK_DGRAM) {
2755 if (dev && msg->msg_namelen < dev->addr_len +
2756 offsetof(struct sockaddr_ll, sll_addr))
2757 goto out_put;
2758 addr = saddr->sll_addr;
2759 }
2760 }
2761
2762 err = -ENXIO;
2763 if (unlikely(dev == NULL))
2764 goto out;
2765 err = -ENETDOWN;
2766 if (unlikely(!(dev->flags & IFF_UP)))
2767 goto out_put;
2768
2769 sockcm_init(&sockc, &po->sk);
2770 if (msg->msg_controllen) {
2771 err = sock_cmsg_send(&po->sk, msg, &sockc);
2772 if (unlikely(err))
2773 goto out_put;
2774 }
2775
2776 if (po->sk.sk_socket->type == SOCK_RAW)
2777 reserve = dev->hard_header_len;
2778 size_max = po->tx_ring.frame_size
2779 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2780
2781 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2782 size_max = dev->mtu + reserve + VLAN_HLEN;
2783
2784 reinit_completion(&po->skb_completion);
2785
2786 do {
2787 ph = packet_current_frame(po, &po->tx_ring,
2788 TP_STATUS_SEND_REQUEST);
2789 if (unlikely(ph == NULL)) {
2790 if (need_wait && skb) {
2791 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2792 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2793 if (timeo <= 0) {
2794 err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2795 goto out_put;
2796 }
2797 }
2798
2799 continue;
2800 }
2801
2802 skb = NULL;
2803 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2804 if (tp_len < 0)
2805 goto tpacket_error;
2806
2807 status = TP_STATUS_SEND_REQUEST;
2808 hlen = LL_RESERVED_SPACE(dev);
2809 tlen = dev->needed_tailroom;
2810 if (po->has_vnet_hdr) {
2811 vnet_hdr = data;
2812 data += sizeof(*vnet_hdr);
2813 tp_len -= sizeof(*vnet_hdr);
2814 if (tp_len < 0 ||
2815 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2816 tp_len = -EINVAL;
2817 goto tpacket_error;
2818 }
2819 copylen = __virtio16_to_cpu(vio_le(),
2820 vnet_hdr->hdr_len);
2821 }
2822 copylen = max_t(int, copylen, dev->hard_header_len);
2823 skb = sock_alloc_send_skb(&po->sk,
2824 hlen + tlen + sizeof(struct sockaddr_ll) +
2825 (copylen - dev->hard_header_len),
2826 !need_wait, &err);
2827
2828 if (unlikely(skb == NULL)) {
2829
2830 if (likely(len_sum > 0))
2831 err = len_sum;
2832 goto out_status;
2833 }
2834 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2835 addr, hlen, copylen, &sockc);
2836 if (likely(tp_len >= 0) &&
2837 tp_len > dev->mtu + reserve &&
2838 !po->has_vnet_hdr &&
2839 !packet_extra_vlan_len_allowed(dev, skb))
2840 tp_len = -EMSGSIZE;
2841
2842 if (unlikely(tp_len < 0)) {
2843tpacket_error:
2844 if (po->tp_loss) {
2845 __packet_set_status(po, ph,
2846 TP_STATUS_AVAILABLE);
2847 packet_increment_head(&po->tx_ring);
2848 kfree_skb(skb);
2849 continue;
2850 } else {
2851 status = TP_STATUS_WRONG_FORMAT;
2852 err = tp_len;
2853 goto out_status;
2854 }
2855 }
2856
2857 if (po->has_vnet_hdr) {
2858 if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
2859 tp_len = -EINVAL;
2860 goto tpacket_error;
2861 }
2862 virtio_net_hdr_set_proto(skb, vnet_hdr);
2863 }
2864
2865 skb->destructor = tpacket_destruct_skb;
2866 __packet_set_status(po, ph, TP_STATUS_SENDING);
2867 packet_inc_pending(&po->tx_ring);
2868
2869 status = TP_STATUS_SEND_REQUEST;
2870 err = po->xmit(skb);
2871 if (unlikely(err != 0)) {
2872 if (err > 0)
2873 err = net_xmit_errno(err);
2874 if (err && __packet_get_status(po, ph) ==
2875 TP_STATUS_AVAILABLE) {
2876
2877 skb = NULL;
2878 goto out_status;
2879 }
2880
2881
2882
2883
2884 err = 0;
2885 }
2886 packet_increment_head(&po->tx_ring);
2887 len_sum += tp_len;
2888 } while (likely((ph != NULL) ||
2889
2890
2891
2892
2893
2894
2895 (need_wait && packet_read_pending(&po->tx_ring))));
2896
2897 err = len_sum;
2898 goto out_put;
2899
2900out_status:
2901 __packet_set_status(po, ph, status);
2902 kfree_skb(skb);
2903out_put:
2904 dev_put(dev);
2905out:
2906 mutex_unlock(&po->pg_vec_lock);
2907 return err;
2908}
2909
2910static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2911 size_t reserve, size_t len,
2912 size_t linear, int noblock,
2913 int *err)
2914{
2915 struct sk_buff *skb;
2916
2917
2918 if (prepad + len < PAGE_SIZE || !linear)
2919 linear = len;
2920
2921 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2922 err, 0);
2923 if (!skb)
2924 return NULL;
2925
2926 skb_reserve(skb, reserve);
2927 skb_put(skb, linear);
2928 skb->data_len = len - linear;
2929 skb->len += len - linear;
2930
2931 return skb;
2932}
2933
2934static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2935{
2936 struct sock *sk = sock->sk;
2937 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2938 struct sk_buff *skb;
2939 struct net_device *dev;
2940 __be16 proto;
2941 unsigned char *addr = NULL;
2942 int err, reserve = 0;
2943 struct sockcm_cookie sockc;
2944 struct virtio_net_hdr vnet_hdr = { 0 };
2945 int offset = 0;
2946 struct packet_sock *po = pkt_sk(sk);
2947 bool has_vnet_hdr = false;
2948 int hlen, tlen, linear;
2949 int extra_len = 0;
2950
2951
2952
2953
2954
2955 if (likely(saddr == NULL)) {
2956 dev = packet_cached_dev_get(po);
2957 proto = READ_ONCE(po->num);
2958 } else {
2959 err = -EINVAL;
2960 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2961 goto out;
2962 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2963 goto out;
2964 proto = saddr->sll_protocol;
2965 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2966 if (sock->type == SOCK_DGRAM) {
2967 if (dev && msg->msg_namelen < dev->addr_len +
2968 offsetof(struct sockaddr_ll, sll_addr))
2969 goto out_unlock;
2970 addr = saddr->sll_addr;
2971 }
2972 }
2973
2974 err = -ENXIO;
2975 if (unlikely(dev == NULL))
2976 goto out_unlock;
2977 err = -ENETDOWN;
2978 if (unlikely(!(dev->flags & IFF_UP)))
2979 goto out_unlock;
2980
2981 sockcm_init(&sockc, sk);
2982 sockc.mark = sk->sk_mark;
2983 if (msg->msg_controllen) {
2984 err = sock_cmsg_send(sk, msg, &sockc);
2985 if (unlikely(err))
2986 goto out_unlock;
2987 }
2988
2989 if (sock->type == SOCK_RAW)
2990 reserve = dev->hard_header_len;
2991 if (po->has_vnet_hdr) {
2992 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2993 if (err)
2994 goto out_unlock;
2995 has_vnet_hdr = true;
2996 }
2997
2998 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2999 if (!netif_supports_nofcs(dev)) {
3000 err = -EPROTONOSUPPORT;
3001 goto out_unlock;
3002 }
3003 extra_len = 4;
3004 }
3005
3006 err = -EMSGSIZE;
3007 if (!vnet_hdr.gso_type &&
3008 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
3009 goto out_unlock;
3010
3011 err = -ENOBUFS;
3012 hlen = LL_RESERVED_SPACE(dev);
3013 tlen = dev->needed_tailroom;
3014 linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
3015 linear = max(linear, min_t(int, len, dev->hard_header_len));
3016 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
3017 msg->msg_flags & MSG_DONTWAIT, &err);
3018 if (skb == NULL)
3019 goto out_unlock;
3020
3021 skb_reset_network_header(skb);
3022
3023 err = -EINVAL;
3024 if (sock->type == SOCK_DGRAM) {
3025 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
3026 if (unlikely(offset < 0))
3027 goto out_free;
3028 } else if (reserve) {
3029 skb_reserve(skb, -reserve);
3030 if (len < reserve + sizeof(struct ipv6hdr) &&
3031 dev->min_header_len != dev->hard_header_len)
3032 skb_reset_network_header(skb);
3033 }
3034
3035
3036 err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
3037 if (err)
3038 goto out_free;
3039
3040 if ((sock->type == SOCK_RAW &&
3041 !dev_validate_header(dev, skb->data, len)) || !skb->len) {
3042 err = -EINVAL;
3043 goto out_free;
3044 }
3045
3046 skb_setup_tx_timestamp(skb, sockc.tsflags);
3047
3048 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
3049 !packet_extra_vlan_len_allowed(dev, skb)) {
3050 err = -EMSGSIZE;
3051 goto out_free;
3052 }
3053
3054 skb->protocol = proto;
3055 skb->dev = dev;
3056 skb->priority = sk->sk_priority;
3057 skb->mark = sockc.mark;
3058 skb->tstamp = sockc.transmit_time;
3059
3060 if (unlikely(extra_len == 4))
3061 skb->no_fcs = 1;
3062
3063 packet_parse_headers(skb, sock);
3064
3065 if (has_vnet_hdr) {
3066 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
3067 if (err)
3068 goto out_free;
3069 len += sizeof(vnet_hdr);
3070 virtio_net_hdr_set_proto(skb, &vnet_hdr);
3071 }
3072
3073 err = po->xmit(skb);
3074 if (unlikely(err != 0)) {
3075 if (err > 0)
3076 err = net_xmit_errno(err);
3077 if (err)
3078 goto out_unlock;
3079 }
3080
3081 dev_put(dev);
3082
3083 return len;
3084
3085out_free:
3086 kfree_skb(skb);
3087out_unlock:
3088 dev_put(dev);
3089out:
3090 return err;
3091}
3092
3093static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
3094{
3095 struct sock *sk = sock->sk;
3096 struct packet_sock *po = pkt_sk(sk);
3097
3098
3099
3100
3101 if (data_race(po->tx_ring.pg_vec))
3102 return tpacket_snd(po, msg);
3103
3104 return packet_snd(sock, msg, len);
3105}
3106
3107
3108
3109
3110
3111
3112static int packet_release(struct socket *sock)
3113{
3114 struct sock *sk = sock->sk;
3115 struct packet_sock *po;
3116 struct packet_fanout *f;
3117 struct net *net;
3118 union tpacket_req_u req_u;
3119
3120 if (!sk)
3121 return 0;
3122
3123 net = sock_net(sk);
3124 po = pkt_sk(sk);
3125
3126 mutex_lock(&net->packet.sklist_lock);
3127 sk_del_node_init_rcu(sk);
3128 mutex_unlock(&net->packet.sklist_lock);
3129
3130 sock_prot_inuse_add(net, sk->sk_prot, -1);
3131
3132 spin_lock(&po->bind_lock);
3133 unregister_prot_hook(sk, false);
3134 packet_cached_dev_reset(po);
3135
3136 if (po->prot_hook.dev) {
3137 netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
3138 po->prot_hook.dev = NULL;
3139 }
3140 spin_unlock(&po->bind_lock);
3141
3142 packet_flush_mclist(sk);
3143
3144 lock_sock(sk);
3145 if (po->rx_ring.pg_vec) {
3146 memset(&req_u, 0, sizeof(req_u));
3147 packet_set_ring(sk, &req_u, 1, 0);
3148 }
3149
3150 if (po->tx_ring.pg_vec) {
3151 memset(&req_u, 0, sizeof(req_u));
3152 packet_set_ring(sk, &req_u, 1, 1);
3153 }
3154 release_sock(sk);
3155
3156 f = fanout_release(sk);
3157
3158 synchronize_net();
3159
3160 kfree(po->rollover);
3161 if (f) {
3162 fanout_release_data(f);
3163 kvfree(f);
3164 }
3165
3166
3167
3168 sock_orphan(sk);
3169 sock->sk = NULL;
3170
3171
3172
3173 skb_queue_purge(&sk->sk_receive_queue);
3174 packet_free_pending(po);
3175 sk_refcnt_debug_release(sk);
3176
3177 sock_put(sk);
3178 return 0;
3179}
3180
3181
3182
3183
3184
3185static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3186 __be16 proto)
3187{
3188 struct packet_sock *po = pkt_sk(sk);
3189 struct net_device *dev = NULL;
3190 bool unlisted = false;
3191 bool need_rehook;
3192 int ret = 0;
3193
3194 lock_sock(sk);
3195 spin_lock(&po->bind_lock);
3196 rcu_read_lock();
3197
3198 if (po->fanout) {
3199 ret = -EINVAL;
3200 goto out_unlock;
3201 }
3202
3203 if (name) {
3204 dev = dev_get_by_name_rcu(sock_net(sk), name);
3205 if (!dev) {
3206 ret = -ENODEV;
3207 goto out_unlock;
3208 }
3209 } else if (ifindex) {
3210 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3211 if (!dev) {
3212 ret = -ENODEV;
3213 goto out_unlock;
3214 }
3215 }
3216
3217 need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
3218
3219 if (need_rehook) {
3220 dev_hold(dev);
3221 if (po->running) {
3222 rcu_read_unlock();
3223
3224
3225
3226 WRITE_ONCE(po->num, 0);
3227 __unregister_prot_hook(sk, true);
3228 rcu_read_lock();
3229 if (dev)
3230 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3231 dev->ifindex);
3232 }
3233
3234 BUG_ON(po->running);
3235 WRITE_ONCE(po->num, proto);
3236 po->prot_hook.type = proto;
3237
3238 netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
3239
3240 if (unlikely(unlisted)) {
3241 po->prot_hook.dev = NULL;
3242 WRITE_ONCE(po->ifindex, -1);
3243 packet_cached_dev_reset(po);
3244 } else {
3245 netdev_hold(dev, &po->prot_hook.dev_tracker,
3246 GFP_ATOMIC);
3247 po->prot_hook.dev = dev;
3248 WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
3249 packet_cached_dev_assign(po, dev);
3250 }
3251 dev_put(dev);
3252 }
3253
3254 if (proto == 0 || !need_rehook)
3255 goto out_unlock;
3256
3257 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3258 register_prot_hook(sk);
3259 } else {
3260 sk->sk_err = ENETDOWN;
3261 if (!sock_flag(sk, SOCK_DEAD))
3262 sk_error_report(sk);
3263 }
3264
3265out_unlock:
3266 rcu_read_unlock();
3267 spin_unlock(&po->bind_lock);
3268 release_sock(sk);
3269 return ret;
3270}
3271
3272
3273
3274
3275
3276static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3277 int addr_len)
3278{
3279 struct sock *sk = sock->sk;
3280 char name[sizeof(uaddr->sa_data) + 1];
3281
3282
3283
3284
3285
3286 if (addr_len != sizeof(struct sockaddr))
3287 return -EINVAL;
3288
3289
3290
3291 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3292 name[sizeof(uaddr->sa_data)] = 0;
3293
3294 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3295}
3296
3297static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3298{
3299 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3300 struct sock *sk = sock->sk;
3301
3302
3303
3304
3305
3306 if (addr_len < sizeof(struct sockaddr_ll))
3307 return -EINVAL;
3308 if (sll->sll_family != AF_PACKET)
3309 return -EINVAL;
3310
3311 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3312 sll->sll_protocol ? : pkt_sk(sk)->num);
3313}
3314
3315static struct proto packet_proto = {
3316 .name = "PACKET",
3317 .owner = THIS_MODULE,
3318 .obj_size = sizeof(struct packet_sock),
3319};
3320
3321
3322
3323
3324
3325static int packet_create(struct net *net, struct socket *sock, int protocol,
3326 int kern)
3327{
3328 struct sock *sk;
3329 struct packet_sock *po;
3330 __be16 proto = (__force __be16)protocol;
3331 int err;
3332
3333 if (!ns_capable(net->user_ns, CAP_NET_RAW))
3334 return -EPERM;
3335 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3336 sock->type != SOCK_PACKET)
3337 return -ESOCKTNOSUPPORT;
3338
3339 sock->state = SS_UNCONNECTED;
3340
3341 err = -ENOBUFS;
3342 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3343 if (sk == NULL)
3344 goto out;
3345
3346 sock->ops = &packet_ops;
3347 if (sock->type == SOCK_PACKET)
3348 sock->ops = &packet_ops_spkt;
3349
3350 sock_init_data(sock, sk);
3351
3352 po = pkt_sk(sk);
3353 init_completion(&po->skb_completion);
3354 sk->sk_family = PF_PACKET;
3355 po->num = proto;
3356 po->xmit = dev_queue_xmit;
3357
3358 err = packet_alloc_pending(po);
3359 if (err)
3360 goto out2;
3361
3362 packet_cached_dev_reset(po);
3363
3364 sk->sk_destruct = packet_sock_destruct;
3365 sk_refcnt_debug_inc(sk);
3366
3367
3368
3369
3370
3371 spin_lock_init(&po->bind_lock);
3372 mutex_init(&po->pg_vec_lock);
3373 po->rollover = NULL;
3374 po->prot_hook.func = packet_rcv;
3375
3376 if (sock->type == SOCK_PACKET)
3377 po->prot_hook.func = packet_rcv_spkt;
3378
3379 po->prot_hook.af_packet_priv = sk;
3380 po->prot_hook.af_packet_net = sock_net(sk);
3381
3382 if (proto) {
3383 po->prot_hook.type = proto;
3384 __register_prot_hook(sk);
3385 }
3386
3387 mutex_lock(&net->packet.sklist_lock);
3388 sk_add_node_tail_rcu(sk, &net->packet.sklist);
3389 mutex_unlock(&net->packet.sklist_lock);
3390
3391 sock_prot_inuse_add(net, &packet_proto, 1);
3392
3393 return 0;
3394out2:
3395 sk_free(sk);
3396out:
3397 return err;
3398}
3399
3400
3401
3402
3403
3404
3405static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3406 int flags)
3407{
3408 struct sock *sk = sock->sk;
3409 struct sk_buff *skb;
3410 int copied, err;
3411 int vnet_hdr_len = 0;
3412 unsigned int origlen = 0;
3413
3414 err = -EINVAL;
3415 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3416 goto out;
3417
3418#if 0
3419
3420 if (pkt_sk(sk)->ifindex < 0)
3421 return -ENODEV;
3422#endif
3423
3424 if (flags & MSG_ERRQUEUE) {
3425 err = sock_recv_errqueue(sk, msg, len,
3426 SOL_PACKET, PACKET_TX_TIMESTAMP);
3427 goto out;
3428 }
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439 skb = skb_recv_datagram(sk, flags, &err);
3440
3441
3442
3443
3444
3445
3446
3447 if (skb == NULL)
3448 goto out;
3449
3450 packet_rcv_try_clear_pressure(pkt_sk(sk));
3451
3452 if (pkt_sk(sk)->has_vnet_hdr) {
3453 err = packet_rcv_vnet(msg, skb, &len);
3454 if (err)
3455 goto out_free;
3456 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3457 }
3458
3459
3460
3461
3462
3463 copied = skb->len;
3464 if (copied > len) {
3465 copied = len;
3466 msg->msg_flags |= MSG_TRUNC;
3467 }
3468
3469 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3470 if (err)
3471 goto out_free;
3472
3473 if (sock->type != SOCK_PACKET) {
3474 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3475
3476
3477 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3478 sll->sll_family = AF_PACKET;
3479 sll->sll_protocol = skb->protocol;
3480 }
3481
3482 sock_recv_cmsgs(msg, sk, skb);
3483
3484 if (msg->msg_name) {
3485 const size_t max_len = min(sizeof(skb->cb),
3486 sizeof(struct sockaddr_storage));
3487 int copy_len;
3488
3489
3490
3491
3492 if (sock->type == SOCK_PACKET) {
3493 __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3494 msg->msg_namelen = sizeof(struct sockaddr_pkt);
3495 copy_len = msg->msg_namelen;
3496 } else {
3497 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3498
3499 msg->msg_namelen = sll->sll_halen +
3500 offsetof(struct sockaddr_ll, sll_addr);
3501 copy_len = msg->msg_namelen;
3502 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
3503 memset(msg->msg_name +
3504 offsetof(struct sockaddr_ll, sll_addr),
3505 0, sizeof(sll->sll_addr));
3506 msg->msg_namelen = sizeof(struct sockaddr_ll);
3507 }
3508 }
3509 if (WARN_ON_ONCE(copy_len > max_len)) {
3510 copy_len = max_len;
3511 msg->msg_namelen = copy_len;
3512 }
3513 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
3514 }
3515
3516 if (pkt_sk(sk)->auxdata) {
3517 struct tpacket_auxdata aux;
3518
3519 aux.tp_status = TP_STATUS_USER;
3520 if (skb->ip_summed == CHECKSUM_PARTIAL)
3521 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3522 else if (skb->pkt_type != PACKET_OUTGOING &&
3523 (skb->ip_summed == CHECKSUM_COMPLETE ||
3524 skb_csum_unnecessary(skb)))
3525 aux.tp_status |= TP_STATUS_CSUM_VALID;
3526
3527 aux.tp_len = origlen;
3528 aux.tp_snaplen = skb->len;
3529 aux.tp_mac = 0;
3530 aux.tp_net = skb_network_offset(skb);
3531 if (skb_vlan_tag_present(skb)) {
3532 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3533 aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3534 aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3535 } else {
3536 aux.tp_vlan_tci = 0;
3537 aux.tp_vlan_tpid = 0;
3538 }
3539 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3540 }
3541
3542
3543
3544
3545
3546 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3547
3548out_free:
3549 skb_free_datagram(sk, skb);
3550out:
3551 return err;
3552}
3553
3554static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3555 int peer)
3556{
3557 struct net_device *dev;
3558 struct sock *sk = sock->sk;
3559
3560 if (peer)
3561 return -EOPNOTSUPP;
3562
3563 uaddr->sa_family = AF_PACKET;
3564 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3565 rcu_read_lock();
3566 dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
3567 if (dev)
3568 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3569 rcu_read_unlock();
3570
3571 return sizeof(*uaddr);
3572}
3573
3574static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3575 int peer)
3576{
3577 struct net_device *dev;
3578 struct sock *sk = sock->sk;
3579 struct packet_sock *po = pkt_sk(sk);
3580 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3581 int ifindex;
3582
3583 if (peer)
3584 return -EOPNOTSUPP;
3585
3586 ifindex = READ_ONCE(po->ifindex);
3587 sll->sll_family = AF_PACKET;
3588 sll->sll_ifindex = ifindex;
3589 sll->sll_protocol = READ_ONCE(po->num);
3590 sll->sll_pkttype = 0;
3591 rcu_read_lock();
3592 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3593 if (dev) {
3594 sll->sll_hatype = dev->type;
3595 sll->sll_halen = dev->addr_len;
3596 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3597 } else {
3598 sll->sll_hatype = 0;
3599 sll->sll_halen = 0;
3600 }
3601 rcu_read_unlock();
3602
3603 return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3604}
3605
3606static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3607 int what)
3608{
3609 switch (i->type) {
3610 case PACKET_MR_MULTICAST:
3611 if (i->alen != dev->addr_len)
3612 return -EINVAL;
3613 if (what > 0)
3614 return dev_mc_add(dev, i->addr);
3615 else
3616 return dev_mc_del(dev, i->addr);
3617 break;
3618 case PACKET_MR_PROMISC:
3619 return dev_set_promiscuity(dev, what);
3620 case PACKET_MR_ALLMULTI:
3621 return dev_set_allmulti(dev, what);
3622 case PACKET_MR_UNICAST:
3623 if (i->alen != dev->addr_len)
3624 return -EINVAL;
3625 if (what > 0)
3626 return dev_uc_add(dev, i->addr);
3627 else
3628 return dev_uc_del(dev, i->addr);
3629 break;
3630 default:
3631 break;
3632 }
3633 return 0;
3634}
3635
3636static void packet_dev_mclist_delete(struct net_device *dev,
3637 struct packet_mclist **mlp)
3638{
3639 struct packet_mclist *ml;
3640
3641 while ((ml = *mlp) != NULL) {
3642 if (ml->ifindex == dev->ifindex) {
3643 packet_dev_mc(dev, ml, -1);
3644 *mlp = ml->next;
3645 kfree(ml);
3646 } else
3647 mlp = &ml->next;
3648 }
3649}
3650
3651static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3652{
3653 struct packet_sock *po = pkt_sk(sk);
3654 struct packet_mclist *ml, *i;
3655 struct net_device *dev;
3656 int err;
3657
3658 rtnl_lock();
3659
3660 err = -ENODEV;
3661 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3662 if (!dev)
3663 goto done;
3664
3665 err = -EINVAL;
3666 if (mreq->mr_alen > dev->addr_len)
3667 goto done;
3668
3669 err = -ENOBUFS;
3670 i = kmalloc(sizeof(*i), GFP_KERNEL);
3671 if (i == NULL)
3672 goto done;
3673
3674 err = 0;
3675 for (ml = po->mclist; ml; ml = ml->next) {
3676 if (ml->ifindex == mreq->mr_ifindex &&
3677 ml->type == mreq->mr_type &&
3678 ml->alen == mreq->mr_alen &&
3679 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3680 ml->count++;
3681
3682 kfree(i);
3683 goto done;
3684 }
3685 }
3686
3687 i->type = mreq->mr_type;
3688 i->ifindex = mreq->mr_ifindex;
3689 i->alen = mreq->mr_alen;
3690 memcpy(i->addr, mreq->mr_address, i->alen);
3691 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3692 i->count = 1;
3693 i->next = po->mclist;
3694 po->mclist = i;
3695 err = packet_dev_mc(dev, i, 1);
3696 if (err) {
3697 po->mclist = i->next;
3698 kfree(i);
3699 }
3700
3701done:
3702 rtnl_unlock();
3703 return err;
3704}
3705
3706static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3707{
3708 struct packet_mclist *ml, **mlp;
3709
3710 rtnl_lock();
3711
3712 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3713 if (ml->ifindex == mreq->mr_ifindex &&
3714 ml->type == mreq->mr_type &&
3715 ml->alen == mreq->mr_alen &&
3716 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3717 if (--ml->count == 0) {
3718 struct net_device *dev;
3719 *mlp = ml->next;
3720 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3721 if (dev)
3722 packet_dev_mc(dev, ml, -1);
3723 kfree(ml);
3724 }
3725 break;
3726 }
3727 }
3728 rtnl_unlock();
3729 return 0;
3730}
3731
3732static void packet_flush_mclist(struct sock *sk)
3733{
3734 struct packet_sock *po = pkt_sk(sk);
3735 struct packet_mclist *ml;
3736
3737 if (!po->mclist)
3738 return;
3739
3740 rtnl_lock();
3741 while ((ml = po->mclist) != NULL) {
3742 struct net_device *dev;
3743
3744 po->mclist = ml->next;
3745 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3746 if (dev != NULL)
3747 packet_dev_mc(dev, ml, -1);
3748 kfree(ml);
3749 }
3750 rtnl_unlock();
3751}
3752
3753static int
3754packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
3755 unsigned int optlen)
3756{
3757 struct sock *sk = sock->sk;
3758 struct packet_sock *po = pkt_sk(sk);
3759 int ret;
3760
3761 if (level != SOL_PACKET)
3762 return -ENOPROTOOPT;
3763
3764 switch (optname) {
3765 case PACKET_ADD_MEMBERSHIP:
3766 case PACKET_DROP_MEMBERSHIP:
3767 {
3768 struct packet_mreq_max mreq;
3769 int len = optlen;
3770 memset(&mreq, 0, sizeof(mreq));
3771 if (len < sizeof(struct packet_mreq))
3772 return -EINVAL;
3773 if (len > sizeof(mreq))
3774 len = sizeof(mreq);
3775 if (copy_from_sockptr(&mreq, optval, len))
3776 return -EFAULT;
3777 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3778 return -EINVAL;
3779 if (optname == PACKET_ADD_MEMBERSHIP)
3780 ret = packet_mc_add(sk, &mreq);
3781 else
3782 ret = packet_mc_drop(sk, &mreq);
3783 return ret;
3784 }
3785
3786 case PACKET_RX_RING:
3787 case PACKET_TX_RING:
3788 {
3789 union tpacket_req_u req_u;
3790 int len;
3791
3792 lock_sock(sk);
3793 switch (po->tp_version) {
3794 case TPACKET_V1:
3795 case TPACKET_V2:
3796 len = sizeof(req_u.req);
3797 break;
3798 case TPACKET_V3:
3799 default:
3800 len = sizeof(req_u.req3);
3801 break;
3802 }
3803 if (optlen < len) {
3804 ret = -EINVAL;
3805 } else {
3806 if (copy_from_sockptr(&req_u.req, optval, len))
3807 ret = -EFAULT;
3808 else
3809 ret = packet_set_ring(sk, &req_u, 0,
3810 optname == PACKET_TX_RING);
3811 }
3812 release_sock(sk);
3813 return ret;
3814 }
3815 case PACKET_COPY_THRESH:
3816 {
3817 int val;
3818
3819 if (optlen != sizeof(val))
3820 return -EINVAL;
3821 if (copy_from_sockptr(&val, optval, sizeof(val)))
3822 return -EFAULT;
3823
3824 pkt_sk(sk)->copy_thresh = val;
3825 return 0;
3826 }
3827 case PACKET_VERSION:
3828 {
3829 int val;
3830
3831 if (optlen != sizeof(val))
3832 return -EINVAL;
3833 if (copy_from_sockptr(&val, optval, sizeof(val)))
3834 return -EFAULT;
3835 switch (val) {
3836 case TPACKET_V1:
3837 case TPACKET_V2:
3838 case TPACKET_V3:
3839 break;
3840 default:
3841 return -EINVAL;
3842 }
3843 lock_sock(sk);
3844 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3845 ret = -EBUSY;
3846 } else {
3847 po->tp_version = val;
3848 ret = 0;
3849 }
3850 release_sock(sk);
3851 return ret;
3852 }
3853 case PACKET_RESERVE:
3854 {
3855 unsigned int val;
3856
3857 if (optlen != sizeof(val))
3858 return -EINVAL;
3859 if (copy_from_sockptr(&val, optval, sizeof(val)))
3860 return -EFAULT;
3861 if (val > INT_MAX)
3862 return -EINVAL;
3863 lock_sock(sk);
3864 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3865 ret = -EBUSY;
3866 } else {
3867 po->tp_reserve = val;
3868 ret = 0;
3869 }
3870 release_sock(sk);
3871 return ret;
3872 }
3873 case PACKET_LOSS:
3874 {
3875 unsigned int val;
3876
3877 if (optlen != sizeof(val))
3878 return -EINVAL;
3879 if (copy_from_sockptr(&val, optval, sizeof(val)))
3880 return -EFAULT;
3881
3882 lock_sock(sk);
3883 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3884 ret = -EBUSY;
3885 } else {
3886 po->tp_loss = !!val;
3887 ret = 0;
3888 }
3889 release_sock(sk);
3890 return ret;
3891 }
3892 case PACKET_AUXDATA:
3893 {
3894 int val;
3895
3896 if (optlen < sizeof(val))
3897 return -EINVAL;
3898 if (copy_from_sockptr(&val, optval, sizeof(val)))
3899 return -EFAULT;
3900
3901 lock_sock(sk);
3902 po->auxdata = !!val;
3903 release_sock(sk);
3904 return 0;
3905 }
3906 case PACKET_ORIGDEV:
3907 {
3908 int val;
3909
3910 if (optlen < sizeof(val))
3911 return -EINVAL;
3912 if (copy_from_sockptr(&val, optval, sizeof(val)))
3913 return -EFAULT;
3914
3915 lock_sock(sk);
3916 po->origdev = !!val;
3917 release_sock(sk);
3918 return 0;
3919 }
3920 case PACKET_VNET_HDR:
3921 {
3922 int val;
3923
3924 if (sock->type != SOCK_RAW)
3925 return -EINVAL;
3926 if (optlen < sizeof(val))
3927 return -EINVAL;
3928 if (copy_from_sockptr(&val, optval, sizeof(val)))
3929 return -EFAULT;
3930
3931 lock_sock(sk);
3932 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3933 ret = -EBUSY;
3934 } else {
3935 po->has_vnet_hdr = !!val;
3936 ret = 0;
3937 }
3938 release_sock(sk);
3939 return ret;
3940 }
3941 case PACKET_TIMESTAMP:
3942 {
3943 int val;
3944
3945 if (optlen != sizeof(val))
3946 return -EINVAL;
3947 if (copy_from_sockptr(&val, optval, sizeof(val)))
3948 return -EFAULT;
3949
3950 po->tp_tstamp = val;
3951 return 0;
3952 }
3953 case PACKET_FANOUT:
3954 {
3955 struct fanout_args args = { 0 };
3956
3957 if (optlen != sizeof(int) && optlen != sizeof(args))
3958 return -EINVAL;
3959 if (copy_from_sockptr(&args, optval, optlen))
3960 return -EFAULT;
3961
3962 return fanout_add(sk, &args);
3963 }
3964 case PACKET_FANOUT_DATA:
3965 {
3966
3967 if (!READ_ONCE(po->fanout))
3968 return -EINVAL;
3969
3970 return fanout_set_data(po, optval, optlen);
3971 }
3972 case PACKET_IGNORE_OUTGOING:
3973 {
3974 int val;
3975
3976 if (optlen != sizeof(val))
3977 return -EINVAL;
3978 if (copy_from_sockptr(&val, optval, sizeof(val)))
3979 return -EFAULT;
3980 if (val < 0 || val > 1)
3981 return -EINVAL;
3982
3983 po->prot_hook.ignore_outgoing = !!val;
3984 return 0;
3985 }
3986 case PACKET_TX_HAS_OFF:
3987 {
3988 unsigned int val;
3989
3990 if (optlen != sizeof(val))
3991 return -EINVAL;
3992 if (copy_from_sockptr(&val, optval, sizeof(val)))
3993 return -EFAULT;
3994
3995 lock_sock(sk);
3996 if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
3997 po->tp_tx_has_off = !!val;
3998
3999 release_sock(sk);
4000 return 0;
4001 }
4002 case PACKET_QDISC_BYPASS:
4003 {
4004 int val;
4005
4006 if (optlen != sizeof(val))
4007 return -EINVAL;
4008 if (copy_from_sockptr(&val, optval, sizeof(val)))
4009 return -EFAULT;
4010
4011 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
4012 return 0;
4013 }
4014 default:
4015 return -ENOPROTOOPT;
4016 }
4017}
4018
4019static int packet_getsockopt(struct socket *sock, int level, int optname,
4020 char __user *optval, int __user *optlen)
4021{
4022 int len;
4023 int val, lv = sizeof(val);
4024 struct sock *sk = sock->sk;
4025 struct packet_sock *po = pkt_sk(sk);
4026 void *data = &val;
4027 union tpacket_stats_u st;
4028 struct tpacket_rollover_stats rstats;
4029 int drops;
4030
4031 if (level != SOL_PACKET)
4032 return -ENOPROTOOPT;
4033
4034 if (get_user(len, optlen))
4035 return -EFAULT;
4036
4037 if (len < 0)
4038 return -EINVAL;
4039
4040 switch (optname) {
4041 case PACKET_STATISTICS:
4042 spin_lock_bh(&sk->sk_receive_queue.lock);
4043 memcpy(&st, &po->stats, sizeof(st));
4044 memset(&po->stats, 0, sizeof(po->stats));
4045 spin_unlock_bh(&sk->sk_receive_queue.lock);
4046 drops = atomic_xchg(&po->tp_drops, 0);
4047
4048 if (po->tp_version == TPACKET_V3) {
4049 lv = sizeof(struct tpacket_stats_v3);
4050 st.stats3.tp_drops = drops;
4051 st.stats3.tp_packets += drops;
4052 data = &st.stats3;
4053 } else {
4054 lv = sizeof(struct tpacket_stats);
4055 st.stats1.tp_drops = drops;
4056 st.stats1.tp_packets += drops;
4057 data = &st.stats1;
4058 }
4059
4060 break;
4061 case PACKET_AUXDATA:
4062 val = po->auxdata;
4063 break;
4064 case PACKET_ORIGDEV:
4065 val = po->origdev;
4066 break;
4067 case PACKET_VNET_HDR:
4068 val = po->has_vnet_hdr;
4069 break;
4070 case PACKET_VERSION:
4071 val = po->tp_version;
4072 break;
4073 case PACKET_HDRLEN:
4074 if (len > sizeof(int))
4075 len = sizeof(int);
4076 if (len < sizeof(int))
4077 return -EINVAL;
4078 if (copy_from_user(&val, optval, len))
4079 return -EFAULT;
4080 switch (val) {
4081 case TPACKET_V1:
4082 val = sizeof(struct tpacket_hdr);
4083 break;
4084 case TPACKET_V2:
4085 val = sizeof(struct tpacket2_hdr);
4086 break;
4087 case TPACKET_V3:
4088 val = sizeof(struct tpacket3_hdr);
4089 break;
4090 default:
4091 return -EINVAL;
4092 }
4093 break;
4094 case PACKET_RESERVE:
4095 val = po->tp_reserve;
4096 break;
4097 case PACKET_LOSS:
4098 val = po->tp_loss;
4099 break;
4100 case PACKET_TIMESTAMP:
4101 val = po->tp_tstamp;
4102 break;
4103 case PACKET_FANOUT:
4104 val = (po->fanout ?
4105 ((u32)po->fanout->id |
4106 ((u32)po->fanout->type << 16) |
4107 ((u32)po->fanout->flags << 24)) :
4108 0);
4109 break;
4110 case PACKET_IGNORE_OUTGOING:
4111 val = po->prot_hook.ignore_outgoing;
4112 break;
4113 case PACKET_ROLLOVER_STATS:
4114 if (!po->rollover)
4115 return -EINVAL;
4116 rstats.tp_all = atomic_long_read(&po->rollover->num);
4117 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
4118 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
4119 data = &rstats;
4120 lv = sizeof(rstats);
4121 break;
4122 case PACKET_TX_HAS_OFF:
4123 val = po->tp_tx_has_off;
4124 break;
4125 case PACKET_QDISC_BYPASS:
4126 val = packet_use_direct_xmit(po);
4127 break;
4128 default:
4129 return -ENOPROTOOPT;
4130 }
4131
4132 if (len > lv)
4133 len = lv;
4134 if (put_user(len, optlen))
4135 return -EFAULT;
4136 if (copy_to_user(optval, data, len))
4137 return -EFAULT;
4138 return 0;
4139}
4140
4141static int packet_notifier(struct notifier_block *this,
4142 unsigned long msg, void *ptr)
4143{
4144 struct sock *sk;
4145 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4146 struct net *net = dev_net(dev);
4147
4148 rcu_read_lock();
4149 sk_for_each_rcu(sk, &net->packet.sklist) {
4150 struct packet_sock *po = pkt_sk(sk);
4151
4152 switch (msg) {
4153 case NETDEV_UNREGISTER:
4154 if (po->mclist)
4155 packet_dev_mclist_delete(dev, &po->mclist);
4156 fallthrough;
4157
4158 case NETDEV_DOWN:
4159 if (dev->ifindex == po->ifindex) {
4160 spin_lock(&po->bind_lock);
4161 if (po->running) {
4162 __unregister_prot_hook(sk, false);
4163 sk->sk_err = ENETDOWN;
4164 if (!sock_flag(sk, SOCK_DEAD))
4165 sk_error_report(sk);
4166 }
4167 if (msg == NETDEV_UNREGISTER) {
4168 packet_cached_dev_reset(po);
4169 WRITE_ONCE(po->ifindex, -1);
4170 netdev_put(po->prot_hook.dev,
4171 &po->prot_hook.dev_tracker);
4172 po->prot_hook.dev = NULL;
4173 }
4174 spin_unlock(&po->bind_lock);
4175 }
4176 break;
4177 case NETDEV_UP:
4178 if (dev->ifindex == po->ifindex) {
4179 spin_lock(&po->bind_lock);
4180 if (po->num)
4181 register_prot_hook(sk);
4182 spin_unlock(&po->bind_lock);
4183 }
4184 break;
4185 }
4186 }
4187 rcu_read_unlock();
4188 return NOTIFY_DONE;
4189}
4190
4191
4192static int packet_ioctl(struct socket *sock, unsigned int cmd,
4193 unsigned long arg)
4194{
4195 struct sock *sk = sock->sk;
4196
4197 switch (cmd) {
4198 case SIOCOUTQ:
4199 {
4200 int amount = sk_wmem_alloc_get(sk);
4201
4202 return put_user(amount, (int __user *)arg);
4203 }
4204 case SIOCINQ:
4205 {
4206 struct sk_buff *skb;
4207 int amount = 0;
4208
4209 spin_lock_bh(&sk->sk_receive_queue.lock);
4210 skb = skb_peek(&sk->sk_receive_queue);
4211 if (skb)
4212 amount = skb->len;
4213 spin_unlock_bh(&sk->sk_receive_queue.lock);
4214 return put_user(amount, (int __user *)arg);
4215 }
4216#ifdef CONFIG_INET
4217 case SIOCADDRT:
4218 case SIOCDELRT:
4219 case SIOCDARP:
4220 case SIOCGARP:
4221 case SIOCSARP:
4222 case SIOCGIFADDR:
4223 case SIOCSIFADDR:
4224 case SIOCGIFBRDADDR:
4225 case SIOCSIFBRDADDR:
4226 case SIOCGIFNETMASK:
4227 case SIOCSIFNETMASK:
4228 case SIOCGIFDSTADDR:
4229 case SIOCSIFDSTADDR:
4230 case SIOCSIFFLAGS:
4231 return inet_dgram_ops.ioctl(sock, cmd, arg);
4232#endif
4233
4234 default:
4235 return -ENOIOCTLCMD;
4236 }
4237 return 0;
4238}
4239
4240static __poll_t packet_poll(struct file *file, struct socket *sock,
4241 poll_table *wait)
4242{
4243 struct sock *sk = sock->sk;
4244 struct packet_sock *po = pkt_sk(sk);
4245 __poll_t mask = datagram_poll(file, sock, wait);
4246
4247 spin_lock_bh(&sk->sk_receive_queue.lock);
4248 if (po->rx_ring.pg_vec) {
4249 if (!packet_previous_rx_frame(po, &po->rx_ring,
4250 TP_STATUS_KERNEL))
4251 mask |= EPOLLIN | EPOLLRDNORM;
4252 }
4253 packet_rcv_try_clear_pressure(po);
4254 spin_unlock_bh(&sk->sk_receive_queue.lock);
4255 spin_lock_bh(&sk->sk_write_queue.lock);
4256 if (po->tx_ring.pg_vec) {
4257 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4258 mask |= EPOLLOUT | EPOLLWRNORM;
4259 }
4260 spin_unlock_bh(&sk->sk_write_queue.lock);
4261 return mask;
4262}
4263
4264
4265
4266
4267
4268
4269static void packet_mm_open(struct vm_area_struct *vma)
4270{
4271 struct file *file = vma->vm_file;
4272 struct socket *sock = file->private_data;
4273 struct sock *sk = sock->sk;
4274
4275 if (sk)
4276 atomic_inc(&pkt_sk(sk)->mapped);
4277}
4278
4279static void packet_mm_close(struct vm_area_struct *vma)
4280{
4281 struct file *file = vma->vm_file;
4282 struct socket *sock = file->private_data;
4283 struct sock *sk = sock->sk;
4284
4285 if (sk)
4286 atomic_dec(&pkt_sk(sk)->mapped);
4287}
4288
4289static const struct vm_operations_struct packet_mmap_ops = {
4290 .open = packet_mm_open,
4291 .close = packet_mm_close,
4292};
4293
4294static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4295 unsigned int len)
4296{
4297 int i;
4298
4299 for (i = 0; i < len; i++) {
4300 if (likely(pg_vec[i].buffer)) {
4301 if (is_vmalloc_addr(pg_vec[i].buffer))
4302 vfree(pg_vec[i].buffer);
4303 else
4304 free_pages((unsigned long)pg_vec[i].buffer,
4305 order);
4306 pg_vec[i].buffer = NULL;
4307 }
4308 }
4309 kfree(pg_vec);
4310}
4311
4312static char *alloc_one_pg_vec_page(unsigned long order)
4313{
4314 char *buffer;
4315 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4316 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4317
4318 buffer = (char *) __get_free_pages(gfp_flags, order);
4319 if (buffer)
4320 return buffer;
4321
4322
4323 buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4324 if (buffer)
4325 return buffer;
4326
4327
4328 gfp_flags &= ~__GFP_NORETRY;
4329 buffer = (char *) __get_free_pages(gfp_flags, order);
4330 if (buffer)
4331 return buffer;
4332
4333
4334 return NULL;
4335}
4336
4337static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4338{
4339 unsigned int block_nr = req->tp_block_nr;
4340 struct pgv *pg_vec;
4341 int i;
4342
4343 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
4344 if (unlikely(!pg_vec))
4345 goto out;
4346
4347 for (i = 0; i < block_nr; i++) {
4348 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4349 if (unlikely(!pg_vec[i].buffer))
4350 goto out_free_pgvec;
4351 }
4352
4353out:
4354 return pg_vec;
4355
4356out_free_pgvec:
4357 free_pg_vec(pg_vec, order, block_nr);
4358 pg_vec = NULL;
4359 goto out;
4360}
4361
4362static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4363 int closing, int tx_ring)
4364{
4365 struct pgv *pg_vec = NULL;
4366 struct packet_sock *po = pkt_sk(sk);
4367 unsigned long *rx_owner_map = NULL;
4368 int was_running, order = 0;
4369 struct packet_ring_buffer *rb;
4370 struct sk_buff_head *rb_queue;
4371 __be16 num;
4372 int err;
4373
4374 struct tpacket_req *req = &req_u->req;
4375
4376 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4377 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4378
4379 err = -EBUSY;
4380 if (!closing) {
4381 if (atomic_read(&po->mapped))
4382 goto out;
4383 if (packet_read_pending(rb))
4384 goto out;
4385 }
4386
4387 if (req->tp_block_nr) {
4388 unsigned int min_frame_size;
4389
4390
4391 err = -EBUSY;
4392 if (unlikely(rb->pg_vec))
4393 goto out;
4394
4395 switch (po->tp_version) {
4396 case TPACKET_V1:
4397 po->tp_hdrlen = TPACKET_HDRLEN;
4398 break;
4399 case TPACKET_V2:
4400 po->tp_hdrlen = TPACKET2_HDRLEN;
4401 break;
4402 case TPACKET_V3:
4403 po->tp_hdrlen = TPACKET3_HDRLEN;
4404 break;
4405 }
4406
4407 err = -EINVAL;
4408 if (unlikely((int)req->tp_block_size <= 0))
4409 goto out;
4410 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4411 goto out;
4412 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4413 if (po->tp_version >= TPACKET_V3 &&
4414 req->tp_block_size <
4415 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4416 goto out;
4417 if (unlikely(req->tp_frame_size < min_frame_size))
4418 goto out;
4419 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4420 goto out;
4421
4422 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4423 if (unlikely(rb->frames_per_block == 0))
4424 goto out;
4425 if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
4426 goto out;
4427 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4428 req->tp_frame_nr))
4429 goto out;
4430
4431 err = -ENOMEM;
4432 order = get_order(req->tp_block_size);
4433 pg_vec = alloc_pg_vec(req, order);
4434 if (unlikely(!pg_vec))
4435 goto out;
4436 switch (po->tp_version) {
4437 case TPACKET_V3:
4438
4439 if (!tx_ring) {
4440 init_prb_bdqc(po, rb, pg_vec, req_u);
4441 } else {
4442 struct tpacket_req3 *req3 = &req_u->req3;
4443
4444 if (req3->tp_retire_blk_tov ||
4445 req3->tp_sizeof_priv ||
4446 req3->tp_feature_req_word) {
4447 err = -EINVAL;
4448 goto out_free_pg_vec;
4449 }
4450 }
4451 break;
4452 default:
4453 if (!tx_ring) {
4454 rx_owner_map = bitmap_alloc(req->tp_frame_nr,
4455 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
4456 if (!rx_owner_map)
4457 goto out_free_pg_vec;
4458 }
4459 break;
4460 }
4461 }
4462
4463 else {
4464 err = -EINVAL;
4465 if (unlikely(req->tp_frame_nr))
4466 goto out;
4467 }
4468
4469
4470
4471 spin_lock(&po->bind_lock);
4472 was_running = po->running;
4473 num = po->num;
4474 if (was_running) {
4475 WRITE_ONCE(po->num, 0);
4476 __unregister_prot_hook(sk, false);
4477 }
4478 spin_unlock(&po->bind_lock);
4479
4480 synchronize_net();
4481
4482 err = -EBUSY;
4483 mutex_lock(&po->pg_vec_lock);
4484 if (closing || atomic_read(&po->mapped) == 0) {
4485 err = 0;
4486 spin_lock_bh(&rb_queue->lock);
4487 swap(rb->pg_vec, pg_vec);
4488 if (po->tp_version <= TPACKET_V2)
4489 swap(rb->rx_owner_map, rx_owner_map);
4490 rb->frame_max = (req->tp_frame_nr - 1);
4491 rb->head = 0;
4492 rb->frame_size = req->tp_frame_size;
4493 spin_unlock_bh(&rb_queue->lock);
4494
4495 swap(rb->pg_vec_order, order);
4496 swap(rb->pg_vec_len, req->tp_block_nr);
4497
4498 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4499 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4500 tpacket_rcv : packet_rcv;
4501 skb_queue_purge(rb_queue);
4502 if (atomic_read(&po->mapped))
4503 pr_err("packet_mmap: vma is busy: %d\n",
4504 atomic_read(&po->mapped));
4505 }
4506 mutex_unlock(&po->pg_vec_lock);
4507
4508 spin_lock(&po->bind_lock);
4509 if (was_running) {
4510 WRITE_ONCE(po->num, num);
4511 register_prot_hook(sk);
4512 }
4513 spin_unlock(&po->bind_lock);
4514 if (pg_vec && (po->tp_version > TPACKET_V2)) {
4515
4516 if (!tx_ring)
4517 prb_shutdown_retire_blk_timer(po, rb_queue);
4518 }
4519
4520out_free_pg_vec:
4521 if (pg_vec) {
4522 bitmap_free(rx_owner_map);
4523 free_pg_vec(pg_vec, order, req->tp_block_nr);
4524 }
4525out:
4526 return err;
4527}
4528
4529static int packet_mmap(struct file *file, struct socket *sock,
4530 struct vm_area_struct *vma)
4531{
4532 struct sock *sk = sock->sk;
4533 struct packet_sock *po = pkt_sk(sk);
4534 unsigned long size, expected_size;
4535 struct packet_ring_buffer *rb;
4536 unsigned long start;
4537 int err = -EINVAL;
4538 int i;
4539
4540 if (vma->vm_pgoff)
4541 return -EINVAL;
4542
4543 mutex_lock(&po->pg_vec_lock);
4544
4545 expected_size = 0;
4546 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4547 if (rb->pg_vec) {
4548 expected_size += rb->pg_vec_len
4549 * rb->pg_vec_pages
4550 * PAGE_SIZE;
4551 }
4552 }
4553
4554 if (expected_size == 0)
4555 goto out;
4556
4557 size = vma->vm_end - vma->vm_start;
4558 if (size != expected_size)
4559 goto out;
4560
4561 start = vma->vm_start;
4562 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4563 if (rb->pg_vec == NULL)
4564 continue;
4565
4566 for (i = 0; i < rb->pg_vec_len; i++) {
4567 struct page *page;
4568 void *kaddr = rb->pg_vec[i].buffer;
4569 int pg_num;
4570
4571 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4572 page = pgv_to_page(kaddr);
4573 err = vm_insert_page(vma, start, page);
4574 if (unlikely(err))
4575 goto out;
4576 start += PAGE_SIZE;
4577 kaddr += PAGE_SIZE;
4578 }
4579 }
4580 }
4581
4582 atomic_inc(&po->mapped);
4583 vma->vm_ops = &packet_mmap_ops;
4584 err = 0;
4585
4586out:
4587 mutex_unlock(&po->pg_vec_lock);
4588 return err;
4589}
4590
4591static const struct proto_ops packet_ops_spkt = {
4592 .family = PF_PACKET,
4593 .owner = THIS_MODULE,
4594 .release = packet_release,
4595 .bind = packet_bind_spkt,
4596 .connect = sock_no_connect,
4597 .socketpair = sock_no_socketpair,
4598 .accept = sock_no_accept,
4599 .getname = packet_getname_spkt,
4600 .poll = datagram_poll,
4601 .ioctl = packet_ioctl,
4602 .gettstamp = sock_gettstamp,
4603 .listen = sock_no_listen,
4604 .shutdown = sock_no_shutdown,
4605 .sendmsg = packet_sendmsg_spkt,
4606 .recvmsg = packet_recvmsg,
4607 .mmap = sock_no_mmap,
4608 .sendpage = sock_no_sendpage,
4609};
4610
4611static const struct proto_ops packet_ops = {
4612 .family = PF_PACKET,
4613 .owner = THIS_MODULE,
4614 .release = packet_release,
4615 .bind = packet_bind,
4616 .connect = sock_no_connect,
4617 .socketpair = sock_no_socketpair,
4618 .accept = sock_no_accept,
4619 .getname = packet_getname,
4620 .poll = packet_poll,
4621 .ioctl = packet_ioctl,
4622 .gettstamp = sock_gettstamp,
4623 .listen = sock_no_listen,
4624 .shutdown = sock_no_shutdown,
4625 .setsockopt = packet_setsockopt,
4626 .getsockopt = packet_getsockopt,
4627 .sendmsg = packet_sendmsg,
4628 .recvmsg = packet_recvmsg,
4629 .mmap = packet_mmap,
4630 .sendpage = sock_no_sendpage,
4631};
4632
4633static const struct net_proto_family packet_family_ops = {
4634 .family = PF_PACKET,
4635 .create = packet_create,
4636 .owner = THIS_MODULE,
4637};
4638
4639static struct notifier_block packet_netdev_notifier = {
4640 .notifier_call = packet_notifier,
4641};
4642
4643#ifdef CONFIG_PROC_FS
4644
4645static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4646 __acquires(RCU)
4647{
4648 struct net *net = seq_file_net(seq);
4649
4650 rcu_read_lock();
4651 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4652}
4653
4654static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4655{
4656 struct net *net = seq_file_net(seq);
4657 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4658}
4659
4660static void packet_seq_stop(struct seq_file *seq, void *v)
4661 __releases(RCU)
4662{
4663 rcu_read_unlock();
4664}
4665
4666static int packet_seq_show(struct seq_file *seq, void *v)
4667{
4668 if (v == SEQ_START_TOKEN)
4669 seq_printf(seq,
4670 "%*sRefCnt Type Proto Iface R Rmem User Inode\n",
4671 IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
4672 else {
4673 struct sock *s = sk_entry(v);
4674 const struct packet_sock *po = pkt_sk(s);
4675
4676 seq_printf(seq,
4677 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4678 s,
4679 refcount_read(&s->sk_refcnt),
4680 s->sk_type,
4681 ntohs(READ_ONCE(po->num)),
4682 READ_ONCE(po->ifindex),
4683 po->running,
4684 atomic_read(&s->sk_rmem_alloc),
4685 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4686 sock_i_ino(s));
4687 }
4688
4689 return 0;
4690}
4691
4692static const struct seq_operations packet_seq_ops = {
4693 .start = packet_seq_start,
4694 .next = packet_seq_next,
4695 .stop = packet_seq_stop,
4696 .show = packet_seq_show,
4697};
4698#endif
4699
4700static int __net_init packet_net_init(struct net *net)
4701{
4702 mutex_init(&net->packet.sklist_lock);
4703 INIT_HLIST_HEAD(&net->packet.sklist);
4704
4705#ifdef CONFIG_PROC_FS
4706 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
4707 sizeof(struct seq_net_private)))
4708 return -ENOMEM;
4709#endif
4710
4711 return 0;
4712}
4713
4714static void __net_exit packet_net_exit(struct net *net)
4715{
4716 remove_proc_entry("packet", net->proc_net);
4717 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4718}
4719
4720static struct pernet_operations packet_net_ops = {
4721 .init = packet_net_init,
4722 .exit = packet_net_exit,
4723};
4724
4725
4726static void __exit packet_exit(void)
4727{
4728 unregister_netdevice_notifier(&packet_netdev_notifier);
4729 unregister_pernet_subsys(&packet_net_ops);
4730 sock_unregister(PF_PACKET);
4731 proto_unregister(&packet_proto);
4732}
4733
4734static int __init packet_init(void)
4735{
4736 int rc;
4737
4738 rc = proto_register(&packet_proto, 0);
4739 if (rc)
4740 goto out;
4741 rc = sock_register(&packet_family_ops);
4742 if (rc)
4743 goto out_proto;
4744 rc = register_pernet_subsys(&packet_net_ops);
4745 if (rc)
4746 goto out_sock;
4747 rc = register_netdevice_notifier(&packet_netdev_notifier);
4748 if (rc)
4749 goto out_pernet;
4750
4751 return 0;
4752
4753out_pernet:
4754 unregister_pernet_subsys(&packet_net_ops);
4755out_sock:
4756 sock_unregister(PF_PACKET);
4757out_proto:
4758 proto_unregister(&packet_proto);
4759out:
4760 return rc;
4761}
4762
4763module_init(packet_init);
4764module_exit(packet_exit);
4765MODULE_LICENSE("GPL");
4766MODULE_ALIAS_NETPROTO(PF_PACKET);
4767