// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 */
#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/slab.h>

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/indirect_call_wrapper.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <net/inet_ecn.h>
#include <linux/netfilter_ipv4.h>
#include <net/xfrm.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <net/dst_metadata.h>

/*
 *	Process Router Attention IP option (RFC 2113)
 */
bool ip_call_ra_chain(struct sk_buff *skb)
{
	struct ip_ra_chain *ra;
	u8 protocol = ip_hdr(skb)->protocol;
	struct sock *last = NULL;
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);

	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* If socket is bound to an interface, only report
		 * the packet if it came from that interface.
		 */
		if (sk && inet_sk(sk)->inet_num == protocol &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == dev->ifindex)) {
			if (ip_is_fragment(ip_hdr(skb))) {
				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
					return true;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					raw_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		raw_rcv(last, skb);
		return true;
	}
	return false;
}

INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
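
/* Deliver the packet to the registered L4 protocol handler under RCU.
 * A negative handler return value asks us to resubmit the packet with
 * the (negated) protocol number it returned.
 */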
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
	const struct net_protocol *ipprot;
	int raw, ret;

resubmit:
	raw = raw_local_deliver(skb, protocol);

	ipprot = rcu_dereference(inet_protos[protocol]);
	if (ipprot) {
		if (!ipprot->no_policy) {
			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				kfree_skb_reason(skb,
						 SKB_DROP_REASON_XFRM_POLICY);
				return;
			}
			nf_reset_ct(skb);
		}
		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
				      skb);
		if (ret < 0) {
			protocol = -ret;
			goto resubmit;
		}
		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
	} else {
		if (!raw) {
			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_PROT_UNREACH, 0);
			}
			kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO);
		} else {
			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
			consume_skb(skb);
		}
	}
}
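
/* NF_INET_LOCAL_IN okfn: strip the IP header and dispatch the packet to
 * its L4 protocol handler under rcu_read_lock().
 */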
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb_clear_delivery_time(skb);
	__skb_pull(skb, skb_network_header_len(skb));

	rcu_read_lock();
	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
	rcu_read_unlock();

	return 0;
}

/*
 *	Deliver IP Packets to the higher protocol layers.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	/*
	 *	Reassemble IP fragments.
	 */
	struct net *net = dev_net(skb->dev);

	if (ip_is_fragment(ip_hdr(skb))) {
		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}

	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
		       net, NULL, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}
EXPORT_SYMBOL(ip_local_deliver);
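
/* Parse and validate IP options, enforcing the per-device source-route
 * policy.  Returns true if the packet must be dropped.
 */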
static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_options *opt;
	const struct iphdr *iph;

	/* It looks as overkill, because not all
	   IP options require packet mangling.
	   But it is the easiest for now, especially taking
	   into account that combination of IP options
	   and running sniffer is extremely rare condition.
					      --ANK (980813)
	*/
	if (skb_cow(skb, skb_headroom(skb))) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	opt = &(IPCB(skb)->opt);
	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);

	if (ip_options_compile(dev_net(dev), opt, skb)) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
		goto drop;
	}

	if (unlikely(opt->srr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
				if (IN_DEV_LOG_MARTIANS(in_dev))
					net_info_ratelimited("source route option %pI4 -> %pI4\n",
							     &iph->saddr,
							     &iph->daddr);
				goto drop;
			}
		}

		if (ip_options_rcv_srr(skb, dev))
			goto drop;
	}

	return false;
drop:
	return true;
}
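
/* The route of a previous packet in a list may be reused as a hint when the
 * current packet has no dst yet and matches the hint's daddr and TOS.
 */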
static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
			    const struct sk_buff *hint)
{
	return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
	       ip_hdr(hint)->tos == iph->tos;
}

int tcp_v4_early_demux(struct sk_buff *skb);
int udp_v4_early_demux(struct sk_buff *skb);
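
/* Core of the receive path after PRE_ROUTING: optional TCP/UDP early demux,
 * routing decision, route-class accounting and IP option handling.
 */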
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct net_device *dev,
			      const struct sk_buff *hint)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err, drop_reason;
	struct rtable *rt;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;

	if (ip_can_use_hint(skb, iph, hint)) {
		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
					dev, hint);
		if (unlikely(err))
			goto drop_error;
	}

	if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		switch (iph->protocol) {
		case IPPROTO_TCP:
			if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
				tcp_v4_early_demux(skb);

				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		case IPPROTO_UDP:
			if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
				err = udp_v4_early_demux(skb);
				if (unlikely(err))
					goto drop_error;

				/* must reload iph, skb->head might have changed */
				iph = ip_hdr(skb);
			}
			break;
		}
	}

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	if (!skb_valid_dst(skb)) {
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		if (unlikely(err))
			goto drop_error;
	} else {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY))
			IPCB(skb)->flags |= IPSKB_NOPOLICY;
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
		goto drop;

	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
	} else if (skb->pkt_type == PACKET_BROADCAST ||
		   skb->pkt_type == PACKET_MULTICAST) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* RFC 1122 3.3.6:
		 *
		 *   When a host sends a datagram to a link-layer broadcast
		 *   address, the IP destination address MUST be a legal IP
		 *   broadcast or IP multicast address.
		 *
		 *   A host SHOULD silently discard a datagram that is received
		 *   via a link-layer broadcast (see Section 2.4) but does not
		 *   specify an IP multicast or broadcast destination address.
		 *
		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
		 * in a way a form of multicast and the most common use case for
		 * this is 802.11 protecting against cross-station spoofing (the
		 * so-called "hole-196" attack) so do it for both.
		 */
		if (in_dev &&
		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) {
			drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST;
			goto drop;
		}
	}

	return NET_RX_SUCCESS;

drop:
	kfree_skb_reason(skb, drop_reason);
	return NET_RX_DROP;

drop_error:
	if (err == -EXDEV) {
		drop_reason = SKB_DROP_REASON_IP_RPFILTER;
		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
	}
	goto drop;
}
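
/* okfn for the NF_INET_PRE_ROUTING hook on the single-packet path: resolve
 * the route, then hand the packet to dst_input().
 */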
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	int ret;

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
	if (ret != NET_RX_DROP)
		ret = dst_input(skb);
	return ret;
}

/*
 * 	Main IP Receive routine.
 */
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
	const struct iphdr *iph;
	int drop_reason;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
		drop_reason = SKB_DROP_REASON_OTHERHOST;
		goto drop;
	}

	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 */
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
	__IP_ADD_STATS(net,
		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto csum_error;

	len = iph_totlen(skb, iph);
	if (skb->len < len) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now we know it
	 * is IP we can trim to the true length of the frame.
	 * Note this now means skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	skb->transport_header = skb->network_header + iph->ihl*4;

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	IPCB(skb)->iif = skb->skb_iif;

	/* Must drop socket now because of tproxy. */
	if (!skb_sk_is_prefetched(skb))
		skb_orphan(skb);

	return skb;

csum_error:
	drop_reason = SKB_DROP_REASON_IP_CSUM;
	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
	if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED)
		drop_reason = SKB_DROP_REASON_IP_INHDR;
	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
	kfree_skb_reason(skb, drop_reason);
out:
	return NULL;
}

/*
 * IP receive entry point
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
	   struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);

	skb = ip_rcv_core(skb, net);
	if (skb == NULL)
		return NET_RX_DROP;

	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
		       net, NULL, skb, dev, NULL,
		       ip_rcv_finish);
}
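
/* Hand each packet on the sublist to its route's input handler. */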
static void ip_sublist_rcv_finish(struct list_head *head)
{
	struct sk_buff *skb, *next;

	list_for_each_entry_safe(skb, next, head, list) {
		skb_list_del_init(skb);
		dst_input(skb);
	}
}
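
/* A route hint is only safe to reuse without custom FIB rules and for
 * routes that are neither broadcast nor multipath.
 */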
static struct sk_buff *ip_extract_route_hint(const struct net *net,
					     struct sk_buff *skb, int rt_type)
{
	if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
	    IPCB(skb)->flags & IPSKB_MULTIPATH)
		return NULL;

	return skb;
}
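
/* Run ip_rcv_finish_core() on each packet, batching consecutive packets
 * that resolved to the same dst into sublists before dst_input().
 */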
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
			       struct list_head *head)
{
	struct sk_buff *skb, *next, *hint = NULL;
	struct dst_entry *curr_dst = NULL;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct dst_entry *dst;

		skb_list_del_init(skb);
		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip_rcv(skb);
		if (!skb)
			continue;
		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
			continue;

		dst = skb_dst(skb);
		if (curr_dst != dst) {
			hint = ip_extract_route_hint(net, skb,
						     ((struct rtable *)dst)->rt_type);

			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv_finish(&sublist);

			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}

	/* dispatch final sublist */
	ip_sublist_rcv_finish(&sublist);
}
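
/* Run the netfilter PRE_ROUTING hook over the whole sublist, then finish
 * receive processing for whatever the hook let through.
 */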
static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
			   struct net *net)
{
	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
		     head, dev, NULL, ip_rcv_finish);
	ip_list_rcv_finish(net, NULL, head);
}

/* Receive a list of IP packets */
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
		 struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		skb_list_del_init(skb);
		skb = ip_rcv_core(skb, net);
		if (skb == NULL)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv(&sublist, curr_dev, curr_net);

			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}

	/* dispatch final sublist */
	if (!list_empty(&sublist))
		ip_sublist_rcv(&sublist, curr_dev, curr_net);
}