1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75#include <asm/uaccess.h>
76#include <linux/bitops.h>
77#include <linux/capability.h>
78#include <linux/cpu.h>
79#include <linux/types.h>
80#include <linux/kernel.h>
81#include <linux/hash.h>
82#include <linux/slab.h>
83#include <linux/sched.h>
84#include <linux/mutex.h>
85#include <linux/string.h>
86#include <linux/mm.h>
87#include <linux/socket.h>
88#include <linux/sockios.h>
89#include <linux/errno.h>
90#include <linux/interrupt.h>
91#include <linux/if_ether.h>
92#include <linux/netdevice.h>
93#include <linux/etherdevice.h>
94#include <linux/ethtool.h>
95#include <linux/notifier.h>
96#include <linux/skbuff.h>
97#include <net/net_namespace.h>
98#include <net/sock.h>
99#include <linux/rtnetlink.h>
100#include <linux/proc_fs.h>
101#include <linux/seq_file.h>
102#include <linux/stat.h>
103#include <net/dst.h>
104#include <net/pkt_sched.h>
105#include <net/checksum.h>
106#include <net/xfrm.h>
107#include <linux/highmem.h>
108#include <linux/init.h>
109#include <linux/kmod.h>
110#include <linux/module.h>
111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
114#include <net/wext.h>
115#include <net/iw_handler.h>
116#include <asm/current.h>
117#include <linux/audit.h>
118#include <linux/dmaengine.h>
119#include <linux/err.h>
120#include <linux/ctype.h>
121#include <linux/if_arp.h>
122#include <linux/if_vlan.h>
123#include <linux/ip.h>
124#include <net/ip.h>
125#include <linux/ipv6.h>
126#include <linux/in.h>
127#include <linux/jhash.h>
128#include <linux/random.h>
129#include <trace/events/napi.h>
130#include <trace/events/net.h>
131#include <trace/events/skb.h>
132#include <linux/pci.h>
133#include <linux/inetdevice.h>
134#include <linux/cpu_rmap.h>
135#include <linux/net_tstamp.h>
136#include <linux/static_key.h>
137#include <net/flow_keys.h>
138
139#include "net-sysfs.h"
140
141
/* Upper bound named for GRO skbs; its users lie outside this part of the file. */
#define MAX_GRO_SKBS	8

/* GRO head budget: MAX_HEADER plus 128 bytes. */
#define GRO_MAX_HEAD	(MAX_HEADER + 128)
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175#define PTYPE_HASH_SIZE (16)
176#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
177
178static DEFINE_SPINLOCK(ptype_lock);
179static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
180static struct list_head ptype_all __read_mostly;
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
 * Reader/writer lock taken on the write side (with BHs disabled) around
 * every update in this file of a namespace's device list and of its
 * name/index hash chains.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
203
204static inline void dev_base_seq_inc(struct net *net)
205{
206 while (++net->dev_base_seq == 0);
207}
208
209static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
210{
211 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
212
213 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
214}
215
216static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
217{
218 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
219}
220
/*
 * Take the lock of @sd's input packet queue.  Compiles to nothing when
 * CONFIG_RPS is not set.
 */
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spinlock_t *lock = &sd->input_pkt_queue.lock;

	spin_lock(lock);
#endif
}
227
/*
 * Release the lock of @sd's input packet queue; counterpart of rps_lock().
 * Compiles to nothing when CONFIG_RPS is not set.
 */
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spinlock_t *lock = &sd->input_pkt_queue.lock;

	spin_unlock(lock);
#endif
}
234
235
236static int list_netdevice(struct net_device *dev)
237{
238 struct net *net = dev_net(dev);
239
240 ASSERT_RTNL();
241
242 write_lock_bh(&dev_base_lock);
243 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
244 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
245 hlist_add_head_rcu(&dev->index_hlist,
246 dev_index_hash(net, dev->ifindex));
247 write_unlock_bh(&dev_base_lock);
248
249 dev_base_seq_inc(net);
250
251 return 0;
252}
253
254
255
256
257static void unlist_netdevice(struct net_device *dev)
258{
259 ASSERT_RTNL();
260
261
262 write_lock_bh(&dev_base_lock);
263 list_del_rcu(&dev->dev_list);
264 hlist_del_rcu(&dev->name_hlist);
265 hlist_del_rcu(&dev->index_hlist);
266 write_unlock_bh(&dev_base_lock);
267
268 dev_base_seq_inc(dev_net(dev));
269}
270
271
272
273
274
/*
 * Notifier chain for network device events.  It is a raw chain: the
 * functions in this file that touch it do so with RTNL held.
 */
static RAW_NOTIFIER_HEAD(netdev_chain);
276
277
278
279
280
281
282DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
283EXPORT_PER_CPU_SYMBOL(softnet_data);
284
285#ifdef CONFIG_LOCKDEP
286
287
288
289
290static const unsigned short netdev_lock_type[] =
291 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
292 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
293 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
294 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
295 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
296 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
297 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
298 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
299 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
300 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
301 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
302 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
303 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
304 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
305 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
306
307static const char *const netdev_lock_name[] =
308 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
309 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
310 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
311 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
312 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
313 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
314 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
315 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
316 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
317 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
318 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
319 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
320 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
321 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
322 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
323
324static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
325static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
326
327static inline unsigned short netdev_lock_pos(unsigned short dev_type)
328{
329 int i;
330
331 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
332 if (netdev_lock_type[i] == dev_type)
333 return i;
334
335 return ARRAY_SIZE(netdev_lock_type) - 1;
336}
337
338static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
339 unsigned short dev_type)
340{
341 int i;
342
343 i = netdev_lock_pos(dev_type);
344 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
345 netdev_lock_name[i]);
346}
347
348static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
349{
350 int i;
351
352 i = netdev_lock_pos(dev->type);
353 lockdep_set_class_and_name(&dev->addr_list_lock,
354 &netdev_addr_lock_key[i],
355 netdev_lock_name[i]);
356}
357#else
358static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
359 unsigned short dev_type)
360{
361}
362static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
363{
364}
365#endif
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389static inline struct list_head *ptype_head(const struct packet_type *pt)
390{
391 if (pt->type == htons(ETH_P_ALL))
392 return &ptype_all;
393 else
394 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
395}
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410void dev_add_pack(struct packet_type *pt)
411{
412 struct list_head *head = ptype_head(pt);
413
414 spin_lock(&ptype_lock);
415 list_add_rcu(&pt->list, head);
416 spin_unlock(&ptype_lock);
417}
418EXPORT_SYMBOL(dev_add_pack);
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433void __dev_remove_pack(struct packet_type *pt)
434{
435 struct list_head *head = ptype_head(pt);
436 struct packet_type *pt1;
437
438 spin_lock(&ptype_lock);
439
440 list_for_each_entry(pt1, head, list) {
441 if (pt == pt1) {
442 list_del_rcu(&pt->list);
443 goto out;
444 }
445 }
446
447 pr_warn("dev_remove_pack: %p not found\n", pt);
448out:
449 spin_unlock(&ptype_lock);
450}
451EXPORT_SYMBOL(__dev_remove_pack);
452
453
454
455
456
457
458
459
460
461
462
463
464
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink @pt via __dev_remove_pack() and then call synchronize_net()
 *	before returning.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);
	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
472
473
474
475
476
477
478
479
480static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
481
482
483
484
485
486
487
488
489
490
491static int netdev_boot_setup_add(char *name, struct ifmap *map)
492{
493 struct netdev_boot_setup *s;
494 int i;
495
496 s = dev_boot_setup;
497 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
498 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
499 memset(s[i].name, 0, sizeof(s[i].name));
500 strlcpy(s[i].name, name, IFNAMSIZ);
501 memcpy(&s[i].map, map, sizeof(s[i].map));
502 break;
503 }
504 }
505
506 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
507}
508
509
510
511
512
513
514
515
516
517
518int netdev_boot_setup_check(struct net_device *dev)
519{
520 struct netdev_boot_setup *s = dev_boot_setup;
521 int i;
522
523 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
524 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
525 !strcmp(dev->name, s[i].name)) {
526 dev->irq = s[i].map.irq;
527 dev->base_addr = s[i].map.base_addr;
528 dev->mem_start = s[i].map.mem_start;
529 dev->mem_end = s[i].map.mem_end;
530 return 1;
531 }
532 }
533 return 0;
534}
535EXPORT_SYMBOL(netdev_boot_setup_check);
536
537
538
539
540
541
542
543
544
545
546
547
548unsigned long netdev_boot_base(const char *prefix, int unit)
549{
550 const struct netdev_boot_setup *s = dev_boot_setup;
551 char name[IFNAMSIZ];
552 int i;
553
554 sprintf(name, "%s%d", prefix, unit);
555
556
557
558
559
560 if (__dev_get_by_name(&init_net, name))
561 return 1;
562
563 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
564 if (!strcmp(name, s[i].name))
565 return s[i].map.base_addr;
566 return 0;
567}
568
569
570
571
572int __init netdev_boot_setup(char *str)
573{
574 int ints[5];
575 struct ifmap map;
576
577 str = get_options(str, ARRAY_SIZE(ints), ints);
578 if (!str || !*str)
579 return 0;
580
581
582 memset(&map, 0, sizeof(map));
583 if (ints[0] > 0)
584 map.irq = ints[1];
585 if (ints[0] > 1)
586 map.base_addr = ints[2];
587 if (ints[0] > 2)
588 map.mem_start = ints[3];
589 if (ints[0] > 3)
590 map.mem_end = ints[4];
591
592
593 return netdev_boot_setup_add(str, &map);
594}
595
596__setup("netdev=", netdev_boot_setup);
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616struct net_device *__dev_get_by_name(struct net *net, const char *name)
617{
618 struct hlist_node *p;
619 struct net_device *dev;
620 struct hlist_head *head = dev_name_hash(net, name);
621
622 hlist_for_each_entry(dev, p, head, name_hlist)
623 if (!strncmp(dev->name, name, IFNAMSIZ))
624 return dev;
625
626 return NULL;
627}
628EXPORT_SYMBOL(__dev_get_by_name);
629
630
631
632
633
634
635
636
637
638
639
640
641
642struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
643{
644 struct hlist_node *p;
645 struct net_device *dev;
646 struct hlist_head *head = dev_name_hash(net, name);
647
648 hlist_for_each_entry_rcu(dev, p, head, name_hlist)
649 if (!strncmp(dev->name, name, IFNAMSIZ))
650 return dev;
651
652 return NULL;
653}
654EXPORT_SYMBOL(dev_get_by_name_rcu);
655
656
657
658
659
660
661
662
663
664
665
666
667
668struct net_device *dev_get_by_name(struct net *net, const char *name)
669{
670 struct net_device *dev;
671
672 rcu_read_lock();
673 dev = dev_get_by_name_rcu(net, name);
674 if (dev)
675 dev_hold(dev);
676 rcu_read_unlock();
677 return dev;
678}
679EXPORT_SYMBOL(dev_get_by_name);
680
681
682
683
684
685
686
687
688
689
690
691
692
693struct net_device *__dev_get_by_index(struct net *net, int ifindex)
694{
695 struct hlist_node *p;
696 struct net_device *dev;
697 struct hlist_head *head = dev_index_hash(net, ifindex);
698
699 hlist_for_each_entry(dev, p, head, index_hlist)
700 if (dev->ifindex == ifindex)
701 return dev;
702
703 return NULL;
704}
705EXPORT_SYMBOL(__dev_get_by_index);
706
707
708
709
710
711
712
713
714
715
716
717
718struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
719{
720 struct hlist_node *p;
721 struct net_device *dev;
722 struct hlist_head *head = dev_index_hash(net, ifindex);
723
724 hlist_for_each_entry_rcu(dev, p, head, index_hlist)
725 if (dev->ifindex == ifindex)
726 return dev;
727
728 return NULL;
729}
730EXPORT_SYMBOL(dev_get_by_index_rcu);
731
732
733
734
735
736
737
738
739
740
741
742
743
744struct net_device *dev_get_by_index(struct net *net, int ifindex)
745{
746 struct net_device *dev;
747
748 rcu_read_lock();
749 dev = dev_get_by_index_rcu(net, ifindex);
750 if (dev)
751 dev_hold(dev);
752 rcu_read_unlock();
753 return dev;
754}
755EXPORT_SYMBOL(dev_get_by_index);
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
772 const char *ha)
773{
774 struct net_device *dev;
775
776 for_each_netdev_rcu(net, dev)
777 if (dev->type == type &&
778 !memcmp(dev->dev_addr, ha, dev->addr_len))
779 return dev;
780
781 return NULL;
782}
783EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
784
785struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
786{
787 struct net_device *dev;
788
789 ASSERT_RTNL();
790 for_each_netdev(net, dev)
791 if (dev->type == type)
792 return dev;
793
794 return NULL;
795}
796EXPORT_SYMBOL(__dev_getfirstbyhwtype);
797
798struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
799{
800 struct net_device *dev, *ret = NULL;
801
802 rcu_read_lock();
803 for_each_netdev_rcu(net, dev)
804 if (dev->type == type) {
805 dev_hold(dev);
806 ret = dev;
807 break;
808 }
809 rcu_read_unlock();
810 return ret;
811}
812EXPORT_SYMBOL(dev_getfirstbyhwtype);
813
814
815
816
817
818
819
820
821
822
823
824
825struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
826 unsigned short mask)
827{
828 struct net_device *dev, *ret;
829
830 ret = NULL;
831 for_each_netdev_rcu(net, dev) {
832 if (((dev->flags ^ if_flags) & mask) == 0) {
833 ret = dev;
834 break;
835 }
836 }
837 return ret;
838}
839EXPORT_SYMBOL(dev_get_by_flags_rcu);
840
841
842
843
844
845
846
847
848
849bool dev_valid_name(const char *name)
850{
851 if (*name == '\0')
852 return false;
853 if (strlen(name) >= IFNAMSIZ)
854 return false;
855 if (!strcmp(name, ".") || !strcmp(name, ".."))
856 return false;
857
858 while (*name) {
859 if (*name == '/' || isspace(*name))
860 return false;
861 name++;
862 }
863 return true;
864}
865EXPORT_SYMBOL(dev_valid_name);
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882static int __dev_alloc_name(struct net *net, const char *name, char *buf)
883{
884 int i = 0;
885 const char *p;
886 const int max_netdevices = 8*PAGE_SIZE;
887 unsigned long *inuse;
888 struct net_device *d;
889
890 p = strnchr(name, IFNAMSIZ-1, '%');
891 if (p) {
892
893
894
895
896
897 if (p[1] != 'd' || strchr(p + 2, '%'))
898 return -EINVAL;
899
900
901 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
902 if (!inuse)
903 return -ENOMEM;
904
905 for_each_netdev(net, d) {
906 if (!sscanf(d->name, name, &i))
907 continue;
908 if (i < 0 || i >= max_netdevices)
909 continue;
910
911
912 snprintf(buf, IFNAMSIZ, name, i);
913 if (!strncmp(buf, d->name, IFNAMSIZ))
914 set_bit(i, inuse);
915 }
916
917 i = find_first_zero_bit(inuse, max_netdevices);
918 free_page((unsigned long) inuse);
919 }
920
921 if (buf != name)
922 snprintf(buf, IFNAMSIZ, name, i);
923 if (!__dev_get_by_name(net, buf))
924 return i;
925
926
927
928
929
930 return -ENFILE;
931}
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947int dev_alloc_name(struct net_device *dev, const char *name)
948{
949 char buf[IFNAMSIZ];
950 struct net *net;
951 int ret;
952
953 BUG_ON(!dev_net(dev));
954 net = dev_net(dev);
955 ret = __dev_alloc_name(net, name, buf);
956 if (ret >= 0)
957 strlcpy(dev->name, buf, IFNAMSIZ);
958 return ret;
959}
960EXPORT_SYMBOL(dev_alloc_name);
961
962static int dev_alloc_name_ns(struct net *net,
963 struct net_device *dev,
964 const char *name)
965{
966 char buf[IFNAMSIZ];
967 int ret;
968
969 ret = __dev_alloc_name(net, name, buf);
970 if (ret >= 0)
971 strlcpy(dev->name, buf, IFNAMSIZ);
972 return ret;
973}
974
975static int dev_get_valid_name(struct net *net,
976 struct net_device *dev,
977 const char *name)
978{
979 BUG_ON(!net);
980
981 if (!dev_valid_name(name))
982 return -EINVAL;
983
984 if (strchr(name, '%'))
985 return dev_alloc_name_ns(net, dev, name);
986 else if (__dev_get_by_name(net, name))
987 return -EEXIST;
988 else if (dev->name != name)
989 strlcpy(dev->name, name, IFNAMSIZ);
990
991 return 0;
992}
993
994
995
996
997
998
999
1000
1001
1002int dev_change_name(struct net_device *dev, const char *newname)
1003{
1004 char oldname[IFNAMSIZ];
1005 int err = 0;
1006 int ret;
1007 struct net *net;
1008
1009 ASSERT_RTNL();
1010 BUG_ON(!dev_net(dev));
1011
1012 net = dev_net(dev);
1013 if (dev->flags & IFF_UP)
1014 return -EBUSY;
1015
1016 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1017 return 0;
1018
1019 memcpy(oldname, dev->name, IFNAMSIZ);
1020
1021 err = dev_get_valid_name(net, dev, newname);
1022 if (err < 0)
1023 return err;
1024
1025rollback:
1026 ret = device_rename(&dev->dev, dev->name);
1027 if (ret) {
1028 memcpy(dev->name, oldname, IFNAMSIZ);
1029 return ret;
1030 }
1031
1032 write_lock_bh(&dev_base_lock);
1033 hlist_del_rcu(&dev->name_hlist);
1034 write_unlock_bh(&dev_base_lock);
1035
1036 synchronize_rcu();
1037
1038 write_lock_bh(&dev_base_lock);
1039 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1040 write_unlock_bh(&dev_base_lock);
1041
1042 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1043 ret = notifier_to_errno(ret);
1044
1045 if (ret) {
1046
1047 if (err >= 0) {
1048 err = ret;
1049 memcpy(dev->name, oldname, IFNAMSIZ);
1050 goto rollback;
1051 } else {
1052 pr_err("%s: name change rollback failed: %d\n",
1053 dev->name, ret);
1054 }
1055 }
1056
1057 return err;
1058}
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1069{
1070 char *new_ifalias;
1071
1072 ASSERT_RTNL();
1073
1074 if (len >= IFALIASZ)
1075 return -EINVAL;
1076
1077 if (!len) {
1078 if (dev->ifalias) {
1079 kfree(dev->ifalias);
1080 dev->ifalias = NULL;
1081 }
1082 return 0;
1083 }
1084
1085 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1086 if (!new_ifalias)
1087 return -ENOMEM;
1088 dev->ifalias = new_ifalias;
1089
1090 strlcpy(dev->ifalias, alias, len+1);
1091 return len;
1092}
1093
1094
1095
1096
1097
1098
1099
1100
1101void netdev_features_change(struct net_device *dev)
1102{
1103 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1104}
1105EXPORT_SYMBOL(netdev_features_change);
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115void netdev_state_change(struct net_device *dev)
1116{
1117 if (dev->flags & IFF_UP) {
1118 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1119 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1120 }
1121}
1122EXPORT_SYMBOL(netdev_state_change);
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134void netdev_notify_peers(struct net_device *dev)
1135{
1136 rtnl_lock();
1137 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1138 rtnl_unlock();
1139}
1140EXPORT_SYMBOL(netdev_notify_peers);
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152void dev_load(struct net *net, const char *name)
1153{
1154 struct net_device *dev;
1155 int no_module;
1156
1157 rcu_read_lock();
1158 dev = dev_get_by_name_rcu(net, name);
1159 rcu_read_unlock();
1160
1161 no_module = !dev;
1162 if (no_module && capable(CAP_NET_ADMIN))
1163 no_module = request_module("netdev-%s", name);
1164 if (no_module && capable(CAP_SYS_MODULE)) {
1165 if (!request_module("%s", name))
1166 pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1167 name);
1168 }
1169}
1170EXPORT_SYMBOL(dev_load);
1171
1172static int __dev_open(struct net_device *dev)
1173{
1174 const struct net_device_ops *ops = dev->netdev_ops;
1175 int ret;
1176
1177 ASSERT_RTNL();
1178
1179 if (!netif_device_present(dev))
1180 return -ENODEV;
1181
1182 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1183 ret = notifier_to_errno(ret);
1184 if (ret)
1185 return ret;
1186
1187 set_bit(__LINK_STATE_START, &dev->state);
1188
1189 if (ops->ndo_validate_addr)
1190 ret = ops->ndo_validate_addr(dev);
1191
1192 if (!ret && ops->ndo_open)
1193 ret = ops->ndo_open(dev);
1194
1195 if (ret)
1196 clear_bit(__LINK_STATE_START, &dev->state);
1197 else {
1198 dev->flags |= IFF_UP;
1199 net_dmaengine_get();
1200 dev_set_rx_mode(dev);
1201 dev_activate(dev);
1202 add_device_randomness(dev->dev_addr, dev->addr_len);
1203 }
1204
1205 return ret;
1206}
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220int dev_open(struct net_device *dev)
1221{
1222 int ret;
1223
1224 if (dev->flags & IFF_UP)
1225 return 0;
1226
1227 ret = __dev_open(dev);
1228 if (ret < 0)
1229 return ret;
1230
1231 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1232 call_netdevice_notifiers(NETDEV_UP, dev);
1233
1234 return ret;
1235}
1236EXPORT_SYMBOL(dev_open);
1237
1238static int __dev_close_many(struct list_head *head)
1239{
1240 struct net_device *dev;
1241
1242 ASSERT_RTNL();
1243 might_sleep();
1244
1245 list_for_each_entry(dev, head, unreg_list) {
1246 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1247
1248 clear_bit(__LINK_STATE_START, &dev->state);
1249
1250
1251
1252
1253
1254
1255
1256 smp_mb__after_clear_bit();
1257 }
1258
1259 dev_deactivate_many(head);
1260
1261 list_for_each_entry(dev, head, unreg_list) {
1262 const struct net_device_ops *ops = dev->netdev_ops;
1263
1264
1265
1266
1267
1268
1269
1270
1271 if (ops->ndo_stop)
1272 ops->ndo_stop(dev);
1273
1274 dev->flags &= ~IFF_UP;
1275 net_dmaengine_put();
1276 }
1277
1278 return 0;
1279}
1280
1281static int __dev_close(struct net_device *dev)
1282{
1283 int retval;
1284 LIST_HEAD(single);
1285
1286 list_add(&dev->unreg_list, &single);
1287 retval = __dev_close_many(&single);
1288 list_del(&single);
1289 return retval;
1290}
1291
1292static int dev_close_many(struct list_head *head)
1293{
1294 struct net_device *dev, *tmp;
1295 LIST_HEAD(tmp_list);
1296
1297 list_for_each_entry_safe(dev, tmp, head, unreg_list)
1298 if (!(dev->flags & IFF_UP))
1299 list_move(&dev->unreg_list, &tmp_list);
1300
1301 __dev_close_many(head);
1302
1303 list_for_each_entry(dev, head, unreg_list) {
1304 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1305 call_netdevice_notifiers(NETDEV_DOWN, dev);
1306 }
1307
1308
1309 list_splice(&tmp_list, head);
1310 return 0;
1311}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322int dev_close(struct net_device *dev)
1323{
1324 if (dev->flags & IFF_UP) {
1325 LIST_HEAD(single);
1326
1327 list_add(&dev->unreg_list, &single);
1328 dev_close_many(&single);
1329 list_del(&single);
1330 }
1331 return 0;
1332}
1333EXPORT_SYMBOL(dev_close);
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344void dev_disable_lro(struct net_device *dev)
1345{
1346
1347
1348
1349
1350 if (is_vlan_dev(dev))
1351 dev = vlan_dev_real_dev(dev);
1352
1353 dev->wanted_features &= ~NETIF_F_LRO;
1354 netdev_update_features(dev);
1355
1356 if (unlikely(dev->features & NETIF_F_LRO))
1357 netdev_WARN(dev, "failed to disable LRO!\n");
1358}
1359EXPORT_SYMBOL(dev_disable_lro);
1360
1361
/*
 * Starts out as 1.  While it is non-zero, register_netdevice_notifier()
 * does not replay events for existing devices.  The code that clears it
 * is outside this part of the file.
 */
static int dev_boot_phase = 1;
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378int register_netdevice_notifier(struct notifier_block *nb)
1379{
1380 struct net_device *dev;
1381 struct net_device *last;
1382 struct net *net;
1383 int err;
1384
1385 rtnl_lock();
1386 err = raw_notifier_chain_register(&netdev_chain, nb);
1387 if (err)
1388 goto unlock;
1389 if (dev_boot_phase)
1390 goto unlock;
1391 for_each_net(net) {
1392 for_each_netdev(net, dev) {
1393 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1394 err = notifier_to_errno(err);
1395 if (err)
1396 goto rollback;
1397
1398 if (!(dev->flags & IFF_UP))
1399 continue;
1400
1401 nb->notifier_call(nb, NETDEV_UP, dev);
1402 }
1403 }
1404
1405unlock:
1406 rtnl_unlock();
1407 return err;
1408
1409rollback:
1410 last = dev;
1411 for_each_net(net) {
1412 for_each_netdev(net, dev) {
1413 if (dev == last)
1414 goto outroll;
1415
1416 if (dev->flags & IFF_UP) {
1417 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1418 nb->notifier_call(nb, NETDEV_DOWN, dev);
1419 }
1420 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1421 }
1422 }
1423
1424outroll:
1425 raw_notifier_chain_unregister(&netdev_chain, nb);
1426 goto unlock;
1427}
1428EXPORT_SYMBOL(register_netdevice_notifier);
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444int unregister_netdevice_notifier(struct notifier_block *nb)
1445{
1446 struct net_device *dev;
1447 struct net *net;
1448 int err;
1449
1450 rtnl_lock();
1451 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1452 if (err)
1453 goto unlock;
1454
1455 for_each_net(net) {
1456 for_each_netdev(net, dev) {
1457 if (dev->flags & IFF_UP) {
1458 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1459 nb->notifier_call(nb, NETDEV_DOWN, dev);
1460 }
1461 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1462 }
1463 }
1464unlock:
1465 rtnl_unlock();
1466 return err;
1467}
1468EXPORT_SYMBOL(unregister_netdevice_notifier);
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1480{
1481 ASSERT_RTNL();
1482 return raw_notifier_call_chain(&netdev_chain, val, dev);
1483}
1484EXPORT_SYMBOL(call_netdevice_notifiers);
1485
1486static struct static_key netstamp_needed __read_mostly;
1487#ifdef HAVE_JUMP_LABEL
1488
1489
1490
1491
1492static atomic_t netstamp_needed_deferred;
1493#endif
1494
1495void net_enable_timestamp(void)
1496{
1497#ifdef HAVE_JUMP_LABEL
1498 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1499
1500 if (deferred) {
1501 while (--deferred)
1502 static_key_slow_dec(&netstamp_needed);
1503 return;
1504 }
1505#endif
1506 WARN_ON(in_interrupt());
1507 static_key_slow_inc(&netstamp_needed);
1508}
1509EXPORT_SYMBOL(net_enable_timestamp);
1510
1511void net_disable_timestamp(void)
1512{
1513#ifdef HAVE_JUMP_LABEL
1514 if (in_interrupt()) {
1515 atomic_inc(&netstamp_needed_deferred);
1516 return;
1517 }
1518#endif
1519 static_key_slow_dec(&netstamp_needed);
1520}
1521EXPORT_SYMBOL(net_disable_timestamp);
1522
1523static inline void net_timestamp_set(struct sk_buff *skb)
1524{
1525 skb->tstamp.tv64 = 0;
1526 if (static_key_false(&netstamp_needed))
1527 __net_timestamp(skb);
1528}
1529
/*
 * net_timestamp_check - stamp @SKB if timestamping is on and it is unset
 * @COND: extra condition that must hold for the stamp to be taken
 * @SKB:  socket buffer to stamp
 *
 * When the netstamp_needed key is enabled, @COND is true and @SKB has no
 * timestamp yet, the current time is stored in @SKB.
 *
 * Wrapped in do { } while (0) so the macro is a single statement: it can
 * be used as the body of an unbraced if/else without capturing a
 * following "else".  Invoke it with a trailing semicolon.
 */
#define net_timestamp_check(COND, SKB)				\
	do {							\
		if (static_key_false(&netstamp_needed)) {	\
			if ((COND) && !(SKB)->tstamp.tv64)	\
				__net_timestamp(SKB);		\
		}						\
	} while (0)
1536static int net_hwtstamp_validate(struct ifreq *ifr)
1537{
1538 struct hwtstamp_config cfg;
1539 enum hwtstamp_tx_types tx_type;
1540 enum hwtstamp_rx_filters rx_filter;
1541 int tx_type_valid = 0;
1542 int rx_filter_valid = 0;
1543
1544 if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1545 return -EFAULT;
1546
1547 if (cfg.flags)
1548 return -EINVAL;
1549
1550 tx_type = cfg.tx_type;
1551 rx_filter = cfg.rx_filter;
1552
1553 switch (tx_type) {
1554 case HWTSTAMP_TX_OFF:
1555 case HWTSTAMP_TX_ON:
1556 case HWTSTAMP_TX_ONESTEP_SYNC:
1557 tx_type_valid = 1;
1558 break;
1559 }
1560
1561 switch (rx_filter) {
1562 case HWTSTAMP_FILTER_NONE:
1563 case HWTSTAMP_FILTER_ALL:
1564 case HWTSTAMP_FILTER_SOME:
1565 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1566 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1567 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1568 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1569 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1570 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1571 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1572 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1573 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1574 case HWTSTAMP_FILTER_PTP_V2_EVENT:
1575 case HWTSTAMP_FILTER_PTP_V2_SYNC:
1576 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1577 rx_filter_valid = 1;
1578 break;
1579 }
1580
1581 if (!tx_type_valid || !rx_filter_valid)
1582 return -ERANGE;
1583
1584 return 0;
1585}
1586
1587static inline bool is_skb_forwardable(struct net_device *dev,
1588 struct sk_buff *skb)
1589{
1590 unsigned int len;
1591
1592 if (!(dev->flags & IFF_UP))
1593 return false;
1594
1595 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1596 if (skb->len <= len)
1597 return true;
1598
1599
1600
1601
1602 if (skb_is_gso(skb))
1603 return true;
1604
1605 return false;
1606}
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1627{
1628 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1629 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1630 atomic_long_inc(&dev->rx_dropped);
1631 kfree_skb(skb);
1632 return NET_RX_DROP;
1633 }
1634 }
1635
1636 skb_orphan(skb);
1637 nf_reset(skb);
1638
1639 if (unlikely(!is_skb_forwardable(dev, skb))) {
1640 atomic_long_inc(&dev->rx_dropped);
1641 kfree_skb(skb);
1642 return NET_RX_DROP;
1643 }
1644 skb->skb_iif = 0;
1645 skb->dev = dev;
1646 skb_dst_drop(skb);
1647 skb->tstamp.tv64 = 0;
1648 skb->pkt_type = PACKET_HOST;
1649 skb->protocol = eth_type_trans(skb, dev);
1650 skb->mark = 0;
1651 secpath_reset(skb);
1652 nf_reset(skb);
1653 return netif_rx(skb);
1654}
1655EXPORT_SYMBOL_GPL(dev_forward_skb);
1656
1657static inline int deliver_skb(struct sk_buff *skb,
1658 struct packet_type *pt_prev,
1659 struct net_device *orig_dev)
1660{
1661 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1662 return -ENOMEM;
1663 atomic_inc(&skb->users);
1664 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1665}
1666
1667static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1668{
1669 if (!ptype->af_packet_priv || !skb->sk)
1670 return false;
1671
1672 if (ptype->id_match)
1673 return ptype->id_match(ptype, skb->sk);
1674 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1675 return true;
1676
1677 return false;
1678}
1679
1680
1681
1682
1683
1684
1685static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1686{
1687 struct packet_type *ptype;
1688 struct sk_buff *skb2 = NULL;
1689 struct packet_type *pt_prev = NULL;
1690
1691 rcu_read_lock();
1692 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1693
1694
1695
1696 if ((ptype->dev == dev || !ptype->dev) &&
1697 (!skb_loop_sk(ptype, skb))) {
1698 if (pt_prev) {
1699 deliver_skb(skb2, pt_prev, skb->dev);
1700 pt_prev = ptype;
1701 continue;
1702 }
1703
1704 skb2 = skb_clone(skb, GFP_ATOMIC);
1705 if (!skb2)
1706 break;
1707
1708 net_timestamp_set(skb2);
1709
1710
1711
1712
1713
1714 skb_reset_mac_header(skb2);
1715
1716 if (skb_network_header(skb2) < skb2->data ||
1717 skb2->network_header > skb2->tail) {
1718 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1719 ntohs(skb2->protocol),
1720 dev->name);
1721 skb_reset_network_header(skb2);
1722 }
1723
1724 skb2->transport_header = skb2->network_header;
1725 skb2->pkt_type = PACKET_OUTGOING;
1726 pt_prev = ptype;
1727 }
1728 }
1729 if (pt_prev)
1730 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1731 rcu_read_unlock();
1732}
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1748{
1749 int i;
1750 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1751
1752
1753 if (tc->offset + tc->count > txq) {
1754 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1755 dev->num_tc = 0;
1756 return;
1757 }
1758
1759
1760 for (i = 1; i < TC_BITMASK + 1; i++) {
1761 int q = netdev_get_prio_tc_map(dev, i);
1762
1763 tc = &dev->tc_to_txq[q];
1764 if (tc->offset + tc->count > txq) {
1765 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1766 i, q);
1767 netdev_set_prio_tc_map(dev, i, 0);
1768 }
1769 }
1770}
1771
1772
1773
1774
1775
1776int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1777{
1778 int rc;
1779
1780 if (txq < 1 || txq > dev->num_tx_queues)
1781 return -EINVAL;
1782
1783 if (dev->reg_state == NETREG_REGISTERED ||
1784 dev->reg_state == NETREG_UNREGISTERING) {
1785 ASSERT_RTNL();
1786
1787 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1788 txq);
1789 if (rc)
1790 return rc;
1791
1792 if (dev->num_tc)
1793 netif_setup_tc(dev, txq);
1794
1795 if (txq < dev->real_num_tx_queues)
1796 qdisc_reset_all_tx_gt(dev, txq);
1797 }
1798
1799 dev->real_num_tx_queues = txq;
1800 return 0;
1801}
1802EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1803
1804#ifdef CONFIG_RPS
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1816{
1817 int rc;
1818
1819 if (rxq < 1 || rxq > dev->num_rx_queues)
1820 return -EINVAL;
1821
1822 if (dev->reg_state == NETREG_REGISTERED) {
1823 ASSERT_RTNL();
1824
1825 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1826 rxq);
1827 if (rc)
1828 return rc;
1829 }
1830
1831 dev->real_num_rx_queues = rxq;
1832 return 0;
1833}
1834EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1835#endif
1836
1837
1838
1839
1840
1841
1842
1843int netif_get_num_default_rss_queues(void)
1844{
1845 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
1846}
1847EXPORT_SYMBOL(netif_get_num_default_rss_queues);
1848
1849static inline void __netif_reschedule(struct Qdisc *q)
1850{
1851 struct softnet_data *sd;
1852 unsigned long flags;
1853
1854 local_irq_save(flags);
1855 sd = &__get_cpu_var(softnet_data);
1856 q->next_sched = NULL;
1857 *sd->output_queue_tailp = q;
1858 sd->output_queue_tailp = &q->next_sched;
1859 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1860 local_irq_restore(flags);
1861}
1862
1863void __netif_schedule(struct Qdisc *q)
1864{
1865 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1866 __netif_reschedule(q);
1867}
1868EXPORT_SYMBOL(__netif_schedule);
1869
1870void dev_kfree_skb_irq(struct sk_buff *skb)
1871{
1872 if (atomic_dec_and_test(&skb->users)) {
1873 struct softnet_data *sd;
1874 unsigned long flags;
1875
1876 local_irq_save(flags);
1877 sd = &__get_cpu_var(softnet_data);
1878 skb->next = sd->completion_queue;
1879 sd->completion_queue = skb;
1880 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1881 local_irq_restore(flags);
1882 }
1883}
1884EXPORT_SYMBOL(dev_kfree_skb_irq);
1885
/*
 * Free @skb from any context: uses dev_kfree_skb_irq() when running in
 * hard-irq context or with interrupts disabled, dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1894
1895
1896
1897
1898
1899
1900
1901
1902void netif_device_detach(struct net_device *dev)
1903{
1904 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1905 netif_running(dev)) {
1906 netif_tx_stop_all_queues(dev);
1907 }
1908}
1909EXPORT_SYMBOL(netif_device_detach);
1910
1911
1912
1913
1914
1915
1916
1917void netif_device_attach(struct net_device *dev)
1918{
1919 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1920 netif_running(dev)) {
1921 netif_tx_wake_all_queues(dev);
1922 __netdev_watchdog_up(dev);
1923 }
1924}
1925EXPORT_SYMBOL(netif_device_attach);
1926
1927static void skb_warn_bad_offload(const struct sk_buff *skb)
1928{
1929 static const netdev_features_t null_features = 0;
1930 struct net_device *dev = skb->dev;
1931 const char *driver = "";
1932
1933 if (dev && dev->dev.parent)
1934 driver = dev_driver_string(dev->dev.parent);
1935
1936 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
1937 "gso_type=%d ip_summed=%d\n",
1938 driver, dev ? &dev->features : &null_features,
1939 skb->sk ? &skb->sk->sk_route_caps : &null_features,
1940 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
1941 skb_shinfo(skb)->gso_type, skb->ip_summed);
1942}
1943
1944
1945
1946
1947
/*
 * Resolve a packet's checksum in software.
 *
 * @skb: packet to checksum
 *
 * The checksum is computed over the data from skb_checksum_start_offset()
 * to the end of the packet and stored, folded, csum_offset bytes past that
 * start.  Afterwards ip_summed is set to CHECKSUM_NONE.  A packet that is
 * CHECKSUM_COMPLETE on entry only has ip_summed updated.
 *
 * Returns 0 on success, -EINVAL if the packet is a GSO packet (gso_size is
 * non-zero), or the error from pskb_expand_head().
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	/* GSO packets are not handled here; report the inconsistency. */
	if (unlikely(skb_shinfo(skb)->gso_size)) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	/* The checksum start must lie within the linear header area. */
	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	/* From here on offset is where the checksum field itself lives. */
	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	/* Writing into a cloned skb requires a private copy of the header. */
	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
/**
 * skb_gso_segment - run protocol-level segmentation on a GSO packet
 * @skb: packet to segment
 * @features: device features, passed to the protocol handler
 *
 * Looks up the packet_type matching the (possibly VLAN-encapsulated)
 * protocol and calls its gso_segment() callback.
 *
 * Returns the value of that callback, ERR_PTR(-EPROTONOSUPPORT) when no
 * handler is found, ERR_PTR(-EINVAL) when a VLAN header cannot be pulled,
 * or the pskb_expand_head() error.  When ip_summed is not CHECKSUM_PARTIAL
 * the handler's gso_send_check() runs first; its error is returned as an
 * ERR_PTR, and NULL (ERR_PTR(0)) is returned when it succeeds and
 * skb_gso_ok() holds for @features.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb,
	netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;
	int err;

	/* Walk through any stacked 802.1Q headers to the encapsulated type. */
	while (type == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return ERR_PTR(-EINVAL);

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	/* Record the mac header length and advance data to the network header. */
	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		skb_warn_bad_offload(skb);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				/* Move data back to the network header before segmenting. */
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore skb->data to the mac header. */
	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_gso_segment);
2048
2049
2050#ifdef CONFIG_BUG
2051void netdev_rx_csum_fault(struct net_device *dev)
2052{
2053 if (net_ratelimit()) {
2054 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2055 dump_stack();
2056 }
2057}
2058EXPORT_SYMBOL(netdev_rx_csum_fault);
2059#endif
2060
2061
2062
2063
2064
2065
/*
 * Check whether @skb has page fragments that @dev cannot DMA from.
 *
 * Only does anything with CONFIG_HIGHMEM.  Returns 1 when the device lacks
 * NETIF_F_HIGHDMA and a fragment lives in high memory, or, on buses where
 * PCI_DMA_BUS_IS_PHYS holds, when the parent device has no DMA mask or a
 * fragment's page lies beyond it.  Returns 0 otherwise.
 */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int idx;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (idx = 0; idx < shinfo->nr_frags; idx++) {
			if (PageHighMem(skb_frag_page(&shinfo->frags[idx])))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *parent = dev->dev.parent;

		if (!parent)
			return 0;

		for (idx = 0; idx < shinfo->nr_frags; idx++) {
			dma_addr_t phys =
				page_to_phys(skb_frag_page(&shinfo->frags[idx]));

			if (!parent->dma_mask)
				return 1;
			if (phys + PAGE_SIZE - 1 > *parent->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}
2093
/*
 * State kept in skb->cb of an skb that heads a list of GSO segments:
 * the destructor that was installed on the skb before dev_gso_segment()
 * replaced it with dev_gso_skb_destructor().
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View an skb's control buffer as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2099
2100static void dev_gso_skb_destructor(struct sk_buff *skb)
2101{
2102 struct dev_gso_cb *cb;
2103
2104 do {
2105 struct sk_buff *nskb = skb->next;
2106
2107 skb->next = nskb->next;
2108 nskb->next = NULL;
2109 kfree_skb(nskb);
2110 } while (skb->next);
2111
2112 cb = DEV_GSO_CB(skb);
2113 if (cb->destructor)
2114 cb->destructor(skb);
2115}
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2126{
2127 struct sk_buff *segs;
2128
2129 segs = skb_gso_segment(skb, features);
2130
2131
2132 if (!segs)
2133 return 0;
2134
2135 if (IS_ERR(segs))
2136 return PTR_ERR(segs);
2137
2138 skb->next = segs;
2139 DEV_GSO_CB(skb)->destructor = skb->destructor;
2140 skb->destructor = dev_gso_skb_destructor;
2141
2142 return 0;
2143}
2144
2145static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2146{
2147 return ((features & NETIF_F_GEN_CSUM) ||
2148 ((features & NETIF_F_V4_CSUM) &&
2149 protocol == htons(ETH_P_IP)) ||
2150 ((features & NETIF_F_V6_CSUM) &&
2151 protocol == htons(ETH_P_IPV6)) ||
2152 ((features & NETIF_F_FCOE_CRC) &&
2153 protocol == htons(ETH_P_FCOE)));
2154}
2155
2156static netdev_features_t harmonize_features(struct sk_buff *skb,
2157 __be16 protocol, netdev_features_t features)
2158{
2159 if (skb->ip_summed != CHECKSUM_NONE &&
2160 !can_checksum_protocol(features, protocol)) {
2161 features &= ~NETIF_F_ALL_CSUM;
2162 features &= ~NETIF_F_SG;
2163 } else if (illegal_highdma(skb->dev, skb)) {
2164 features &= ~NETIF_F_SG;
2165 }
2166
2167 return features;
2168}
2169
2170netdev_features_t netif_skb_features(struct sk_buff *skb)
2171{
2172 __be16 protocol = skb->protocol;
2173 netdev_features_t features = skb->dev->features;
2174
2175 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2176 features &= ~NETIF_F_GSO_MASK;
2177
2178 if (protocol == htons(ETH_P_8021Q)) {
2179 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2180 protocol = veh->h_vlan_encapsulated_proto;
2181 } else if (!vlan_tx_tag_present(skb)) {
2182 return harmonize_features(skb, protocol, features);
2183 }
2184
2185 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2186
2187 if (protocol != htons(ETH_P_8021Q)) {
2188 return harmonize_features(skb, protocol, features);
2189 } else {
2190 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2191 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2192 return harmonize_features(skb, protocol, features);
2193 }
2194}
2195EXPORT_SYMBOL(netif_skb_features);
2196
2197
2198
2199
2200
2201
2202static inline int skb_needs_linearize(struct sk_buff *skb,
2203 int features)
2204{
2205 return skb_is_nonlinear(skb) &&
2206 ((skb_has_frag_list(skb) &&
2207 !(features & NETIF_F_FRAGLIST)) ||
2208 (skb_shinfo(skb)->nr_frags &&
2209 !(features & NETIF_F_SG)));
2210}
2211
/*
 * Hand a packet, or a chain of GSO segments, to the driver.
 *
 * @skb: packet to transmit; when skb->next is already set the chained
 *       segments are transmitted instead (resuming an earlier partial send)
 * @dev: device to transmit on
 * @txq: TX queue, used for txq_trans_update() and stopped-queue checks
 *
 * Returns the driver's ndo_start_xmit() result; NETDEV_TX_BUSY when the
 * queue stops while segments remain; or the initial NETDEV_TX_OK when the
 * packet is dropped here (VLAN tag insertion, segmentation, linearization
 * or software checksum failure).
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		netdev_features_t features;

		/*
		 * Devices flagged IFF_XMIT_DST_RELEASE have the dst dropped
		 * before the skb reaches the driver.
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		features = netif_skb_features(skb);

		/* No hardware VLAN insertion: put the tag into the packet. */
		if (vlan_tx_tag_present(skb) &&
		    !(features & NETIF_F_HW_VLAN_TX)) {
			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			/*
			 * For CHECKSUM_PARTIAL packets the transport header is
			 * set to the checksum start, and the checksum is
			 * computed in software when the device offers no
			 * checksum offload at all.
			 */
			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				skb_set_transport_header(skb,
					skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				     skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		/* Length is sampled before the driver call, for the tracepoint. */
		skb_len = skb->len;
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		/* Detach the first remaining segment from the chain. */
		skb->next = nskb->next;
		nskb->next = NULL;

		/*
		 * Same dst handling as for the unsegmented case, applied to
		 * each segment.
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(nskb);

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(nskb, dev);

		skb_len = nskb->len;
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			/* Put the segment back at the head of the chain. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_xmit_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	/* All segments gone: restore the destructor saved by dev_gso_segment(). */
	if (likely(skb->next == NULL))
		skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}
2314
/*
 * Seed passed to the jhash calls in this file (TX queue selection, XPS and
 * the receive hash).  NOTE(review): its initialisation is not in this part
 * of the file.
 */
static u32 hashrnd __read_mostly;
2316
2317
2318
2319
2320
2321u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2322 unsigned int num_tx_queues)
2323{
2324 u32 hash;
2325 u16 qoffset = 0;
2326 u16 qcount = num_tx_queues;
2327
2328 if (skb_rx_queue_recorded(skb)) {
2329 hash = skb_get_rx_queue(skb);
2330 while (unlikely(hash >= num_tx_queues))
2331 hash -= num_tx_queues;
2332 return hash;
2333 }
2334
2335 if (dev->num_tc) {
2336 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2337 qoffset = dev->tc_to_txq[tc].offset;
2338 qcount = dev->tc_to_txq[tc].count;
2339 }
2340
2341 if (skb->sk && skb->sk->sk_hash)
2342 hash = skb->sk->sk_hash;
2343 else
2344 hash = (__force u16) skb->protocol;
2345 hash = jhash_1word(hash, hashrnd);
2346
2347 return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2348}
2349EXPORT_SYMBOL(__skb_tx_hash);
2350
2351static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2352{
2353 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2354 net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
2355 dev->name, queue_index,
2356 dev->real_num_tx_queues);
2357 return 0;
2358 }
2359 return queue_index;
2360}
2361
/*
 * Look up a TX queue for @skb in the device's XPS map for the current CPU.
 *
 * Returns the selected queue index, or -1 when XPS is not configured in,
 * the device or CPU has no map, or the mapped queue is not below
 * dev->real_num_tx_queues.
 */
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *maps;
	struct xps_map *cpu_map;
	int picked = -1;

	rcu_read_lock();

	maps = rcu_dereference(dev->xps_maps);
	if (!maps)
		goto unlock;

	cpu_map = rcu_dereference(maps->cpu_map[raw_smp_processor_id()]);
	if (!cpu_map)
		goto unlock;

	if (cpu_map->len == 1) {
		picked = cpu_map->queues[0];
	} else {
		u32 hash;

		/* Several candidate queues: spread flows over them by hash. */
		if (skb->sk && skb->sk->sk_hash)
			hash = skb->sk->sk_hash;
		else
			hash = (__force u16) skb->protocol ^
			    skb->rxhash;
		hash = jhash_1word(hash, hashrnd);
		picked = cpu_map->queues[((u64)hash * cpu_map->len) >> 32];
	}

	if (unlikely(picked >= dev->real_num_tx_queues))
		picked = -1;

unlock:
	rcu_read_unlock();

	return picked;
#else
	return -1;
#endif
}
2399
2400struct netdev_queue *netdev_pick_tx(struct net_device *dev,
2401 struct sk_buff *skb)
2402{
2403 int queue_index;
2404 const struct net_device_ops *ops = dev->netdev_ops;
2405
2406 if (dev->real_num_tx_queues == 1)
2407 queue_index = 0;
2408 else if (ops->ndo_select_queue) {
2409 queue_index = ops->ndo_select_queue(dev, skb);
2410 queue_index = dev_cap_txqueue(dev, queue_index);
2411 } else {
2412 struct sock *sk = skb->sk;
2413 queue_index = sk_tx_queue_get(sk);
2414
2415 if (queue_index < 0 || skb->ooo_okay ||
2416 queue_index >= dev->real_num_tx_queues) {
2417 int old_index = queue_index;
2418
2419 queue_index = get_xps_queue(dev, skb);
2420 if (queue_index < 0)
2421 queue_index = skb_tx_hash(dev, skb);
2422
2423 if (queue_index != old_index && sk) {
2424 struct dst_entry *dst =
2425 rcu_dereference_check(sk->sk_dst_cache, 1);
2426
2427 if (dst && skb_dst(skb) == dst)
2428 sk_tx_queue_set(sk, queue_index);
2429 }
2430 }
2431 }
2432
2433 skb_set_queue_mapping(skb, queue_index);
2434 return netdev_get_tx_queue(dev, queue_index);
2435}
2436
/*
 * Send @skb through qdisc @q.
 *
 * @skb: packet to send
 * @q:   qdisc attached to @txq
 * @dev: device being transmitted on
 * @txq: TX queue
 *
 * Returns NET_XMIT_DROP when the qdisc is deactivated (the skb is freed),
 * NET_XMIT_SUCCESS when the packet bypassed the queue and was handed to
 * sch_direct_xmit(), or the masked result of q->enqueue().
 */
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_skb_cb(skb)->pkt_len = skb->len;
	qdisc_calculate_pkt_len(skb, q);

	/*
	 * If the qdisc is already running, take the additional busylock
	 * before the root lock.  It is released as soon as this CPU starts
	 * running the qdisc itself, or at the end otherwise.
	 */
	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		/*
		 * Bypass path: the qdisc allows bypass, its queue is empty and
		 * nobody else is running it, so transmit directly without
		 * enqueueing.
		 */
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		/* Normal path: enqueue, then run the qdisc if it was idle. */
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}
2499
2500#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2501static void skb_update_prio(struct sk_buff *skb)
2502{
2503 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2504
2505 if (!skb->priority && skb->sk && map) {
2506 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2507
2508 if (prioidx < map->priomap_len)
2509 skb->priority = map->priomap[prioidx];
2510 }
2511}
2512#else
2513#define skb_update_prio(skb)
2514#endif
2515
/*
 * Per-CPU nesting depth of dev_hard_start_xmit() calls made from
 * dev_queue_xmit() for queueless devices.  Once the depth exceeds
 * RECURSION_LIMIT, dev_queue_xmit() stops transmitting and drops the packet.
 */
static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
2518
2519
2520
2521
2522
2523int dev_loopback_xmit(struct sk_buff *skb)
2524{
2525 skb_reset_mac_header(skb);
2526 __skb_pull(skb, skb_network_offset(skb));
2527 skb->pkt_type = PACKET_LOOPBACK;
2528 skb->ip_summed = CHECKSUM_UNNECESSARY;
2529 WARN_ON(!skb_dst(skb));
2530 skb_dst_force(skb);
2531 netif_rx_ni(skb);
2532 return 0;
2533}
2534EXPORT_SYMBOL(dev_loopback_xmit);
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
/**
 * dev_queue_xmit - queue a packet for transmission on skb->dev
 * @skb: packet to transmit
 *
 * Selects a TX queue and, when that queue's qdisc has an enqueue method,
 * sends the packet through the qdisc and returns its result.
 *
 * For a device without a queueing qdisc the packet is handed straight to
 * dev_hard_start_xmit() under the TX lock, provided the device is up, the
 * current CPU does not already own the TX lock, and the per-CPU recursion
 * depth is within RECURSION_LIMIT.  If that transmit completes its result
 * is returned; in every other case the packet is freed and -ENETDOWN is
 * returned.
 *
 * The whole function runs under rcu_read_lock_bh().
 */
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* BHs stay disabled (and RCU held) until one of the unlocks below. */
	rcu_read_lock_bh();

	skb_update_prio(skb);

	txq = netdev_pick_tx(dev, skb);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	trace_net_dev_queue(skb);
	if (q->enqueue) {
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out;
	}

	/*
	 * The qdisc has no enqueue method, so there is no queue to fall back
	 * on: either the packet goes out right now or it is dropped.  The
	 * lock-owner check and the recursion counter guard against a device
	 * transmitting back into itself.
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id();

		if (txq->xmit_lock_owner != cpu) {

			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
				goto recursion_alert;

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_xmit_stopped(txq)) {
				__this_cpu_inc(xmit_recursion);
				rc = dev_hard_start_xmit(skb, dev, txq);
				__this_cpu_dec(xmit_recursion);
				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
					     dev->name);
		} else {
			/*
			 * This CPU already holds the TX lock of this queue
			 * (or the recursion limit was exceeded, via the label).
			 */
recursion_alert:
			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
					     dev->name);
		}
	}

	/* Drop path: device down, queue stopped, or loop detected. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
EXPORT_SYMBOL(dev_queue_xmit);
2641
2642
2643
2644
2645
2646
/*
 * Limit on the per-CPU input_pkt_queue length: enqueue_to_backlog() drops
 * packets once the queue length exceeds this value.
 */
int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);

/*
 * Selects where receive timestamping is checked: netif_rx() calls
 * net_timestamp_check() with this value, __netif_receive_skb() with its
 * negation.
 */
int netdev_tstamp_prequeue __read_mostly = 1;
/* NOTE(review): not used in this part of the file; presumably the NET_RX softirq packet budget - confirm. */
int netdev_budget __read_mostly = 300;
/* NOTE(review): not used in this part of the file; presumably the backlog NAPI weight - confirm. */
int weight_p __read_mostly = 64;
2653
2654
2655static inline void ____napi_schedule(struct softnet_data *sd,
2656 struct napi_struct *napi)
2657{
2658 list_add_tail(&napi->poll_list, &sd->poll_list);
2659 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2660}
2661
2662
2663
2664
2665
2666
2667
2668void __skb_get_rxhash(struct sk_buff *skb)
2669{
2670 struct flow_keys keys;
2671 u32 hash;
2672
2673 if (!skb_flow_dissect(skb, &keys))
2674 return;
2675
2676 if (keys.ports)
2677 skb->l4_rxhash = 1;
2678
2679
2680 if (((__force u32)keys.dst < (__force u32)keys.src) ||
2681 (((__force u32)keys.dst == (__force u32)keys.src) &&
2682 ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
2683 swap(keys.dst, keys.src);
2684 swap(keys.port16[0], keys.port16[1]);
2685 }
2686
2687 hash = jhash_3words((__force u32)keys.dst,
2688 (__force u32)keys.src,
2689 (__force u32)keys.ports, hashrnd);
2690 if (!hash)
2691 hash = 1;
2692
2693 skb->rxhash = hash;
2694}
2695EXPORT_SYMBOL(__skb_get_rxhash);
2696
2697#ifdef CONFIG_RPS
2698
2699
/*
 * Global RCU-protected table consulted by get_rps_cpu(): indexed by the
 * masked receive hash, its entries give the CPU a flow should be steered to.
 */
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);

/* Static key tested by netif_rx() to decide whether to take the RPS path. */
struct static_key rps_needed __read_mostly;
2704
/*
 * Record @next_cpu as the CPU handling the flow described by @rflow.
 *
 * @dev:      receiving device
 * @skb:      packet belonging to the flow
 * @rflow:    current flow entry
 * @next_cpu: CPU the flow should move to, or RPS_NO_CPU
 *
 * With CONFIG_RFS_ACCEL, and when the device supports it, the driver is
 * also asked (ndo_rx_flow_steer) to steer the flow to the RX queue that
 * cpu_rmap_lookup_index() associates with @next_cpu; on success the flow
 * entry of that queue's table replaces @rflow.
 *
 * Returns the flow entry that was updated.
 */
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
	    struct rps_dev_flow *rflow, u16 next_cpu)
{
	if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
		struct netdev_rx_queue *rxqueue;
		struct rps_dev_flow_table *flow_table;
		struct rps_dev_flow *old_rflow;
		u32 flow_id;
		u16 rxq_index;
		int rc;

		/* Hardware steering needs a recorded RX queue, a CPU rmap and NTUPLE. */
		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
		    !(dev->features & NETIF_F_NTUPLE))
			goto out;
		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
		/* Already arriving on the right queue: nothing to reprogram. */
		if (rxq_index == skb_get_rx_queue(skb))
			goto out;

		rxqueue = dev->_rx + rxq_index;
		flow_table = rcu_dereference(rxqueue->rps_flow_table);
		if (!flow_table)
			goto out;
		flow_id = skb->rxhash & flow_table->mask;
		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
							rxq_index, flow_id);
		if (rc < 0)
			goto out;
		/* Switch to the new queue's flow entry and remember the filter id. */
		old_rflow = rflow;
		rflow = &flow_table->flows[flow_id];
		rflow->filter = rc;
		if (old_rflow->filter == rflow->filter)
			old_rflow->filter = RPS_NO_FILTER;
	out:
#endif
		/* Snapshot the target CPU's queue head at the time of the switch. */
		rflow->last_qtail =
			per_cpu(softnet_data, next_cpu).input_queue_head;
	}

	rflow->cpu = next_cpu;
	return rflow;
}
2749
2750
2751
2752
2753
2754
/*
 * Choose the CPU that should process @skb.
 *
 * @dev:    receiving device
 * @skb:    received packet
 * @rflowp: set to the flow entry used when the choice came from the flow
 *          tables; left untouched otherwise
 *
 * Returns a CPU number, or -1 when no steering applies (recorded RX queue
 * out of range, no RPS map or flow table on the queue, no receive hash, or
 * the candidate CPU is offline).
 */
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
		       struct rps_dev_flow **rflowp)
{
	struct netdev_rx_queue *rxqueue;
	struct rps_map *map;
	struct rps_dev_flow_table *flow_table;
	struct rps_sock_flow_table *sock_flow_table;
	int cpu = -1;
	u16 tcpu;

	/* Find the RX queue: the recorded one, or queue 0 by default. */
	if (skb_rx_queue_recorded(skb)) {
		u16 index = skb_get_rx_queue(skb);
		if (unlikely(index >= dev->real_num_rx_queues)) {
			WARN_ONCE(dev->real_num_rx_queues > 1,
				  "%s received packet on queue %u, but number "
				  "of RX queues is %u\n",
				  dev->name, index, dev->real_num_rx_queues);
			goto done;
		}
		rxqueue = dev->_rx + index;
	} else
		rxqueue = dev->_rx;

	map = rcu_dereference(rxqueue->rps_map);
	if (map) {
		/* Single-CPU map and no flow table: no hash needed. */
		if (map->len == 1 &&
		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
			tcpu = map->cpus[0];
			if (cpu_online(tcpu))
				cpu = tcpu;
			goto done;
		}
	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
		goto done;
	}

	skb_reset_network_header(skb);
	if (!skb_get_rxhash(skb))
		goto done;

	flow_table = rcu_dereference(rxqueue->rps_flow_table);
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	if (flow_table && sock_flow_table) {
		u16 next_cpu;
		struct rps_dev_flow *rflow;

		/* tcpu: CPU currently recorded for the flow on this queue. */
		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
		tcpu = rflow->cpu;

		/* next_cpu: CPU recorded for the flow in the global table. */
		next_cpu = sock_flow_table->ents[skb->rxhash &
		    sock_flow_table->mask];

		/*
		 * Switch the flow to next_cpu only when the two differ and
		 * the current CPU is unset, offline, or its input queue head
		 * has advanced to or past the tail position recorded in
		 * rflow->last_qtail.  The signed difference keeps the
		 * comparison valid across counter wrap-around.
		 */
		if (unlikely(tcpu != next_cpu) &&
		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
		      rflow->last_qtail)) >= 0)) {
			tcpu = next_cpu;
			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
		}

		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
			*rflowp = rflow;
			cpu = tcpu;
			goto done;
		}
	}

	/* Fall back to the RPS map: scale the hash onto the CPU list. */
	if (map) {
		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];

		if (cpu_online(tcpu)) {
			cpu = tcpu;
			goto done;
		}
	}

done:
	return cpu;
}
2845
2846#ifdef CONFIG_RFS_ACCEL
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2860 u32 flow_id, u16 filter_id)
2861{
2862 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2863 struct rps_dev_flow_table *flow_table;
2864 struct rps_dev_flow *rflow;
2865 bool expire = true;
2866 int cpu;
2867
2868 rcu_read_lock();
2869 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2870 if (flow_table && flow_id <= flow_table->mask) {
2871 rflow = &flow_table->flows[flow_id];
2872 cpu = ACCESS_ONCE(rflow->cpu);
2873 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2874 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2875 rflow->last_qtail) <
2876 (int)(10 * flow_table->mask)))
2877 expire = false;
2878 }
2879 rcu_read_unlock();
2880 return expire;
2881}
2882EXPORT_SYMBOL(rps_may_expire_flow);
2883
2884#endif
2885
2886
2887static void rps_trigger_softirq(void *data)
2888{
2889 struct softnet_data *sd = data;
2890
2891 ____napi_schedule(sd, &sd->backlog);
2892 sd->received_rps++;
2893}
2894
2895#endif
2896
2897
2898
2899
2900
2901
/*
 * If @sd belongs to another CPU, chain it onto the current CPU's
 * rps_ipi_list, raise NET_RX_SOFTIRQ and return 1.  Returns 0 when @sd is
 * the current CPU's own data or RPS is not configured in.
 */
static int rps_ipi_queued(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	struct softnet_data *local_sd = &__get_cpu_var(softnet_data);

	if (sd == local_sd)
		return 0;

	/* Push sd on the front of this CPU's pending-IPI list. */
	sd->rps_ipi_next = local_sd->rps_ipi_list;
	local_sd->rps_ipi_list = sd;

	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	return 1;
#else
	return 0;
#endif
}
2917
2918
2919
2920
2921
2922static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2923 unsigned int *qtail)
2924{
2925 struct softnet_data *sd;
2926 unsigned long flags;
2927
2928 sd = &per_cpu(softnet_data, cpu);
2929
2930 local_irq_save(flags);
2931
2932 rps_lock(sd);
2933 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2934 if (skb_queue_len(&sd->input_pkt_queue)) {
2935enqueue:
2936 __skb_queue_tail(&sd->input_pkt_queue, skb);
2937 input_queue_tail_incr_save(sd, qtail);
2938 rps_unlock(sd);
2939 local_irq_restore(flags);
2940 return NET_RX_SUCCESS;
2941 }
2942
2943
2944
2945
2946 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2947 if (!rps_ipi_queued(sd))
2948 ____napi_schedule(sd, &sd->backlog);
2949 }
2950 goto enqueue;
2951 }
2952
2953 sd->dropped++;
2954 rps_unlock(sd);
2955
2956 local_irq_restore(flags);
2957
2958 atomic_long_inc(&skb->dev->rx_dropped);
2959 kfree_skb(skb);
2960 return NET_RX_DROP;
2961}
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
/**
 * netif_rx - queue a received packet on a per-CPU backlog
 * @skb: packet to queue
 *
 * Unless netpoll claims the packet, it is queued with enqueue_to_backlog()
 * on the CPU chosen by get_rps_cpu() when RPS is active, or on the current
 * CPU otherwise.
 *
 * Returns NET_RX_DROP when netpoll consumed the packet, otherwise the
 * result of enqueue_to_backlog() (NET_RX_SUCCESS or NET_RX_DROP).
 */
int netif_rx(struct sk_buff *skb)
{
	int ret;

	/* Packet taken by netpoll: nothing more to do here. */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	net_timestamp_check(netdev_tstamp_prequeue, skb);

	trace_netif_rx(skb);
#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		/* voidflow is a throwaway target for the qtail update when no flow entry applies. */
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu;

		preempt_disable();
		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);
		if (cpu < 0)
			cpu = smp_processor_id();

		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

		rcu_read_unlock();
		preempt_enable();
	} else
#endif
	{
		unsigned int qtail;
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}
	return ret;
}
EXPORT_SYMBOL(netif_rx);
3015
/*
 * Variant of netif_rx() that, with preemption disabled, runs any pending
 * softirqs right after queueing the packet.  Returns netif_rx()'s result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();

	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return ret;
}
EXPORT_SYMBOL(netif_rx_ni);
3029
/*
 * Softirq handler working on the current CPU's softnet_data: frees the
 * skbs parked on the completion queue, then runs the qdiscs on the output
 * queue.
 */
static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		/* Detach the whole list with interrupts off, free it with them on. */
		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			/* Entries are queued only after their last reference was dropped. */
			WARN_ON(atomic_read(&skb->users));
			trace_kfree_skb(skb, net_tx_action);
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		/* Detach the list and reset the tail pointer to the empty state. */
		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				/*
				 * Lock is busy: put an active qdisc back on
				 * the output queue; a deactivated one just
				 * has its scheduled bit cleared.
				 */
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}
3087
#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
/*
 * Hook pointer taking a device and an address; only defined when both
 * bridging and ATM LANE are configured.
 * NOTE(review): presumably set by the bridge code and called from ATM LANE
 * to test an address against the bridge forwarding database - confirm.
 */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
			     unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
3095
3096#ifdef CONFIG_NET_CLS_ACT
3097
3098
3099
3100
3101
3102
3103
3104
3105static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3106{
3107 struct net_device *dev = skb->dev;
3108 u32 ttl = G_TC_RTTL(skb->tc_verd);
3109 int result = TC_ACT_OK;
3110 struct Qdisc *q;
3111
3112 if (unlikely(MAX_RED_LOOP < ttl++)) {
3113 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3114 skb->skb_iif, dev->ifindex);
3115 return TC_ACT_SHOT;
3116 }
3117
3118 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3119 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3120
3121 q = rxq->qdisc;
3122 if (q != &noop_qdisc) {
3123 spin_lock(qdisc_lock(q));
3124 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3125 result = qdisc_enqueue_root(skb, q);
3126 spin_unlock(qdisc_lock(q));
3127 }
3128
3129 return result;
3130}
3131
3132static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3133 struct packet_type **pt_prev,
3134 int *ret, struct net_device *orig_dev)
3135{
3136 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3137
3138 if (!rxq || rxq->qdisc == &noop_qdisc)
3139 goto out;
3140
3141 if (*pt_prev) {
3142 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3143 *pt_prev = NULL;
3144 }
3145
3146 switch (ing_filter(skb, rxq)) {
3147 case TC_ACT_SHOT:
3148 case TC_ACT_STOLEN:
3149 kfree_skb(skb);
3150 return NULL;
3151 }
3152
3153out:
3154 skb->tc_verd = 0;
3155 return skb;
3156}
3157#endif
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173int netdev_rx_handler_register(struct net_device *dev,
3174 rx_handler_func_t *rx_handler,
3175 void *rx_handler_data)
3176{
3177 ASSERT_RTNL();
3178
3179 if (dev->rx_handler)
3180 return -EBUSY;
3181
3182 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3183 rcu_assign_pointer(dev->rx_handler, rx_handler);
3184
3185 return 0;
3186}
3187EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197void netdev_rx_handler_unregister(struct net_device *dev)
3198{
3199
3200 ASSERT_RTNL();
3201 RCU_INIT_POINTER(dev->rx_handler, NULL);
3202 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3203}
3204EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3205
3206
3207
3208
3209
3210static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3211{
3212 switch (skb->protocol) {
3213 case __constant_htons(ETH_P_ARP):
3214 case __constant_htons(ETH_P_IP):
3215 case __constant_htons(ETH_P_IPV6):
3216 case __constant_htons(ETH_P_8021Q):
3217 return true;
3218 default:
3219 return false;
3220 }
3221}
3222
3223static int __netif_receive_skb(struct sk_buff *skb)
3224{
3225 struct packet_type *ptype, *pt_prev;
3226 rx_handler_func_t *rx_handler;
3227 struct net_device *orig_dev;
3228 struct net_device *null_or_dev;
3229 bool deliver_exact = false;
3230 int ret = NET_RX_DROP;
3231 __be16 type;
3232 unsigned long pflags = current->flags;
3233
3234 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3235
3236 trace_netif_receive_skb(skb);
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3248 current->flags |= PF_MEMALLOC;
3249
3250
3251 if (netpoll_receive_skb(skb))
3252 goto out;
3253
3254 orig_dev = skb->dev;
3255
3256 skb_reset_network_header(skb);
3257 skb_reset_transport_header(skb);
3258 skb_reset_mac_len(skb);
3259
3260 pt_prev = NULL;
3261
3262 rcu_read_lock();
3263
3264another_round:
3265 skb->skb_iif = skb->dev->ifindex;
3266
3267 __this_cpu_inc(softnet_data.processed);
3268
3269 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3270 skb = vlan_untag(skb);
3271 if (unlikely(!skb))
3272 goto unlock;
3273 }
3274
3275#ifdef CONFIG_NET_CLS_ACT
3276 if (skb->tc_verd & TC_NCLS) {
3277 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3278 goto ncls;
3279 }
3280#endif
3281
3282 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3283 goto skip_taps;
3284
3285 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3286 if (!ptype->dev || ptype->dev == skb->dev) {
3287 if (pt_prev)
3288 ret = deliver_skb(skb, pt_prev, orig_dev);
3289 pt_prev = ptype;
3290 }
3291 }
3292
3293skip_taps:
3294#ifdef CONFIG_NET_CLS_ACT
3295 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3296 if (!skb)
3297 goto unlock;
3298ncls:
3299#endif
3300
3301 if (sk_memalloc_socks() && skb_pfmemalloc(skb)
3302 && !skb_pfmemalloc_protocol(skb))
3303 goto drop;
3304
3305 if (vlan_tx_tag_present(skb)) {
3306 if (pt_prev) {
3307 ret = deliver_skb(skb, pt_prev, orig_dev);
3308 pt_prev = NULL;
3309 }
3310 if (vlan_do_receive(&skb))
3311 goto another_round;
3312 else if (unlikely(!skb))
3313 goto unlock;
3314 }
3315
3316 rx_handler = rcu_dereference(skb->dev->rx_handler);
3317 if (rx_handler) {
3318 if (pt_prev) {
3319 ret = deliver_skb(skb, pt_prev, orig_dev);
3320 pt_prev = NULL;
3321 }
3322 switch (rx_handler(&skb)) {
3323 case RX_HANDLER_CONSUMED:
3324 goto unlock;
3325 case RX_HANDLER_ANOTHER:
3326 goto another_round;
3327 case RX_HANDLER_EXACT:
3328 deliver_exact = true;
3329 case RX_HANDLER_PASS:
3330 break;
3331 default:
3332 BUG();
3333 }
3334 }
3335
3336 if (vlan_tx_nonzero_tag_present(skb))
3337 skb->pkt_type = PACKET_OTHERHOST;
3338
3339
3340 null_or_dev = deliver_exact ? skb->dev : NULL;
3341
3342 type = skb->protocol;
3343 list_for_each_entry_rcu(ptype,
3344 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3345 if (ptype->type == type &&
3346 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3347 ptype->dev == orig_dev)) {
3348 if (pt_prev)
3349 ret = deliver_skb(skb, pt_prev, orig_dev);
3350 pt_prev = ptype;
3351 }
3352 }
3353
3354 if (pt_prev) {
3355 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3356 goto drop;
3357 else
3358 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3359 } else {
3360drop:
3361 atomic_long_inc(&skb->dev->rx_dropped);
3362 kfree_skb(skb);
3363
3364
3365
3366 ret = NET_RX_DROP;
3367 }
3368
3369unlock:
3370 rcu_read_unlock();
3371out:
3372 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3373 return ret;
3374}
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391int netif_receive_skb(struct sk_buff *skb)
3392{
3393 net_timestamp_check(netdev_tstamp_prequeue, skb);
3394
3395 if (skb_defer_rx_timestamp(skb))
3396 return NET_RX_SUCCESS;
3397
3398#ifdef CONFIG_RPS
3399 if (static_key_false(&rps_needed)) {
3400 struct rps_dev_flow voidflow, *rflow = &voidflow;
3401 int cpu, ret;
3402
3403 rcu_read_lock();
3404
3405 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3406
3407 if (cpu >= 0) {
3408 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3409 rcu_read_unlock();
3410 return ret;
3411 }
3412 rcu_read_unlock();
3413 }
3414#endif
3415 return __netif_receive_skb(skb);
3416}
3417EXPORT_SYMBOL(netif_receive_skb);
3418
3419
3420
3421
3422static void flush_backlog(void *arg)
3423{
3424 struct net_device *dev = arg;
3425 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3426 struct sk_buff *skb, *tmp;
3427
3428 rps_lock(sd);
3429 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3430 if (skb->dev == dev) {
3431 __skb_unlink(skb, &sd->input_pkt_queue);
3432 kfree_skb(skb);
3433 input_queue_head_incr(sd);
3434 }
3435 }
3436 rps_unlock(sd);
3437
3438 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3439 if (skb->dev == dev) {
3440 __skb_unlink(skb, &sd->process_queue);
3441 kfree_skb(skb);
3442 input_queue_head_incr(sd);
3443 }
3444 }
3445}
3446
3447static int napi_gro_complete(struct sk_buff *skb)
3448{
3449 struct packet_type *ptype;
3450 __be16 type = skb->protocol;
3451 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3452 int err = -ENOENT;
3453
3454 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3455
3456 if (NAPI_GRO_CB(skb)->count == 1) {
3457 skb_shinfo(skb)->gso_size = 0;
3458 goto out;
3459 }
3460
3461 rcu_read_lock();
3462 list_for_each_entry_rcu(ptype, head, list) {
3463 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3464 continue;
3465
3466 err = ptype->gro_complete(skb);
3467 break;
3468 }
3469 rcu_read_unlock();
3470
3471 if (err) {
3472 WARN_ON(&ptype->list == head);
3473 kfree_skb(skb);
3474 return NET_RX_SUCCESS;
3475 }
3476
3477out:
3478 return netif_receive_skb(skb);
3479}
3480
3481
3482
3483
3484
3485void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3486{
3487 struct sk_buff *skb, *prev = NULL;
3488
3489
3490 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3491 skb->prev = prev;
3492 prev = skb;
3493 }
3494
3495 for (skb = prev; skb; skb = prev) {
3496 skb->next = NULL;
3497
3498 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3499 return;
3500
3501 prev = skb->prev;
3502 napi_gro_complete(skb);
3503 napi->gro_count--;
3504 }
3505
3506 napi->gro_list = NULL;
3507}
3508EXPORT_SYMBOL(napi_gro_flush);
3509
3510enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3511{
3512 struct sk_buff **pp = NULL;
3513 struct packet_type *ptype;
3514 __be16 type = skb->protocol;
3515 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3516 int same_flow;
3517 int mac_len;
3518 enum gro_result ret;
3519
3520 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3521 goto normal;
3522
3523 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3524 goto normal;
3525
3526 rcu_read_lock();
3527 list_for_each_entry_rcu(ptype, head, list) {
3528 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3529 continue;
3530
3531 skb_set_network_header(skb, skb_gro_offset(skb));
3532 mac_len = skb->network_header - skb->mac_header;
3533 skb->mac_len = mac_len;
3534 NAPI_GRO_CB(skb)->same_flow = 0;
3535 NAPI_GRO_CB(skb)->flush = 0;
3536 NAPI_GRO_CB(skb)->free = 0;
3537
3538 pp = ptype->gro_receive(&napi->gro_list, skb);
3539 break;
3540 }
3541 rcu_read_unlock();
3542
3543 if (&ptype->list == head)
3544 goto normal;
3545
3546 same_flow = NAPI_GRO_CB(skb)->same_flow;
3547 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3548
3549 if (pp) {
3550 struct sk_buff *nskb = *pp;
3551
3552 *pp = nskb->next;
3553 nskb->next = NULL;
3554 napi_gro_complete(nskb);
3555 napi->gro_count--;
3556 }
3557
3558 if (same_flow)
3559 goto ok;
3560
3561 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3562 goto normal;
3563
3564 napi->gro_count++;
3565 NAPI_GRO_CB(skb)->count = 1;
3566 NAPI_GRO_CB(skb)->age = jiffies;
3567 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3568 skb->next = napi->gro_list;
3569 napi->gro_list = skb;
3570 ret = GRO_HELD;
3571
3572pull:
3573 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3574 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3575
3576 BUG_ON(skb->end - skb->tail < grow);
3577
3578 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3579
3580 skb->tail += grow;
3581 skb->data_len -= grow;
3582
3583 skb_shinfo(skb)->frags[0].page_offset += grow;
3584 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3585
3586 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3587 skb_frag_unref(skb, 0);
3588 memmove(skb_shinfo(skb)->frags,
3589 skb_shinfo(skb)->frags + 1,
3590 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3591 }
3592 }
3593
3594ok:
3595 return ret;
3596
3597normal:
3598 ret = GRO_NORMAL;
3599 goto pull;
3600}
3601EXPORT_SYMBOL(dev_gro_receive);
3602
3603static inline gro_result_t
3604__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3605{
3606 struct sk_buff *p;
3607 unsigned int maclen = skb->dev->hard_header_len;
3608
3609 for (p = napi->gro_list; p; p = p->next) {
3610 unsigned long diffs;
3611
3612 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3613 diffs |= p->vlan_tci ^ skb->vlan_tci;
3614 if (maclen == ETH_HLEN)
3615 diffs |= compare_ether_header(skb_mac_header(p),
3616 skb_gro_mac_header(skb));
3617 else if (!diffs)
3618 diffs = memcmp(skb_mac_header(p),
3619 skb_gro_mac_header(skb),
3620 maclen);
3621 NAPI_GRO_CB(p)->same_flow = !diffs;
3622 NAPI_GRO_CB(p)->flush = 0;
3623 }
3624
3625 return dev_gro_receive(napi, skb);
3626}
3627
3628gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3629{
3630 switch (ret) {
3631 case GRO_NORMAL:
3632 if (netif_receive_skb(skb))
3633 ret = GRO_DROP;
3634 break;
3635
3636 case GRO_DROP:
3637 kfree_skb(skb);
3638 break;
3639
3640 case GRO_MERGED_FREE:
3641 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3642 kmem_cache_free(skbuff_head_cache, skb);
3643 else
3644 __kfree_skb(skb);
3645 break;
3646
3647 case GRO_HELD:
3648 case GRO_MERGED:
3649 break;
3650 }
3651
3652 return ret;
3653}
3654EXPORT_SYMBOL(napi_skb_finish);
3655
3656static void skb_gro_reset_offset(struct sk_buff *skb)
3657{
3658 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3659 const skb_frag_t *frag0 = &pinfo->frags[0];
3660
3661 NAPI_GRO_CB(skb)->data_offset = 0;
3662 NAPI_GRO_CB(skb)->frag0 = NULL;
3663 NAPI_GRO_CB(skb)->frag0_len = 0;
3664
3665 if (skb->mac_header == skb->tail &&
3666 pinfo->nr_frags &&
3667 !PageHighMem(skb_frag_page(frag0))) {
3668 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3669 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3670 }
3671}
3672
3673gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3674{
3675 skb_gro_reset_offset(skb);
3676
3677 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3678}
3679EXPORT_SYMBOL(napi_gro_receive);
3680
3681static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3682{
3683 __skb_pull(skb, skb_headlen(skb));
3684
3685 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3686 skb->vlan_tci = 0;
3687 skb->dev = napi->dev;
3688 skb->skb_iif = 0;
3689
3690 napi->skb = skb;
3691}
3692
3693struct sk_buff *napi_get_frags(struct napi_struct *napi)
3694{
3695 struct sk_buff *skb = napi->skb;
3696
3697 if (!skb) {
3698 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3699 if (skb)
3700 napi->skb = skb;
3701 }
3702 return skb;
3703}
3704EXPORT_SYMBOL(napi_get_frags);
3705
3706gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3707 gro_result_t ret)
3708{
3709 switch (ret) {
3710 case GRO_NORMAL:
3711 case GRO_HELD:
3712 skb->protocol = eth_type_trans(skb, skb->dev);
3713
3714 if (ret == GRO_HELD)
3715 skb_gro_pull(skb, -ETH_HLEN);
3716 else if (netif_receive_skb(skb))
3717 ret = GRO_DROP;
3718 break;
3719
3720 case GRO_DROP:
3721 case GRO_MERGED_FREE:
3722 napi_reuse_skb(napi, skb);
3723 break;
3724
3725 case GRO_MERGED:
3726 break;
3727 }
3728
3729 return ret;
3730}
3731EXPORT_SYMBOL(napi_frags_finish);
3732
3733static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3734{
3735 struct sk_buff *skb = napi->skb;
3736 struct ethhdr *eth;
3737 unsigned int hlen;
3738 unsigned int off;
3739
3740 napi->skb = NULL;
3741
3742 skb_reset_mac_header(skb);
3743 skb_gro_reset_offset(skb);
3744
3745 off = skb_gro_offset(skb);
3746 hlen = off + sizeof(*eth);
3747 eth = skb_gro_header_fast(skb, off);
3748 if (skb_gro_header_hard(skb, hlen)) {
3749 eth = skb_gro_header_slow(skb, hlen, off);
3750 if (unlikely(!eth)) {
3751 napi_reuse_skb(napi, skb);
3752 skb = NULL;
3753 goto out;
3754 }
3755 }
3756
3757 skb_gro_pull(skb, sizeof(*eth));
3758
3759
3760
3761
3762
3763 skb->protocol = eth->h_proto;
3764
3765out:
3766 return skb;
3767}
3768
3769gro_result_t napi_gro_frags(struct napi_struct *napi)
3770{
3771 struct sk_buff *skb = napi_frags_skb(napi);
3772
3773 if (!skb)
3774 return GRO_DROP;
3775
3776 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3777}
3778EXPORT_SYMBOL(napi_gro_frags);
3779
3780
3781
3782
3783
/*
 * Re-enable local interrupts and, with CONFIG_RPS, kick every softnet_data
 * queued on sd->rps_ipi_list.
 *
 * The list is detached before interrupts are enabled; each entry whose CPU
 * is online then gets its csd sent via __smp_call_function_single().
 * The function itself never disables interrupts; process_backlog() disables
 * them immediately before calling it.
 */
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	struct softnet_data *remsd = sd->rps_ipi_list;

	if (!remsd) {
		local_irq_enable();
		return;
	}

	sd->rps_ipi_list = NULL;

	local_irq_enable();

	while (remsd) {
		/* Fetch the successor before the IPI is issued. */
		struct softnet_data *next = remsd->rps_ipi_next;

		if (cpu_online(remsd->cpu))
			__smp_call_function_single(remsd->cpu,
						   &remsd->csd, 0);
		remsd = next;
	}
#else
	local_irq_enable();
#endif
}
3807
3808static int process_backlog(struct napi_struct *napi, int quota)
3809{
3810 int work = 0;
3811 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3812
3813#ifdef CONFIG_RPS
3814
3815
3816
3817 if (sd->rps_ipi_list) {
3818 local_irq_disable();
3819 net_rps_action_and_irq_enable(sd);
3820 }
3821#endif
3822 napi->weight = weight_p;
3823 local_irq_disable();
3824 while (work < quota) {
3825 struct sk_buff *skb;
3826 unsigned int qlen;
3827
3828 while ((skb = __skb_dequeue(&sd->process_queue))) {
3829 local_irq_enable();
3830 __netif_receive_skb(skb);
3831 local_irq_disable();
3832 input_queue_head_incr(sd);
3833 if (++work >= quota) {
3834 local_irq_enable();
3835 return work;
3836 }
3837 }
3838
3839 rps_lock(sd);
3840 qlen = skb_queue_len(&sd->input_pkt_queue);
3841 if (qlen)
3842 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3843 &sd->process_queue);
3844
3845 if (qlen < quota - work) {
3846
3847
3848
3849
3850
3851
3852
3853 list_del(&napi->poll_list);
3854 napi->state = 0;
3855
3856 quota = work + qlen;
3857 }
3858 rps_unlock(sd);
3859 }
3860 local_irq_enable();
3861
3862 return work;
3863}
3864
3865
3866
3867
3868
3869
3870
3871void __napi_schedule(struct napi_struct *n)
3872{
3873 unsigned long flags;
3874
3875 local_irq_save(flags);
3876 ____napi_schedule(&__get_cpu_var(softnet_data), n);
3877 local_irq_restore(flags);
3878}
3879EXPORT_SYMBOL(__napi_schedule);
3880
3881void __napi_complete(struct napi_struct *n)
3882{
3883 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3884 BUG_ON(n->gro_list);
3885
3886 list_del(&n->poll_list);
3887 smp_mb__before_clear_bit();
3888 clear_bit(NAPI_STATE_SCHED, &n->state);
3889}
3890EXPORT_SYMBOL(__napi_complete);
3891
3892void napi_complete(struct napi_struct *n)
3893{
3894 unsigned long flags;
3895
3896
3897
3898
3899
3900 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3901 return;
3902
3903 napi_gro_flush(n, false);
3904 local_irq_save(flags);
3905 __napi_complete(n);
3906 local_irq_restore(flags);
3907}
3908EXPORT_SYMBOL(napi_complete);
3909
3910void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3911 int (*poll)(struct napi_struct *, int), int weight)
3912{
3913 INIT_LIST_HEAD(&napi->poll_list);
3914 napi->gro_count = 0;
3915 napi->gro_list = NULL;
3916 napi->skb = NULL;
3917 napi->poll = poll;
3918 napi->weight = weight;
3919 list_add(&napi->dev_list, &dev->napi_list);
3920 napi->dev = dev;
3921#ifdef CONFIG_NETPOLL
3922 spin_lock_init(&napi->poll_lock);
3923 napi->poll_owner = -1;
3924#endif
3925 set_bit(NAPI_STATE_SCHED, &napi->state);
3926}
3927EXPORT_SYMBOL(netif_napi_add);
3928
3929void netif_napi_del(struct napi_struct *napi)
3930{
3931 struct sk_buff *skb, *next;
3932
3933 list_del_init(&napi->dev_list);
3934 napi_free_frags(napi);
3935
3936 for (skb = napi->gro_list; skb; skb = next) {
3937 next = skb->next;
3938 skb->next = NULL;
3939 kfree_skb(skb);
3940 }
3941
3942 napi->gro_list = NULL;
3943 napi->gro_count = 0;
3944}
3945EXPORT_SYMBOL(netif_napi_del);
3946
3947static void net_rx_action(struct softirq_action *h)
3948{
3949 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3950 unsigned long time_limit = jiffies + 2;
3951 int budget = netdev_budget;
3952 void *have;
3953
3954 local_irq_disable();
3955
3956 while (!list_empty(&sd->poll_list)) {
3957 struct napi_struct *n;
3958 int work, weight;
3959
3960
3961
3962
3963
3964 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3965 goto softnet_break;
3966
3967 local_irq_enable();
3968
3969
3970
3971
3972
3973
3974 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3975
3976 have = netpoll_poll_lock(n);
3977
3978 weight = n->weight;
3979
3980
3981
3982
3983
3984
3985
3986 work = 0;
3987 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3988 work = n->poll(n, weight);
3989 trace_napi_poll(n);
3990 }
3991
3992 WARN_ON_ONCE(work > weight);
3993
3994 budget -= work;
3995
3996 local_irq_disable();
3997
3998
3999
4000
4001
4002
4003 if (unlikely(work == weight)) {
4004 if (unlikely(napi_disable_pending(n))) {
4005 local_irq_enable();
4006 napi_complete(n);
4007 local_irq_disable();
4008 } else {
4009 if (n->gro_list) {
4010
4011
4012
4013 local_irq_enable();
4014 napi_gro_flush(n, HZ >= 1000);
4015 local_irq_disable();
4016 }
4017 list_move_tail(&n->poll_list, &sd->poll_list);
4018 }
4019 }
4020
4021 netpoll_poll_unlock(have);
4022 }
4023out:
4024 net_rps_action_and_irq_enable(sd);
4025
4026#ifdef CONFIG_NET_DMA
4027
4028
4029
4030
4031 dma_issue_pending_all();
4032#endif
4033
4034 return;
4035
4036softnet_break:
4037 sd->time_squeeze++;
4038 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4039 goto out;
4040}
4041
4042static gifconf_func_t *gifconf_list[NPROTO];
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
4054{
4055 if (family >= NPROTO)
4056 return -EINVAL;
4057 gifconf_list[family] = gifconf;
4058 return 0;
4059}
4060EXPORT_SYMBOL(register_gifconf);
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074static int dev_ifname(struct net *net, struct ifreq __user *arg)
4075{
4076 struct net_device *dev;
4077 struct ifreq ifr;
4078
4079
4080
4081
4082
4083 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4084 return -EFAULT;
4085
4086 rcu_read_lock();
4087 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
4088 if (!dev) {
4089 rcu_read_unlock();
4090 return -ENODEV;
4091 }
4092
4093 strcpy(ifr.ifr_name, dev->name);
4094 rcu_read_unlock();
4095
4096 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4097 return -EFAULT;
4098 return 0;
4099}
4100
4101
4102
4103
4104
4105
4106
4107static int dev_ifconf(struct net *net, char __user *arg)
4108{
4109 struct ifconf ifc;
4110 struct net_device *dev;
4111 char __user *pos;
4112 int len;
4113 int total;
4114 int i;
4115
4116
4117
4118
4119
4120 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4121 return -EFAULT;
4122
4123 pos = ifc.ifc_buf;
4124 len = ifc.ifc_len;
4125
4126
4127
4128
4129
4130 total = 0;
4131 for_each_netdev(net, dev) {
4132 for (i = 0; i < NPROTO; i++) {
4133 if (gifconf_list[i]) {
4134 int done;
4135 if (!pos)
4136 done = gifconf_list[i](dev, NULL, 0);
4137 else
4138 done = gifconf_list[i](dev, pos + total,
4139 len - total);
4140 if (done < 0)
4141 return -EFAULT;
4142 total += done;
4143 }
4144 }
4145 }
4146
4147
4148
4149
4150 ifc.ifc_len = total;
4151
4152
4153
4154
4155 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4156}
4157
4158#ifdef CONFIG_PROC_FS
4159
/*
 * A position value packs a bucket index in its upper bits and an offset in
 * its low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE		(32 - NETDEV_HASHBITS - 1)

#define get_bucket(x)		((x) >> BUCKET_SPACE)
#define get_offset(x)		((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o)	(((b) << BUCKET_SPACE) | (o))
4165
4166static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4167{
4168 struct net *net = seq_file_net(seq);
4169 struct net_device *dev;
4170 struct hlist_node *p;
4171 struct hlist_head *h;
4172 unsigned int count = 0, offset = get_offset(*pos);
4173
4174 h = &net->dev_name_head[get_bucket(*pos)];
4175 hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4176 if (++count == offset)
4177 return dev;
4178 }
4179
4180 return NULL;
4181}
4182
4183static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
4184{
4185 struct net_device *dev;
4186 unsigned int bucket;
4187
4188 do {
4189 dev = dev_from_same_bucket(seq, pos);
4190 if (dev)
4191 return dev;
4192
4193 bucket = get_bucket(*pos) + 1;
4194 *pos = set_bucket_offset(bucket, 1);
4195 } while (bucket < NETDEV_HASHENTRIES);
4196
4197 return NULL;
4198}
4199
4200
4201
4202
4203
4204void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4205 __acquires(RCU)
4206{
4207 rcu_read_lock();
4208 if (!*pos)
4209 return SEQ_START_TOKEN;
4210
4211 if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4212 return NULL;
4213
4214 return dev_from_bucket(seq, pos);
4215}
4216
4217void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4218{
4219 ++*pos;
4220 return dev_from_bucket(seq, pos);
4221}
4222
4223void dev_seq_stop(struct seq_file *seq, void *v)
4224 __releases(RCU)
4225{
4226 rcu_read_unlock();
4227}
4228
4229static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4230{
4231 struct rtnl_link_stats64 temp;
4232 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4233
4234 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4235 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4236 dev->name, stats->rx_bytes, stats->rx_packets,
4237 stats->rx_errors,
4238 stats->rx_dropped + stats->rx_missed_errors,
4239 stats->rx_fifo_errors,
4240 stats->rx_length_errors + stats->rx_over_errors +
4241 stats->rx_crc_errors + stats->rx_frame_errors,
4242 stats->rx_compressed, stats->multicast,
4243 stats->tx_bytes, stats->tx_packets,
4244 stats->tx_errors, stats->tx_dropped,
4245 stats->tx_fifo_errors, stats->collisions,
4246 stats->tx_carrier_errors +
4247 stats->tx_aborted_errors +
4248 stats->tx_window_errors +
4249 stats->tx_heartbeat_errors,
4250 stats->tx_compressed);
4251}
4252
4253
4254
4255
4256
4257static int dev_seq_show(struct seq_file *seq, void *v)
4258{
4259 if (v == SEQ_START_TOKEN)
4260 seq_puts(seq, "Inter-| Receive "
4261 " | Transmit\n"
4262 " face |bytes packets errs drop fifo frame "
4263 "compressed multicast|bytes packets errs "
4264 "drop fifo colls carrier compressed\n");
4265 else
4266 dev_seq_printf_stats(seq, v);
4267 return 0;
4268}
4269
4270static struct softnet_data *softnet_get_online(loff_t *pos)
4271{
4272 struct softnet_data *sd = NULL;
4273
4274 while (*pos < nr_cpu_ids)
4275 if (cpu_online(*pos)) {
4276 sd = &per_cpu(softnet_data, *pos);
4277 break;
4278 } else
4279 ++*pos;
4280 return sd;
4281}
4282
4283static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4284{
4285 return softnet_get_online(pos);
4286}
4287
4288static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4289{
4290 ++*pos;
4291 return softnet_get_online(pos);
4292}
4293
/* seq_file ->stop: nothing to release. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
4297
4298static int softnet_seq_show(struct seq_file *seq, void *v)
4299{
4300 struct softnet_data *sd = v;
4301
4302 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4303 sd->processed, sd->dropped, sd->time_squeeze, 0,
4304 0, 0, 0, 0,
4305 sd->cpu_collision, sd->received_rps);
4306 return 0;
4307}
4308
4309static const struct seq_operations dev_seq_ops = {
4310 .start = dev_seq_start,
4311 .next = dev_seq_next,
4312 .stop = dev_seq_stop,
4313 .show = dev_seq_show,
4314};
4315
4316static int dev_seq_open(struct inode *inode, struct file *file)
4317{
4318 return seq_open_net(inode, file, &dev_seq_ops,
4319 sizeof(struct seq_net_private));
4320}
4321
4322static const struct file_operations dev_seq_fops = {
4323 .owner = THIS_MODULE,
4324 .open = dev_seq_open,
4325 .read = seq_read,
4326 .llseek = seq_lseek,
4327 .release = seq_release_net,
4328};
4329
4330static const struct seq_operations softnet_seq_ops = {
4331 .start = softnet_seq_start,
4332 .next = softnet_seq_next,
4333 .stop = softnet_seq_stop,
4334 .show = softnet_seq_show,
4335};
4336
4337static int softnet_seq_open(struct inode *inode, struct file *file)
4338{
4339 return seq_open(file, &softnet_seq_ops);
4340}
4341
4342static const struct file_operations softnet_seq_fops = {
4343 .owner = THIS_MODULE,
4344 .open = softnet_seq_open,
4345 .read = seq_read,
4346 .llseek = seq_lseek,
4347 .release = seq_release,
4348};
4349
4350static void *ptype_get_idx(loff_t pos)
4351{
4352 struct packet_type *pt = NULL;
4353 loff_t i = 0;
4354 int t;
4355
4356 list_for_each_entry_rcu(pt, &ptype_all, list) {
4357 if (i == pos)
4358 return pt;
4359 ++i;
4360 }
4361
4362 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4363 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4364 if (i == pos)
4365 return pt;
4366 ++i;
4367 }
4368 }
4369 return NULL;
4370}
4371
4372static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4373 __acquires(RCU)
4374{
4375 rcu_read_lock();
4376 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4377}
4378
4379static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4380{
4381 struct packet_type *pt;
4382 struct list_head *nxt;
4383 int hash;
4384
4385 ++*pos;
4386 if (v == SEQ_START_TOKEN)
4387 return ptype_get_idx(0);
4388
4389 pt = v;
4390 nxt = pt->list.next;
4391 if (pt->type == htons(ETH_P_ALL)) {
4392 if (nxt != &ptype_all)
4393 goto found;
4394 hash = 0;
4395 nxt = ptype_base[0].next;
4396 } else
4397 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4398
4399 while (nxt == &ptype_base[hash]) {
4400 if (++hash >= PTYPE_HASH_SIZE)
4401 return NULL;
4402 nxt = ptype_base[hash].next;
4403 }
4404found:
4405 return list_entry(nxt, struct packet_type, list);
4406}
4407
4408static void ptype_seq_stop(struct seq_file *seq, void *v)
4409 __releases(RCU)
4410{
4411 rcu_read_unlock();
4412}
4413
4414static int ptype_seq_show(struct seq_file *seq, void *v)
4415{
4416 struct packet_type *pt = v;
4417
4418 if (v == SEQ_START_TOKEN)
4419 seq_puts(seq, "Type Device Function\n");
4420 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4421 if (pt->type == htons(ETH_P_ALL))
4422 seq_puts(seq, "ALL ");
4423 else
4424 seq_printf(seq, "%04x", ntohs(pt->type));
4425
4426 seq_printf(seq, " %-8s %pF\n",
4427 pt->dev ? pt->dev->name : "", pt->func);
4428 }
4429
4430 return 0;
4431}
4432
4433static const struct seq_operations ptype_seq_ops = {
4434 .start = ptype_seq_start,
4435 .next = ptype_seq_next,
4436 .stop = ptype_seq_stop,
4437 .show = ptype_seq_show,
4438};
4439
4440static int ptype_seq_open(struct inode *inode, struct file *file)
4441{
4442 return seq_open_net(inode, file, &ptype_seq_ops,
4443 sizeof(struct seq_net_private));
4444}
4445
4446static const struct file_operations ptype_seq_fops = {
4447 .owner = THIS_MODULE,
4448 .open = ptype_seq_open,
4449 .read = seq_read,
4450 .llseek = seq_lseek,
4451 .release = seq_release_net,
4452};
4453
4454
4455static int __net_init dev_proc_net_init(struct net *net)
4456{
4457 int rc = -ENOMEM;
4458
4459 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4460 goto out;
4461 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4462 goto out_dev;
4463 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4464 goto out_softnet;
4465
4466 if (wext_proc_init(net))
4467 goto out_ptype;
4468 rc = 0;
4469out:
4470 return rc;
4471out_ptype:
4472 proc_net_remove(net, "ptype");
4473out_softnet:
4474 proc_net_remove(net, "softnet_stat");
4475out_dev:
4476 proc_net_remove(net, "dev");
4477 goto out;
4478}
4479
4480static void __net_exit dev_proc_net_exit(struct net *net)
4481{
4482 wext_proc_exit(net);
4483
4484 proc_net_remove(net, "ptype");
4485 proc_net_remove(net, "softnet_stat");
4486 proc_net_remove(net, "dev");
4487}
4488
4489static struct pernet_operations __net_initdata dev_proc_ops = {
4490 .init = dev_proc_net_init,
4491 .exit = dev_proc_net_exit,
4492};
4493
4494static int __init dev_proc_init(void)
4495{
4496 return register_pernet_subsys(&dev_proc_ops);
4497}
4498#else
4499#define dev_proc_init() 0
4500#endif
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513int netdev_set_master(struct net_device *slave, struct net_device *master)
4514{
4515 struct net_device *old = slave->master;
4516
4517 ASSERT_RTNL();
4518
4519 if (master) {
4520 if (old)
4521 return -EBUSY;
4522 dev_hold(master);
4523 }
4524
4525 slave->master = master;
4526
4527 if (old)
4528 dev_put(old);
4529 return 0;
4530}
4531EXPORT_SYMBOL(netdev_set_master);
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4544{
4545 int err;
4546
4547 ASSERT_RTNL();
4548
4549 err = netdev_set_master(slave, master);
4550 if (err)
4551 return err;
4552 if (master)
4553 slave->flags |= IFF_SLAVE;
4554 else
4555 slave->flags &= ~IFF_SLAVE;
4556
4557 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4558 return 0;
4559}
4560EXPORT_SYMBOL(netdev_set_bond_master);
4561
4562static void dev_change_rx_flags(struct net_device *dev, int flags)
4563{
4564 const struct net_device_ops *ops = dev->netdev_ops;
4565
4566 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4567 ops->ndo_change_rx_flags(dev, flags);
4568}
4569
/*
 * Adjust the promiscuity reference count of @dev by @inc and keep
 * IFF_PROMISC in dev->flags in step with it.  The caller must hold RTNL.
 *
 * Returns 0 on success.  Returns -EOVERFLOW, with the counter restored to
 * its previous value, when a non-negative @inc leaves the counter at zero.
 * Unlike dev_set_promiscuity(), this does not call dev_set_rx_mode().
 */
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned int old_flags = dev->flags;
	kuid_t uid;
	kgid_t gid;

	ASSERT_RTNL();

	/* Set the flag optimistically; it is cleared below if the count hit 0. */
	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * Zero after a decrement means the last user is gone, so the
		 * flag is dropped.  Zero after an increment is treated as an
		 * overflow: undo the change and return an error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
				dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		/* The flag really toggled: log it, audit it, tell the driver. */
		pr_info("device %s %s promiscuous mode\n",
			dev->name,
			dev->flags & IFF_PROMISC ? "entered" : "left");
		if (audit_enabled) {
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				from_kuid(&init_user_ns, audit_get_loginuid(current)),
				from_kuid(&init_user_ns, uid),
				from_kgid(&init_user_ns, gid),
				audit_get_sessionid(current));
		}

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627int dev_set_promiscuity(struct net_device *dev, int inc)
4628{
4629 unsigned int old_flags = dev->flags;
4630 int err;
4631
4632 err = __dev_set_promiscuity(dev, inc);
4633 if (err < 0)
4634 return err;
4635 if (dev->flags != old_flags)
4636 dev_set_rx_mode(dev);
4637 return err;
4638}
4639EXPORT_SYMBOL(dev_set_promiscuity);
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654int dev_set_allmulti(struct net_device *dev, int inc)
4655{
4656 unsigned int old_flags = dev->flags;
4657
4658 ASSERT_RTNL();
4659
4660 dev->flags |= IFF_ALLMULTI;
4661 dev->allmulti += inc;
4662 if (dev->allmulti == 0) {
4663
4664
4665
4666
4667 if (inc < 0)
4668 dev->flags &= ~IFF_ALLMULTI;
4669 else {
4670 dev->allmulti -= inc;
4671 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4672 dev->name);
4673 return -EOVERFLOW;
4674 }
4675 }
4676 if (dev->flags ^ old_flags) {
4677 dev_change_rx_flags(dev, IFF_ALLMULTI);
4678 dev_set_rx_mode(dev);
4679 }
4680 return 0;
4681}
4682EXPORT_SYMBOL(dev_set_allmulti);
4683
4684
4685
4686
4687
4688
4689
4690void __dev_set_rx_mode(struct net_device *dev)
4691{
4692 const struct net_device_ops *ops = dev->netdev_ops;
4693
4694
4695 if (!(dev->flags&IFF_UP))
4696 return;
4697
4698 if (!netif_device_present(dev))
4699 return;
4700
4701 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4702
4703
4704
4705 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4706 __dev_set_promiscuity(dev, 1);
4707 dev->uc_promisc = true;
4708 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4709 __dev_set_promiscuity(dev, -1);
4710 dev->uc_promisc = false;
4711 }
4712 }
4713
4714 if (ops->ndo_set_rx_mode)
4715 ops->ndo_set_rx_mode(dev);
4716}
4717
/*
 * Locked wrapper around __dev_set_rx_mode(): takes the device's address
 * list lock (BH variant) for the duration of the call.
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}
4724
4725
4726
4727
4728
4729
4730
4731unsigned int dev_get_flags(const struct net_device *dev)
4732{
4733 unsigned int flags;
4734
4735 flags = (dev->flags & ~(IFF_PROMISC |
4736 IFF_ALLMULTI |
4737 IFF_RUNNING |
4738 IFF_LOWER_UP |
4739 IFF_DORMANT)) |
4740 (dev->gflags & (IFF_PROMISC |
4741 IFF_ALLMULTI));
4742
4743 if (netif_running(dev)) {
4744 if (netif_oper_up(dev))
4745 flags |= IFF_RUNNING;
4746 if (netif_carrier_ok(dev))
4747 flags |= IFF_LOWER_UP;
4748 if (netif_dormant(dev))
4749 flags |= IFF_DORMANT;
4750 }
4751
4752 return flags;
4753}
4754EXPORT_SYMBOL(dev_get_flags);
4755
/*
 * Apply a new flag word to @dev without sending any notifications
 * (dev_change_flags() is the notifying wrapper).  The caller must hold
 * RTNL.
 *
 * Returns 0, or the error from __dev_open()/__dev_close() when IFF_UP
 * was toggled and that operation failed.
 */
int __dev_change_flags(struct net_device *dev, unsigned int flags)
{
	unsigned int old_flags = dev->flags;
	int ret;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.  Only the bits in the first mask
	 *	are taken from the caller; IFF_UP, IFF_VOLATILE, IFF_PROMISC
	 *	and IFF_ALLMULTI keep their current value here and are handled
	 *	separately below.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Reprogram the receive mode now that the flags have changed.
	 */

	if ((old_flags ^ flags) & IFF_MULTICAST)
		dev_change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	IFF_UP differs between old and requested flags: close the
	 *	device if it was up, open it otherwise.  The receive mode is
	 *	reprogrammed again only when that succeeded.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {
		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	/*
	 * dev->gflags holds the requested state of IFF_PROMISC; when the
	 * request changes, flip it and adjust the promiscuity count by one.
	 * NOTE(review): the return value of dev_set_promiscuity() is ignored.
	 */
	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? 1 : -1;

		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/*
	 * Same scheme for IFF_ALLMULTI.
	 * NOTE(review): the return value of dev_set_allmulti() is ignored.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;

		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	return ret;
}
4816
4817void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4818{
4819 unsigned int changes = dev->flags ^ old_flags;
4820
4821 if (changes & IFF_UP) {
4822 if (dev->flags & IFF_UP)
4823 call_netdevice_notifiers(NETDEV_UP, dev);
4824 else
4825 call_netdevice_notifiers(NETDEV_DOWN, dev);
4826 }
4827
4828 if (dev->flags & IFF_UP &&
4829 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4830 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4831}
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841int dev_change_flags(struct net_device *dev, unsigned int flags)
4842{
4843 int ret;
4844 unsigned int changes, old_flags = dev->flags;
4845
4846 ret = __dev_change_flags(dev, flags);
4847 if (ret < 0)
4848 return ret;
4849
4850 changes = old_flags ^ dev->flags;
4851 if (changes)
4852 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4853
4854 __dev_notify_flags(dev, old_flags);
4855 return ret;
4856}
4857EXPORT_SYMBOL(dev_change_flags);
4858
4859
4860
4861
4862
4863
4864
4865
4866int dev_set_mtu(struct net_device *dev, int new_mtu)
4867{
4868 const struct net_device_ops *ops = dev->netdev_ops;
4869 int err;
4870
4871 if (new_mtu == dev->mtu)
4872 return 0;
4873
4874
4875 if (new_mtu < 0)
4876 return -EINVAL;
4877
4878 if (!netif_device_present(dev))
4879 return -ENODEV;
4880
4881 err = 0;
4882 if (ops->ndo_change_mtu)
4883 err = ops->ndo_change_mtu(dev, new_mtu);
4884 else
4885 dev->mtu = new_mtu;
4886
4887 if (!err && dev->flags & IFF_UP)
4888 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4889 return err;
4890}
4891EXPORT_SYMBOL(dev_set_mtu);
4892
4893
4894
4895
4896
4897
/**
 *	dev_set_group - Change group this device belongs to
 *	@dev: device
 *	@new_group: group this device should belong to
 *
 *	Plain store into dev->group; no validation and no notification.
 */
void dev_set_group(struct net_device *dev, int new_group)
{
	dev->group = new_group;
}
EXPORT_SYMBOL(dev_set_group);
4903
4904
4905
4906
4907
4908
4909
4910
4911int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4912{
4913 const struct net_device_ops *ops = dev->netdev_ops;
4914 int err;
4915
4916 if (!ops->ndo_set_mac_address)
4917 return -EOPNOTSUPP;
4918 if (sa->sa_family != dev->type)
4919 return -EINVAL;
4920 if (!netif_device_present(dev))
4921 return -ENODEV;
4922 err = ops->ndo_set_mac_address(dev, sa);
4923 if (!err)
4924 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4925 add_device_randomness(dev->dev_addr, dev->addr_len);
4926 return err;
4927}
4928EXPORT_SYMBOL(dev_set_mac_address);
4929
4930
4931
4932
4933static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4934{
4935 int err;
4936 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4937
4938 if (!dev)
4939 return -ENODEV;
4940
4941 switch (cmd) {
4942 case SIOCGIFFLAGS:
4943 ifr->ifr_flags = (short) dev_get_flags(dev);
4944 return 0;
4945
4946 case SIOCGIFMETRIC:
4947
4948 ifr->ifr_metric = 0;
4949 return 0;
4950
4951 case SIOCGIFMTU:
4952 ifr->ifr_mtu = dev->mtu;
4953 return 0;
4954
4955 case SIOCGIFHWADDR:
4956 if (!dev->addr_len)
4957 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4958 else
4959 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4960 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4961 ifr->ifr_hwaddr.sa_family = dev->type;
4962 return 0;
4963
4964 case SIOCGIFSLAVE:
4965 err = -EINVAL;
4966 break;
4967
4968 case SIOCGIFMAP:
4969 ifr->ifr_map.mem_start = dev->mem_start;
4970 ifr->ifr_map.mem_end = dev->mem_end;
4971 ifr->ifr_map.base_addr = dev->base_addr;
4972 ifr->ifr_map.irq = dev->irq;
4973 ifr->ifr_map.dma = dev->dma;
4974 ifr->ifr_map.port = dev->if_port;
4975 return 0;
4976
4977 case SIOCGIFINDEX:
4978 ifr->ifr_ifindex = dev->ifindex;
4979 return 0;
4980
4981 case SIOCGIFTXQLEN:
4982 ifr->ifr_qlen = dev->tx_queue_len;
4983 return 0;
4984
4985 default:
4986
4987
4988
4989 WARN_ON(1);
4990 err = -ENOTTY;
4991 break;
4992
4993 }
4994 return err;
4995}
4996
4997
4998
4999
/*
 * Handle the interface ioctls that modify state or are forwarded to the
 * driver.  The device is looked up by ifr->ifr_name with the non-RCU
 * helper; dev_ioctl() calls this with RTNL held.
 *
 * Returns -ENODEV when the device does not exist (or is not present for
 * commands that need the hardware), otherwise the result of the specific
 * operation; unknown commands yield -EINVAL.
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
	const struct net_device_ops *ops;

	if (!dev)
		return -ENODEV;

	ops = dev->netdev_ops;

	switch (cmd) {
	case SIOCSIFFLAGS:	/* Set interface flags */
		return dev_change_flags(dev, ifr->ifr_flags);

	case SIOCSIFMETRIC:	/* Setting the metric is not supported */
		return -EOPNOTSUPP;

	case SIOCSIFMTU:	/* Set the MTU of a device */
		return dev_set_mtu(dev, ifr->ifr_mtu);

	case SIOCSIFHWADDR:
		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

	case SIOCSIFHWBROADCAST:
		if (ifr->ifr_hwaddr.sa_family != dev->type)
			return -EINVAL;
		/* Copy at most addr_len bytes, bounded by the sa_data size. */
		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
		return 0;

	case SIOCSIFMAP:
		if (ops->ndo_set_config) {
			if (!netif_device_present(dev))
				return -ENODEV;
			return ops->ndo_set_config(dev, &ifr->ifr_map);
		}
		return -EOPNOTSUPP;

	case SIOCADDMULTI:
		/* Needs a driver rx-mode hook and an AF_UNSPEC address. */
		if (!ops->ndo_set_rx_mode ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);

	case SIOCDELMULTI:
		if (!ops->ndo_set_rx_mode ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);

	case SIOCSIFTXQLEN:
		if (ifr->ifr_qlen < 0)
			return -EINVAL;
		dev->tx_queue_len = ifr->ifr_qlen;
		return 0;

	case SIOCSIFNAME:
		/* Force NUL termination before the name is used. */
		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
		return dev_change_name(dev, ifr->ifr_newname);

	case SIOCSHWTSTAMP:
		err = net_hwtstamp_validate(ifr);
		if (err)
			return err;
		/*
		 * fall through: a validated SIOCSHWTSTAMP is forwarded to the
		 * driver by the default branch, which lists it explicitly.
		 */

	/*
	 *	Commands forwarded to the driver's ndo_do_ioctl hook.
	 */
	default:
		if ((cmd >= SIOCDEVPRIVATE &&
		    cmd <= SIOCDEVPRIVATE + 15) ||
		    cmd == SIOCBONDENSLAVE ||
		    cmd == SIOCBONDRELEASE ||
		    cmd == SIOCBONDSETHWADDR ||
		    cmd == SIOCBONDSLAVEINFOQUERY ||
		    cmd == SIOCBONDINFOQUERY ||
		    cmd == SIOCBONDCHANGEACTIVE ||
		    cmd == SIOCGMIIPHY ||
		    cmd == SIOCGMIIREG ||
		    cmd == SIOCSMIIREG ||
		    cmd == SIOCBRADDIF ||
		    cmd == SIOCBRDELIF ||
		    cmd == SIOCSHWTSTAMP ||
		    cmd == SIOCWANDEV) {
			/* -EOPNOTSUPP unless the driver implements the hook. */
			err = -EOPNOTSUPP;
			if (ops->ndo_do_ioctl) {
				if (netif_device_present(dev))
					err = ops->ndo_do_ioctl(dev, ifr, cmd);
				else
					err = -ENODEV;
			}
		} else
			err = -EINVAL;

	}
	return err;
}
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Dispatches interface ioctls.  Depending on the command the work is
 *	done under rcu_read_lock() (read-only queries) or under RTNL, with
 *	or without a CAP_NET_ADMIN check, and the (possibly updated) ifreq
 *	is copied back to user space for the commands that return data.
 *	Returns 0 or a negative errno.
 */
int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/*
	 * SIOCGIFCONF and SIOCGIFNAME are handed the user pointer directly,
	 * before the generic ifreq copy-in below.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Never trust user space to have NUL-terminated the name. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/*
	 * Cut the name at the first ':' for the device lookup; the colon is
	 * put back before the ifreq is copied to user space (except in the
	 * default branch below).
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
	/*
	 *	These ioctl calls:
	 *	- have no capability check
	 *	- run under rcu_read_lock()
	 *	- return a value
	 */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHWADDR:
	case SIOCGIFSLAVE:
	case SIOCGIFMAP:
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr.ifr_name);
		rcu_read_lock();
		ret = dev_ifsioc_locked(net, &ifr, cmd);
		rcu_read_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/* No capability check here; runs under RTNL and returns a value. */
	case SIOCETHTOOL:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ethtool(net, &ifr);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require CAP_NET_ADMIN
	 *	- run under RTNL
	 *	- return a value
	 */
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSIFNAME:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require CAP_NET_ADMIN
	 *	- run under RTNL
	 *	- do not return a value
	 */
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMTU:
	case SIOCSIFMAP:
	case SIOCSIFHWADDR:
	case SIOCSIFSLAVE:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFHWBROADCAST:
	case SIOCSIFTXQLEN:
	case SIOCSMIIREG:
	case SIOCBONDENSLAVE:
	case SIOCBONDRELEASE:
	case SIOCBONDSETHWADDR:
	case SIOCBONDCHANGEACTIVE:
	case SIOCBRADDIF:
	case SIOCBRDELIF:
	case SIOCSHWTSTAMP:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* fall through */
	/* The two bonding queries share the path above minus the check. */
	case SIOCBONDSLAVEINFOQUERY:
	case SIOCBONDINFOQUERY:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		return ret;

	case SIOCGIFMEM:
		/* Get the per device memory space: not implemented. */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space: not implemented. */
	case SIOCSIFLINK:
		return -ENOTTY;

	/*
	 *	Unknown or private ioctl.
	 */
	default:
		if (cmd == SIOCWANDEV ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			/* NOTE(review): the ':' is not restored on this path. */
			if (!ret && copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
				ret = -EFAULT;
			return ret;
		}
		/* Hand the wireless extensions range to its own handler. */
		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
			return wext_handle_ioctl(net, &ifr, cmd, arg);
		return -ENOTTY;
	}
}
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298static int dev_new_index(struct net *net)
5299{
5300 int ifindex = net->ifindex;
5301 for (;;) {
5302 if (++ifindex <= 0)
5303 ifindex = 1;
5304 if (!__dev_get_by_index(net, ifindex))
5305 return net->ifindex = ifindex;
5306 }
5307}
5308
5309
/* List of devices queued through net_set_todo() for delayed processing. */
static LIST_HEAD(net_todo_list);

/* Append @dev (via its todo_list link) to the tail of net_todo_list. */
static void net_set_todo(struct net_device *dev)
{
	list_add_tail(&dev->todo_list, &net_todo_list);
}
5316
/*
 * Tear down every device linked on @head through its unreg_list member.
 * Must not be called during the boot phase; the caller must hold RTNL.
 *
 * The work is done in passes over the whole list, separated by
 * synchronize_net() calls: validate and mark, close, unlist, shut down
 * and notify, and finally drop one reference per device.
 */
static void rollback_registered_many(struct list_head *head)
{
	struct net_device *dev, *tmp;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/*
		 * A device that never got past NETREG_UNINITIALIZED is
		 * reported, removed from the list and otherwise ignored.
		 */
		if (dev->reg_state == NETREG_UNINITIALIZED) {
			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
				 dev->name, dev);

			WARN_ON(1);
			list_del(&dev->unreg_list);
			continue;
		}
		dev->dismantle = true;
		/* Anything left on the list must be fully registered. */
		BUG_ON(dev->reg_state != NETREG_REGISTERED);
	}

	/* Close the devices on the list first. */
	dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		/* And unlink it from the device lists. */
		unlist_netdevice(dev);

		dev->reg_state = NETREG_UNREGISTERING;
	}

	synchronize_net();

	list_for_each_entry(dev, head, unreg_list) {
		/* Shut down the queueing discipline. */
		dev_shutdown(dev);

		/* Tell the notifier chain that the device is going away. */
		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

		/*
		 * Send RTM_DELLINK unless the device has rtnl_link_ops and is
		 * not in the RTNL_LINK_INITIALIZED state.
		 */
		if (!dev->rtnl_link_ops ||
		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

		/*
		 *	Flush the unicast and multicast address lists.
		 */
		dev_uc_flush(dev);
		dev_mc_flush(dev);

		if (dev->netdev_ops->ndo_uninit)
			dev->netdev_ops->ndo_uninit(dev);

		/* By now the device is expected to have no master left. */
		WARN_ON(dev->master);

		/* Remove the device's kobject. */
		netdev_unregister_kobject(dev);
	}

	synchronize_net();

	/* Drop one reference per device. */
	list_for_each_entry(dev, head, unreg_list)
		dev_put(dev);
}
5388
/*
 * Single-device convenience wrapper: put @dev on a temporary on-stack
 * list, run rollback_registered_many() on it, then unlink the on-stack
 * list head again.
 */
static void rollback_registered(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->unreg_list, &single);
	rollback_registered_many(&single);
	list_del(&single);
}
5397
5398static netdev_features_t netdev_fix_features(struct net_device *dev,
5399 netdev_features_t features)
5400{
5401
5402 if ((features & NETIF_F_HW_CSUM) &&
5403 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5404 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5405 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5406 }
5407
5408
5409 if ((features & NETIF_F_SG) &&
5410 !(features & NETIF_F_ALL_CSUM)) {
5411 netdev_dbg(dev,
5412 "Dropping NETIF_F_SG since no checksum feature.\n");
5413 features &= ~NETIF_F_SG;
5414 }
5415
5416
5417 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5418 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5419 features &= ~NETIF_F_ALL_TSO;
5420 }
5421
5422
5423 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5424 features &= ~NETIF_F_TSO_ECN;
5425
5426
5427 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5428 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5429 features &= ~NETIF_F_GSO;
5430 }
5431
5432
5433 if (features & NETIF_F_UFO) {
5434
5435 if (!((features & NETIF_F_GEN_CSUM) ||
5436 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5437 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5438 netdev_dbg(dev,
5439 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5440 features &= ~NETIF_F_UFO;
5441 }
5442
5443 if (!(features & NETIF_F_SG)) {
5444 netdev_dbg(dev,
5445 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5446 features &= ~NETIF_F_UFO;
5447 }
5448 }
5449
5450 return features;
5451}
5452
/*
 * Recompute dev->features from the wanted feature set and hand the
 * result to the driver.  The caller must hold RTNL.
 *
 * Returns 0 when the computed set equals dev->features, -1 when the
 * driver's ndo_set_features hook failed, and 1 otherwise.
 */
int __netdev_update_features(struct net_device *dev)
{
	netdev_features_t features;
	int err = 0;

	ASSERT_RTNL();

	features = netdev_get_wanted_features(dev);

	/* Let the driver adjust the set first ... */
	if (dev->netdev_ops->ndo_fix_features)
		features = dev->netdev_ops->ndo_fix_features(dev, features);

	/* ... the driver's answer still goes through the generic fixups. */
	features = netdev_fix_features(dev, features);

	if (dev->features == features)
		return 0;

	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
		&dev->features, &features);

	if (dev->netdev_ops->ndo_set_features)
		err = dev->netdev_ops->ndo_set_features(dev, features);

	if (unlikely(err < 0)) {
		netdev_err(dev,
			"set_features() failed (%d); wanted %pNF, left %pNF\n",
			err, &features, &dev->features);
		return -1;
	}

	/*
	 * dev->features is stored here only when the hook returned 0 (or
	 * there is no hook).  NOTE(review): a positive return presumably
	 * means the driver updated dev->features itself - confirm.
	 */
	if (!err)
		dev->features = features;

	return 1;
}
5489
5490
5491
5492
5493
5494
5495
5496
5497
/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Runs __netdev_update_features() and calls netdev_features_change()
 *	whenever that helper returns a non-zero value, which includes its
 *	-1 failure result.
 */
void netdev_update_features(struct net_device *dev)
{
	int changed = __netdev_update_features(dev);

	if (changed != 0)
		netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_update_features);
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Like netdev_update_features(), but netdev_features_change() is
 *	called unconditionally: the result of __netdev_update_features()
 *	is ignored.
 */
void netdev_change_features(struct net_device *dev)
{
	__netdev_update_features(dev);
	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_change_features);
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5532 struct net_device *dev)
5533{
5534 if (rootdev->operstate == IF_OPER_DORMANT)
5535 netif_dormant_on(dev);
5536 else
5537 netif_dormant_off(dev);
5538
5539 if (netif_carrier_ok(rootdev)) {
5540 if (!netif_carrier_ok(dev))
5541 netif_carrier_on(dev);
5542 } else {
5543 if (netif_carrier_ok(dev))
5544 netif_carrier_off(dev);
5545 }
5546}
5547EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5548
#ifdef CONFIG_RPS
/*
 * Allocate the zeroed array of dev->num_rx_queues receive queues, store
 * it in dev->_rx and point every entry back at @dev.
 * Returns 0, or -ENOMEM when the allocation fails.
 */
static int netif_alloc_rx_queues(struct net_device *dev)
{
	unsigned int nr_rxq = dev->num_rx_queues;
	struct netdev_rx_queue *rxq;
	unsigned int idx;

	BUG_ON(nr_rxq < 1);

	rxq = kcalloc(nr_rxq, sizeof(*rxq), GFP_KERNEL);
	if (rxq == NULL) {
		pr_err("netdev: Unable to allocate %u rx queues\n", nr_rxq);
		return -ENOMEM;
	}

	dev->_rx = rxq;
	for (idx = 0; idx < nr_rxq; idx++)
		rxq[idx].dev = dev;

	return 0;
}
#endif
5569
/*
 * Initialise one transmit queue of @dev.  Used as the callback for
 * netdev_for_each_tx_queue(); @_unused is ignored.
 */
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue, void *_unused)
{
	/* Initialise the queue's xmit lock and its lockdep class. */
	spin_lock_init(&queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
	/* -1: no owner recorded for the xmit lock yet. */
	queue->xmit_lock_owner = -1;
	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
	queue->dev = dev;
#ifdef CONFIG_BQL
	dql_init(&queue->dql, HZ);
#endif
}
5583
5584static int netif_alloc_netdev_queues(struct net_device *dev)
5585{
5586 unsigned int count = dev->num_tx_queues;
5587 struct netdev_queue *tx;
5588
5589 BUG_ON(count < 1);
5590
5591 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5592 if (!tx) {
5593 pr_err("netdev: Unable to allocate %u tx queues\n", count);
5594 return -ENOMEM;
5595 }
5596 dev->_tx = tx;
5597
5598 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5599 spin_lock_init(&dev->tx_global_lock);
5600
5601 return 0;
5602}
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Validates the device name, runs the driver's ndo_init hook, assigns
 *	an ifindex, sets up the feature masks, registers the kobject, links
 *	the device into the device lists and runs the NETDEV_POST_INIT and
 *	NETDEV_REGISTER notifier chains.  A reference is taken on @dev once
 *	it is registered.
 *
 *	Must not be called during the boot phase; the caller must hold
 *	RTNL and be allowed to sleep.  Returns 0 on success or a negative
 *	errno code.
 */
int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* Registering a device twice, or one without a namespace, is fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	/* -1 marks iflink as "not set"; ndo_init may overwrite it. */
	dev->iflink = -1;

	ret = dev_get_valid_name(net, dev, dev->name);
	if (ret < 0)
		goto out;

	/* Driver init hook, if provided.  Positive results become -EIO. */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	/* Pick a fresh ifindex, or fail with -EBUSY if a preset one is taken. */
	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;

	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/*
	 * Enable the software features and derive wanted_features from the
	 * features that are both enabled and changeable (hw_features).
	 */
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;
	dev->wanted_features = dev->features & dev->hw_features;

	/*
	 * Non-loopback devices may use NETIF_F_NOCACHE_COPY; it is switched
	 * on by default only when a checksum feature is enabled.
	 */
	if (!(dev->flags & IFF_LOOPBACK)) {
		dev->hw_features |= NETIF_F_NOCACHE_COPY;
		if (dev->features & NETIF_F_ALL_CSUM) {
			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
			dev->features |= NETIF_F_NOCACHE_COPY;
		}
	}

	/* NETIF_F_HIGHDMA is always part of vlan_features. */
	dev->vlan_features |= NETIF_F_HIGHDMA;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

	/*
	 *	Default initial state at registration is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);

	/* Notify protocols that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		/* A notifier vetoed the registration: undo everything. */
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}
	/*
	 *	Send RTM_NEWLINK unless the device has rtnl_link_ops and is
	 *	not in the RTNL_LINK_INITIALIZED state.
	 *	NOTE(review): this is also reached after the rollback above.
	 */
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

out:
	return ret;

err_uninit:
	/* Undo ndo_init for failures that happen after it succeeded. */
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
EXPORT_SYMBOL(register_netdevice);
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744int init_dummy_netdev(struct net_device *dev)
5745{
5746
5747
5748
5749
5750
5751 memset(dev, 0, sizeof(struct net_device));
5752
5753
5754
5755
5756 dev->reg_state = NETREG_DUMMY;
5757
5758
5759 INIT_LIST_HEAD(&dev->napi_list);
5760
5761
5762 set_bit(__LINK_STATE_PRESENT, &dev->state);
5763 set_bit(__LINK_STATE_START, &dev->state);
5764
5765
5766
5767
5768
5769
5770 return 0;
5771}
5772EXPORT_SYMBOL_GPL(init_dummy_netdev);
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Wrapper around register_netdevice() that takes RTNL for the
 *	duration of the call.  Returns register_netdevice()'s result:
 *	0 on success or a negative errno code.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
5798
5799int netdev_refcnt_read(const struct net_device *dev)
5800{
5801 int i, refcnt = 0;
5802
5803 for_each_possible_cpu(i)
5804 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5805 return refcnt;
5806}
5807EXPORT_SYMBOL(netdev_refcnt_read);
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
/*
 * Block until the reference count of @dev has dropped to zero.
 *
 * The count is polled every 250 ms.  Whenever more than one second has
 * passed since the last rebroadcast, the NETDEV_UNREGISTER and
 * NETDEV_UNREGISTER_FINAL notifier chains are run again under RTNL, with
 * an rcu_barrier() in between.  Whenever more than ten seconds have
 * passed since the last warning, an emergency-level message with the
 * current count is printed.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;
	int refcnt;

	linkwatch_forget_dev(dev);

	rebroadcast_time = warning_time = jiffies;
	refcnt = netdev_refcnt_read(dev);

	while (refcnt != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast the unregister notification. */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			/* RTNL is released around the rcu_barrier(). */
			__rtnl_unlock();
			rcu_barrier();
			rtnl_lock();

			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/*
				 * A linkwatch event is still pending for this
				 * device: run the linkwatch queue now.
				 * NOTE(review): presumably so the event no
				 * longer keeps a reference - confirm.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		refcnt = netdev_refcnt_read(dev);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
				 dev->name, refcnt);
			warning_time = jiffies;
		}
	}
}
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895void netdev_run_todo(void)
5896{
5897 struct list_head list;
5898
5899
5900 list_replace_init(&net_todo_list, &list);
5901
5902 __rtnl_unlock();
5903
5904
5905
5906 if (!list_empty(&list))
5907 rcu_barrier();
5908
5909 while (!list_empty(&list)) {
5910 struct