1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/capability.h>
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
34#include <linux/mutex.h>
35
36#include <net/net_namespace.h>
37#include <net/ip.h>
38#ifdef CONFIG_IP_VS_IPV6
39#include <net/ipv6.h>
40#include <net/ip6_route.h>
41#endif
42#include <net/route.h>
43#include <net/sock.h>
44#include <net/genetlink.h>
45
46#include <asm/uaccess.h>
47
48#include <net/ip_vs.h>
49
50
51static DEFINE_MUTEX(__ip_vs_mutex);
52
53
54static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56
57static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59
60static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62
63static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65
66static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68
69int ip_vs_drop_rate = 0;
70int ip_vs_drop_counter = 0;
71static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73
74static int ip_vs_num_services = 0;
75
76
77static int sysctl_ip_vs_drop_entry = 0;
78static int sysctl_ip_vs_drop_packet = 0;
79static int sysctl_ip_vs_secure_tcp = 0;
80static int sysctl_ip_vs_amemthresh = 1024;
81static int sysctl_ip_vs_am_droprate = 10;
82int sysctl_ip_vs_cache_bypass = 0;
83int sysctl_ip_vs_expire_nodest_conn = 0;
84int sysctl_ip_vs_expire_quiescent_template = 0;
85int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* Current debug verbosity; starts at zero like any file-scope object. */
static int sysctl_ip_vs_debug_level;

/* Return the current debug verbosity. */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif /* CONFIG_IP_VS_DEBUG */
97
#ifdef CONFIG_IP_VS_IPV6
/*
 * Tell whether an IPv6 address is local to this host.
 *
 * The address is looked up in init_net's routing table; it counts as local
 * when the resulting route leaves through a loopback device.
 *
 * @addr: IPv6 address to test.
 *
 * Returns 1 when the address is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};
	int is_local;

	dst = ip6_route_output(&init_net, NULL, &fl);
	if (!dst)
		return 0;

	rt = (struct rt6_info *)dst;

	/*
	 * A failed lookup is reported through dst->error rather than a NULL
	 * pointer, so an error route must be rejected before its device is
	 * inspected.
	 */
	is_local = !dst->error && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/* The lookup handed us a reference; drop it so the route can die. */
	dst_release(dst);

	return is_local;
}
#endif
118
119
120
121
122static void update_defense_level(void)
123{
124 struct sysinfo i;
125 static int old_secure_tcp = 0;
126 int availmem;
127 int nomem;
128 int to_change = -1;
129
130
131 si_meminfo(&i);
132 availmem = i.freeram + i.bufferram;
133
134
135
136
137
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
139
140 local_bh_disable();
141
142
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
145 case 0:
146 atomic_set(&ip_vs_dropentry, 0);
147 break;
148 case 1:
149 if (nomem) {
150 atomic_set(&ip_vs_dropentry, 1);
151 sysctl_ip_vs_drop_entry = 2;
152 } else {
153 atomic_set(&ip_vs_dropentry, 0);
154 }
155 break;
156 case 2:
157 if (nomem) {
158 atomic_set(&ip_vs_dropentry, 1);
159 } else {
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
162 };
163 break;
164 case 3:
165 atomic_set(&ip_vs_dropentry, 1);
166 break;
167 }
168 spin_unlock(&__ip_vs_dropentry_lock);
169
170
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
173 case 0:
174 ip_vs_drop_rate = 0;
175 break;
176 case 1:
177 if (nomem) {
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
182 } else {
183 ip_vs_drop_rate = 0;
184 }
185 break;
186 case 2:
187 if (nomem) {
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
191 } else {
192 ip_vs_drop_rate = 0;
193 sysctl_ip_vs_drop_packet = 1;
194 }
195 break;
196 case 3:
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198 break;
199 }
200 spin_unlock(&__ip_vs_droppacket_lock);
201
202
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
205 case 0:
206 if (old_secure_tcp >= 2)
207 to_change = 0;
208 break;
209 case 1:
210 if (nomem) {
211 if (old_secure_tcp < 2)
212 to_change = 1;
213 sysctl_ip_vs_secure_tcp = 2;
214 } else {
215 if (old_secure_tcp >= 2)
216 to_change = 0;
217 }
218 break;
219 case 2:
220 if (nomem) {
221 if (old_secure_tcp < 2)
222 to_change = 1;
223 } else {
224 if (old_secure_tcp >= 2)
225 to_change = 0;
226 sysctl_ip_vs_secure_tcp = 1;
227 }
228 break;
229 case 3:
230 if (old_secure_tcp < 2)
231 to_change = 1;
232 break;
233 }
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
235 if (to_change >= 0)
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
238
239 local_bh_enable();
240}
241
242
243
244
245
246#define DEFENSE_TIMER_PERIOD 1*HZ
247static void defense_work_handler(struct work_struct *work);
248static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
249
250static void defense_work_handler(struct work_struct *work)
251{
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
255
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257}
258
259int
260ip_vs_use_count_inc(void)
261{
262 return try_module_get(THIS_MODULE);
263}
264
265void
266ip_vs_use_count_dec(void)
267{
268 module_put(THIS_MODULE);
269}
270
271
272
273
274
275#define IP_VS_SVC_TAB_BITS 8
276#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279
280static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281
282static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284
285
286
287#define IP_VS_RTAB_BITS 4
288#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293
294
295
296static LIST_HEAD(ip_vs_dest_trash);
297
298
299
300
301static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305
306
307
308static __inline__ unsigned
309ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310 __be16 port)
311{
312 register unsigned porth = ntohs(port);
313 __be32 addr_fold = addr->ip;
314
315#ifdef CONFIG_IP_VS_IPV6
316 if (af == AF_INET6)
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
319#endif
320
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
322 & IP_VS_SVC_TAB_MASK;
323}
324
325
326
327
328static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329{
330 return fwmark & IP_VS_SVC_TAB_MASK;
331}
332
333
334
335
336
337
338static int ip_vs_svc_hash(struct ip_vs_service *svc)
339{
340 unsigned hash;
341
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
345 return 0;
346 }
347
348 if (svc->fwmark == 0) {
349
350
351
352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
353 svc->port);
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355 } else {
356
357
358
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361 }
362
363 svc->flags |= IP_VS_SVC_F_HASHED;
364
365 atomic_inc(&svc->refcnt);
366 return 1;
367}
368
369
370
371
372
373
374static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375{
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
379 return 0;
380 }
381
382 if (svc->fwmark == 0) {
383
384 list_del(&svc->s_list);
385 } else {
386
387 list_del(&svc->f_list);
388 }
389
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
392 return 1;
393}
394
395
396
397
398
399static inline struct ip_vs_service *
400__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401 __be16 vport)
402{
403 unsigned hash;
404 struct ip_vs_service *svc;
405
406
407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
408
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
410 if ((svc->af == af)
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
414
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423
424
425
426
427static inline struct ip_vs_service *
428__ip_vs_svc_fwm_get(int af, __u32 fwmark)
429{
430 unsigned hash;
431 struct ip_vs_service *svc;
432
433
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
437 if (svc->fwmark == fwmark && svc->af == af) {
438
439 atomic_inc(&svc->usecnt);
440 return svc;
441 }
442 }
443
444 return NULL;
445}
446
447struct ip_vs_service *
448ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
450{
451 struct ip_vs_service *svc;
452
453 read_lock(&__ip_vs_svc_lock);
454
455
456
457
458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
459 goto out;
460
461
462
463
464
465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
466
467 if (svc == NULL
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471
472
473
474
475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
476 }
477
478 if (svc == NULL
479 && atomic_read(&ip_vs_nullsvc_counter)) {
480
481
482
483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
484 }
485
486 out:
487 read_unlock(&__ip_vs_svc_lock);
488
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
493
494 return svc;
495}
496
497
498static inline void
499__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500{
501 atomic_inc(&svc->refcnt);
502 dest->svc = svc;
503}
504
505static inline void
506__ip_vs_unbind_svc(struct ip_vs_dest *dest)
507{
508 struct ip_vs_service *svc = dest->svc;
509
510 dest->svc = NULL;
511 if (atomic_dec_and_test(&svc->refcnt))
512 kfree(svc);
513}
514
515
516
517
518
519static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
521 __be16 port)
522{
523 register unsigned porth = ntohs(port);
524 __be32 addr_fold = addr->ip;
525
526#ifdef CONFIG_IP_VS_IPV6
527 if (af == AF_INET6)
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
530#endif
531
532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
533 & IP_VS_RTAB_MASK;
534}
535
536
537
538
539
540static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541{
542 unsigned hash;
543
544 if (!list_empty(&dest->d_list)) {
545 return 0;
546 }
547
548
549
550
551
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556 return 1;
557}
558
559
560
561
562
563static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564{
565
566
567
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
571 }
572
573 return 1;
574}
575
576
577
578
579struct ip_vs_dest *
580ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
582 __be16 dport)
583{
584 unsigned hash;
585 struct ip_vs_dest *dest;
586
587
588
589
590
591 hash = ip_vs_rs_hashkey(af, daddr, dport);
592
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
595 if ((dest->af == af)
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
599 dest->vfwmark)) {
600
601 read_unlock(&__ip_vs_rs_lock);
602 return dest;
603 }
604 }
605 read_unlock(&__ip_vs_rs_lock);
606
607 return NULL;
608}
609
610
611
612
613static struct ip_vs_dest *
614ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615 __be16 dport)
616{
617 struct ip_vs_dest *dest;
618
619
620
621
622 list_for_each_entry(dest, &svc->destinations, n_list) {
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
626
627 return dest;
628 }
629 }
630
631 return NULL;
632}
633
634
635
636
637
638
639
640
641
642
643
644struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645 __be16 dport,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
648{
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
651
652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
653 if (!svc)
654 return NULL;
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
656 if (dest)
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
659 return dest;
660}
661
662
663
664
665
666
667
668
669
670
671
672static struct ip_vs_dest *
673ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674 __be16 dport)
675{
676 struct ip_vs_dest *dest, *nxt;
677
678
679
680
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683 "dest->refcnt=%d\n",
684 dest->vfwmark,
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
686 ntohs(dest->port),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
693 (svc->fwmark ||
694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
695 dest->vport == svc->port))) {
696
697 return dest;
698 }
699
700
701
702
703 if (atomic_read(&dest->refcnt) == 1) {
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705 "from trash\n",
706 dest->vfwmark,
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
708 ntohs(dest->port));
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
712 kfree(dest);
713 }
714 }
715
716 return NULL;
717}
718
719
720
721
722
723
724
725
726
727
728
729static void ip_vs_trash_cleanup(void)
730{
731 struct ip_vs_dest *dest, *nxt;
732
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
737 kfree(dest);
738 }
739}
740
741
742static void
743ip_vs_zero_stats(struct ip_vs_stats *stats)
744{
745 spin_lock_bh(&stats->lock);
746
747 memset(&stats->ustats, 0, sizeof(stats->ustats));
748 ip_vs_zero_estimator(stats);
749
750 spin_unlock_bh(&stats->lock);
751}
752
753
754
755
756static void
757__ip_vs_update_dest(struct ip_vs_service *svc,
758 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
759{
760 int conn_flags;
761
762
763 atomic_set(&dest->weight, udest->weight);
764 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
765
766
767#ifdef CONFIG_IP_VS_IPV6
768 if (svc->af == AF_INET6) {
769 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
770 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
771 | IP_VS_CONN_F_LOCALNODE;
772 }
773 } else
774#endif
775 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
776 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
777 | IP_VS_CONN_F_LOCALNODE;
778 }
779
780
781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
783 } else {
784
785
786
787
788 write_lock_bh(&__ip_vs_rs_lock);
789 ip_vs_rs_hash(dest);
790 write_unlock_bh(&__ip_vs_rs_lock);
791 }
792 atomic_set(&dest->conn_flags, conn_flags);
793
794
795 if (!dest->svc) {
796 __ip_vs_bind_svc(dest, svc);
797 } else {
798 if (dest->svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc);
802 }
803 }
804
805
806 dest->flags |= IP_VS_DEST_F_AVAILABLE;
807
808 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
810 dest->u_threshold = udest->u_threshold;
811 dest->l_threshold = udest->l_threshold;
812}
813
814
815
816
817
818static int
819ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
820 struct ip_vs_dest **dest_p)
821{
822 struct ip_vs_dest *dest;
823 unsigned atype;
824
825 EnterFunction(2);
826
827#ifdef CONFIG_IP_VS_IPV6
828 if (svc->af == AF_INET6) {
829 atype = ipv6_addr_type(&udest->addr.in6);
830 if ((!(atype & IPV6_ADDR_UNICAST) ||
831 atype & IPV6_ADDR_LINKLOCAL) &&
832 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
833 return -EINVAL;
834 } else
835#endif
836 {
837 atype = inet_addr_type(&init_net, udest->addr.ip);
838 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
839 return -EINVAL;
840 }
841
842 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
843 if (dest == NULL) {
844 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
845 return -ENOMEM;
846 }
847
848 dest->af = svc->af;
849 dest->protocol = svc->protocol;
850 dest->vaddr = svc->addr;
851 dest->vport = svc->port;
852 dest->vfwmark = svc->fwmark;
853 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
854 dest->port = udest->port;
855
856 atomic_set(&dest->activeconns, 0);
857 atomic_set(&dest->inactconns, 0);
858 atomic_set(&dest->persistconns, 0);
859 atomic_set(&dest->refcnt, 0);
860
861 INIT_LIST_HEAD(&dest->d_list);
862 spin_lock_init(&dest->dst_lock);
863 spin_lock_init(&dest->stats.lock);
864 __ip_vs_update_dest(svc, dest, udest);
865 ip_vs_new_estimator(&dest->stats);
866
867 *dest_p = dest;
868
869 LeaveFunction(2);
870 return 0;
871}
872
873
874
875
876
877static int
878ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
879{
880 struct ip_vs_dest *dest;
881 union nf_inet_addr daddr;
882 __be16 dport = udest->port;
883 int ret;
884
885 EnterFunction(2);
886
887 if (udest->weight < 0) {
888 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
889 return -ERANGE;
890 }
891
892 if (udest->l_threshold > udest->u_threshold) {
893 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
894 "upper threshold\n");
895 return -ERANGE;
896 }
897
898 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
899
900
901
902
903 dest = ip_vs_lookup_dest(svc, &daddr, dport);
904
905 if (dest != NULL) {
906 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
907 return -EEXIST;
908 }
909
910
911
912
913
914 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
915
916 if (dest != NULL) {
917 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
918 "dest->refcnt=%d, service %u/%s:%u\n",
919 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
920 atomic_read(&dest->refcnt),
921 dest->vfwmark,
922 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
923 ntohs(dest->vport));
924
925 __ip_vs_update_dest(svc, dest, udest);
926
927
928
929
930 list_del(&dest->n_list);
931
932 ip_vs_new_estimator(&dest->stats);
933
934 write_lock_bh(&__ip_vs_svc_lock);
935
936
937
938
939 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
940
941 list_add(&dest->n_list, &svc->destinations);
942 svc->num_dests++;
943
944
945 if (svc->scheduler->update_service)
946 svc->scheduler->update_service(svc);
947
948 write_unlock_bh(&__ip_vs_svc_lock);
949 return 0;
950 }
951
952
953
954
955 ret = ip_vs_new_dest(svc, udest, &dest);
956 if (ret) {
957 return ret;
958 }
959
960
961
962
963 atomic_inc(&dest->refcnt);
964
965 write_lock_bh(&__ip_vs_svc_lock);
966
967
968
969
970 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
971
972 list_add(&dest->n_list, &svc->destinations);
973 svc->num_dests++;
974
975
976 if (svc->scheduler->update_service)
977 svc->scheduler->update_service(svc);
978
979 write_unlock_bh(&__ip_vs_svc_lock);
980
981 LeaveFunction(2);
982
983 return 0;
984}
985
986
987
988
989
990static int
991ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
992{
993 struct ip_vs_dest *dest;
994 union nf_inet_addr daddr;
995 __be16 dport = udest->port;
996
997 EnterFunction(2);
998
999 if (udest->weight < 0) {
1000 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1001 return -ERANGE;
1002 }
1003
1004 if (udest->l_threshold > udest->u_threshold) {
1005 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1006 "upper threshold\n");
1007 return -ERANGE;
1008 }
1009
1010 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1011
1012
1013
1014
1015 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1016
1017 if (dest == NULL) {
1018 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1019 return -ENOENT;
1020 }
1021
1022 __ip_vs_update_dest(svc, dest, udest);
1023
1024 write_lock_bh(&__ip_vs_svc_lock);
1025
1026
1027 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1028
1029
1030 if (svc->scheduler->update_service)
1031 svc->scheduler->update_service(svc);
1032
1033 write_unlock_bh(&__ip_vs_svc_lock);
1034
1035 LeaveFunction(2);
1036
1037 return 0;
1038}
1039
1040
1041
1042
1043
1044static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1045{
1046 ip_vs_kill_estimator(&dest->stats);
1047
1048
1049
1050
1051 write_lock_bh(&__ip_vs_rs_lock);
1052 ip_vs_rs_unhash(dest);
1053 write_unlock_bh(&__ip_vs_rs_lock);
1054
1055
1056
1057
1058
1059
1060 if (atomic_dec_and_test(&dest->refcnt)) {
1061 ip_vs_dst_reset(dest);
1062
1063
1064
1065
1066
1067 atomic_dec(&dest->svc->refcnt);
1068 kfree(dest);
1069 } else {
1070 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1071 "dest->refcnt=%d\n",
1072 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1073 ntohs(dest->port),
1074 atomic_read(&dest->refcnt));
1075 list_add(&dest->n_list, &ip_vs_dest_trash);
1076 atomic_inc(&dest->refcnt);
1077 }
1078}
1079
1080
1081
1082
1083
1084static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1085 struct ip_vs_dest *dest,
1086 int svcupd)
1087{
1088 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1089
1090
1091
1092
1093 list_del(&dest->n_list);
1094 svc->num_dests--;
1095
1096
1097
1098
1099 if (svcupd && svc->scheduler->update_service)
1100 svc->scheduler->update_service(svc);
1101}
1102
1103
1104
1105
1106
1107static int
1108ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1109{
1110 struct ip_vs_dest *dest;
1111 __be16 dport = udest->port;
1112
1113 EnterFunction(2);
1114
1115 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1116
1117 if (dest == NULL) {
1118 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1119 return -ENOENT;
1120 }
1121
1122 write_lock_bh(&__ip_vs_svc_lock);
1123
1124
1125
1126
1127 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1128
1129
1130
1131
1132 __ip_vs_unlink_dest(svc, dest, 1);
1133
1134 write_unlock_bh(&__ip_vs_svc_lock);
1135
1136
1137
1138
1139 __ip_vs_del_dest(dest);
1140
1141 LeaveFunction(2);
1142
1143 return 0;
1144}
1145
1146
1147
1148
1149
1150static int
1151ip_vs_add_service(struct ip_vs_service_user_kern *u,
1152 struct ip_vs_service **svc_p)
1153{
1154 int ret = 0;
1155 struct ip_vs_scheduler *sched = NULL;
1156 struct ip_vs_service *svc = NULL;
1157
1158
1159 ip_vs_use_count_inc();
1160
1161
1162 sched = ip_vs_scheduler_get(u->sched_name);
1163 if (sched == NULL) {
1164 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1165 u->sched_name);
1166 ret = -ENOENT;
1167 goto out_mod_dec;
1168 }
1169
1170#ifdef CONFIG_IP_VS_IPV6
1171 if (u->af == AF_INET6) {
1172 if (!sched->supports_ipv6) {
1173 ret = -EAFNOSUPPORT;
1174 goto out_err;
1175 }
1176 if ((u->netmask < 1) || (u->netmask > 128)) {
1177 ret = -EINVAL;
1178 goto out_err;
1179 }
1180 }
1181#endif
1182
1183 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1184 if (svc == NULL) {
1185 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1186 ret = -ENOMEM;
1187 goto out_err;
1188 }
1189
1190
1191 atomic_set(&svc->usecnt, 1);
1192 atomic_set(&svc->refcnt, 0);
1193
1194 svc->af = u->af;
1195 svc->protocol = u->protocol;
1196 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1197 svc->port = u->port;
1198 svc->fwmark = u->fwmark;
1199 svc->flags = u->flags;
1200 svc->timeout = u->timeout * HZ;
1201 svc->netmask = u->netmask;
1202
1203 INIT_LIST_HEAD(&svc->destinations);
1204 rwlock_init(&svc->sched_lock);
1205 spin_lock_init(&svc->stats.lock);
1206
1207
1208 ret = ip_vs_bind_scheduler(svc, sched);
1209 if (ret)
1210 goto out_err;
1211 sched = NULL;
1212
1213
1214 if (svc->port == FTPPORT)
1215 atomic_inc(&ip_vs_ftpsvc_counter);
1216 else if (svc->port == 0)
1217 atomic_inc(&ip_vs_nullsvc_counter);
1218
1219 ip_vs_new_estimator(&svc->stats);
1220
1221
1222 if (svc->af == AF_INET)
1223 ip_vs_num_services++;
1224
1225
1226 write_lock_bh(&__ip_vs_svc_lock);
1227 ip_vs_svc_hash(svc);
1228 write_unlock_bh(&__ip_vs_svc_lock);
1229
1230 *svc_p = svc;
1231 return 0;
1232
1233 out_err:
1234 if (svc != NULL) {
1235 if (svc->scheduler)
1236 ip_vs_unbind_scheduler(svc);
1237 if (svc->inc) {
1238 local_bh_disable();
1239 ip_vs_app_inc_put(svc->inc);
1240 local_bh_enable();
1241 }
1242 kfree(svc);
1243 }
1244 ip_vs_scheduler_put(sched);
1245
1246 out_mod_dec:
1247
1248 ip_vs_use_count_dec();
1249
1250 return ret;
1251}
1252
1253
1254
1255
1256
1257static int
1258ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1259{
1260 struct ip_vs_scheduler *sched, *old_sched;
1261 int ret = 0;
1262
1263
1264
1265
1266 sched = ip_vs_scheduler_get(u->sched_name);
1267 if (sched == NULL) {
1268 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1269 u->sched_name);
1270 return -ENOENT;
1271 }
1272 old_sched = sched;
1273
1274#ifdef CONFIG_IP_VS_IPV6
1275 if (u->af == AF_INET6) {
1276 if (!sched->supports_ipv6) {
1277 ret = -EAFNOSUPPORT;
1278 goto out;
1279 }
1280 if ((u->netmask < 1) || (u->netmask > 128)) {
1281 ret = -EINVAL;
1282 goto out;
1283 }
1284 }
1285#endif
1286
1287 write_lock_bh(&__ip_vs_svc_lock);
1288
1289
1290
1291
1292 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1293
1294
1295
1296
1297 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1298 svc->timeout = u->timeout * HZ;
1299 svc->netmask = u->netmask;
1300
1301 old_sched = svc->scheduler;
1302 if (sched != old_sched) {
1303
1304
1305
1306 if ((ret = ip_vs_unbind_scheduler(svc))) {
1307 old_sched = sched;
1308 goto out_unlock;
1309 }
1310
1311
1312
1313
1314 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325 ip_vs_bind_scheduler(svc, old_sched);
1326 old_sched = sched;
1327 goto out_unlock;
1328 }
1329 }
1330
1331 out_unlock:
1332 write_unlock_bh(&__ip_vs_svc_lock);
1333#ifdef CONFIG_IP_VS_IPV6
1334 out:
1335#endif
1336
1337 if (old_sched)
1338 ip_vs_scheduler_put(old_sched);
1339
1340 return ret;
1341}
1342
1343
1344
1345
1346
1347
1348
1349static void __ip_vs_del_service(struct ip_vs_service *svc)
1350{
1351 struct ip_vs_dest *dest, *nxt;
1352 struct ip_vs_scheduler *old_sched;
1353
1354
1355 if (svc->af == AF_INET)
1356 ip_vs_num_services--;
1357
1358 ip_vs_kill_estimator(&svc->stats);
1359
1360
1361 old_sched = svc->scheduler;
1362 ip_vs_unbind_scheduler(svc);
1363 if (old_sched)
1364 ip_vs_scheduler_put(old_sched);
1365
1366
1367 if (svc->inc) {
1368 ip_vs_app_inc_put(svc->inc);
1369 svc->inc = NULL;
1370 }
1371
1372
1373
1374
1375 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1376 __ip_vs_unlink_dest(svc, dest, 0);
1377 __ip_vs_del_dest(dest);
1378 }
1379
1380
1381
1382
1383 if (svc->port == FTPPORT)
1384 atomic_dec(&ip_vs_ftpsvc_counter);
1385 else if (svc->port == 0)
1386 atomic_dec(&ip_vs_nullsvc_counter);
1387
1388
1389
1390
1391 if (atomic_read(&svc->refcnt) == 0)
1392 kfree(svc);
1393
1394
1395 ip_vs_use_count_dec();
1396}
1397
1398
1399
1400
1401static int ip_vs_del_service(struct ip_vs_service *svc)
1402{
1403 if (svc == NULL)
1404 return -EEXIST;
1405
1406
1407
1408
1409 write_lock_bh(&__ip_vs_svc_lock);
1410
1411 ip_vs_svc_unhash(svc);
1412
1413
1414
1415
1416 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1417
1418 __ip_vs_del_service(svc);
1419
1420 write_unlock_bh(&__ip_vs_svc_lock);
1421
1422 return 0;
1423}
1424
1425
1426
1427
1428
1429static int ip_vs_flush(void)
1430{
1431 int idx;
1432 struct ip_vs_service *svc, *nxt;
1433
1434
1435
1436
1437 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1438 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1439 write_lock_bh(&__ip_vs_svc_lock);
1440 ip_vs_svc_unhash(svc);
1441
1442
1443
1444 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1445 __ip_vs_del_service(svc);
1446 write_unlock_bh(&__ip_vs_svc_lock);
1447 }
1448 }
1449
1450
1451
1452
1453 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1454 list_for_each_entry_safe(svc, nxt,
1455 &ip_vs_svc_fwm_table[idx], f_list) {
1456 write_lock_bh(&__ip_vs_svc_lock);
1457 ip_vs_svc_unhash(svc);
1458
1459
1460
1461 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1462 __ip_vs_del_service(svc);
1463 write_unlock_bh(&__ip_vs_svc_lock);
1464 }
1465 }
1466
1467 return 0;
1468}
1469
1470
1471
1472
1473
1474static int ip_vs_zero_service(struct ip_vs_service *svc)
1475{
1476 struct ip_vs_dest *dest;
1477
1478 write_lock_bh(&__ip_vs_svc_lock);
1479 list_for_each_entry(dest, &svc->destinations, n_list) {
1480 ip_vs_zero_stats(&dest->stats);
1481 }
1482 ip_vs_zero_stats(&svc->stats);
1483 write_unlock_bh(&__ip_vs_svc_lock);
1484 return 0;
1485}
1486
1487static int ip_vs_zero_all(void)
1488{
1489 int idx;
1490 struct ip_vs_service *svc;
1491
1492 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1493 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1494 ip_vs_zero_service(svc);
1495 }
1496 }
1497
1498 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1499 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1500 ip_vs_zero_service(svc);
1501 }
1502 }
1503
1504 ip_vs_zero_stats(&ip_vs_stats);
1505 return 0;
1506}
1507
1508
1509static int
1510proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1511 void __user *buffer, size_t *lenp, loff_t *ppos)
1512{
1513 int *valp = table->data;
1514 int val = *valp;
1515 int rc;
1516
1517 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1518 if (write && (*valp != val)) {
1519 if ((*valp < 0) || (*valp > 3)) {
1520
1521 *valp = val;
1522 } else {
1523 update_defense_level();
1524 }
1525 }
1526 return rc;
1527}
1528
1529
1530static int
1531proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1532 void __user *buffer, size_t *lenp, loff_t *ppos)
1533{
1534 int *valp = table->data;
1535 int val[2];
1536 int rc;
1537
1538
1539 memcpy(val, valp, sizeof(val));
1540
1541 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1542 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1543
1544 memcpy(valp, val, sizeof(val));
1545 }
1546 return rc;
1547}
1548
1549
1550
1551
1552
1553
1554static struct ctl_table vs_vars[] = {
1555 {
1556 .procname = "amemthresh",
1557 .data = &sysctl_ip_vs_amemthresh,
1558 .maxlen = sizeof(int),
1559 .mode = 0644,
1560 .proc_handler = &proc_dointvec,
1561 },
1562#ifdef CONFIG_IP_VS_DEBUG
1563 {
1564 .procname = "debug_level",
1565 .data = &sysctl_ip_vs_debug_level,
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
1568 .proc_handler = &proc_dointvec,
1569 },
1570#endif
1571 {
1572 .procname = "am_droprate",
1573 .data = &sysctl_ip_vs_am_droprate,
1574 .maxlen = sizeof(int),
1575 .mode = 0644,
1576 .proc_handler = &proc_dointvec,
1577 },
1578 {
1579 .procname = "drop_entry",
1580 .data = &sysctl_ip_vs_drop_entry,
1581 .maxlen = sizeof(int),
1582 .mode = 0644,
1583 .proc_handler = &proc_do_defense_mode,
1584 },
1585 {
1586 .procname = "drop_packet",
1587 .data = &sysctl_ip_vs_drop_packet,
1588 .maxlen = sizeof(int),
1589 .mode = 0644,
1590 .proc_handler = &proc_do_defense_mode,
1591 },
1592 {
1593 .procname = "secure_tcp",
1594 .data = &sysctl_ip_vs_secure_tcp,
1595 .maxlen = sizeof(int),
1596 .mode = 0644,
1597 .proc_handler = &proc_do_defense_mode,
1598 },
1599#if 0
1600 {
1601 .procname = "timeout_established",
1602 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1603 .maxlen = sizeof(int),
1604 .mode = 0644,
1605 .proc_handler = &proc_dointvec_jiffies,
1606 },
1607 {
1608 .procname = "timeout_synsent",
1609 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1610 .maxlen = sizeof(int),
1611 .mode = 0644,
1612 .proc_handler = &proc_dointvec_jiffies,
1613 },
1614 {
1615 .procname = "timeout_synrecv",
1616 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1617 .maxlen = sizeof(int),
1618 .mode = 0644,
1619 .proc_handler = &proc_dointvec_jiffies,
1620 },
1621 {
1622 .procname = "timeout_finwait",
1623 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1624 .maxlen = sizeof(int),
1625 .mode = 0644,
1626 .proc_handler = &proc_dointvec_jiffies,
1627 },
1628 {
1629 .procname = "timeout_timewait",
1630 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1631 .maxlen = sizeof(int),
1632 .mode = 0644,
1633 .proc_handler = &proc_dointvec_jiffies,
1634 },
1635 {
1636 .procname = "timeout_close",
1637 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1638 .maxlen = sizeof(int),
1639 .mode = 0644,
1640 .proc_handler = &proc_dointvec_jiffies,
1641 },
1642 {
1643 .procname = "timeout_closewait",
1644 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1645 .maxlen = sizeof(int),
1646 .mode = 0644,
1647 .proc_handler = &proc_dointvec_jiffies,
1648 },
1649 {
1650 .procname = "timeout_lastack",
1651 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1652 .maxlen = sizeof(int),
1653 .mode = 0644,
1654 .proc_handler = &proc_dointvec_jiffies,
1655 },
1656 {
1657 .procname = "timeout_listen",
1658 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1659 .maxlen = sizeof(int),
1660 .mode = 0644,
1661 .proc_handler = &proc_dointvec_jiffies,
1662 },
1663 {
1664 .procname = "timeout_synack",
1665 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1666 .maxlen = sizeof(int),
1667 .mode = 0644,
1668 .proc_handler = &proc_dointvec_jiffies,
1669 },
1670 {
1671 .procname = "timeout_udp",
1672 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1673 .maxlen = sizeof(int),
1674 .mode = 0644,
1675 .proc_handler = &proc_dointvec_jiffies,
1676 },
1677 {
1678 .procname = "timeout_icmp",
1679 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1680 .maxlen = sizeof(int),
1681 .mode = 0644,
1682 .proc_handler = &proc_dointvec_jiffies,
1683 },
1684#endif
1685 {
1686 .procname = "cache_bypass",
1687 .data = &sysctl_ip_vs_cache_bypass,
1688 .maxlen = sizeof(int),
1689 .mode = 0644,
1690 .proc_handler = &proc_dointvec,
1691 },
1692 {
1693 .procname = "expire_nodest_conn",
1694 .data = &sysctl_ip_vs_expire_nodest_conn,
1695 .maxlen = sizeof(int),
1696 .mode = 0644,
1697 .proc_handler = &proc_dointvec,
1698 },
1699 {
1700 .procname = "expire_quiescent_template",
1701 .data = &sysctl_ip_vs_expire_quiescent_template,
1702 .maxlen = sizeof(int),
1703 .mode = 0644,
1704 .proc_handler = &proc_dointvec,
1705 },
1706 {
1707 .procname = "sync_threshold",
1708 .data = &sysctl_ip_vs_sync_threshold,
1709 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1710 .mode = 0644,
1711 .proc_handler = &proc_do_sync_threshold,
1712 },
1713 {
1714 .procname = "nat_icmp_send",
1715 .data = &sysctl_ip_vs_nat_icmp_send,
1716 .maxlen = sizeof(int),
1717 .mode = 0644,
1718 .proc_handler = &proc_dointvec,
1719 },
1720 { .ctl_name = 0 }
1721};
1722
/*
 * Sysctl directory path net/ipv4/vs.  Exported (GPL-only) so that other
 * modules can reference the same path.
 */
const struct ctl_path net_vs_ctl_path[] = {
	{ .procname = "net", .ctl_name = CTL_NET, },
	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
	{ .procname = "vs", },
	{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);

/* Sysctl table header; presumably set at registration time elsewhere in this file. */
static struct ctl_table_header * sysctl_header;
1732
1733#ifdef CONFIG_PROC_FS
1734
/*
 * Per-open iterator state for the service listing seq_file: which hash
 * table (ip_vs_svc_table or ip_vs_svc_fwm_table) the current entry lives
 * in, and the bucket index within that table.
 */
struct ip_vs_iter {
	struct list_head *table;
	int bucket;
};
1739
1740
1741
1742
1743
1744static inline const char *ip_vs_fwd_name(unsigned flags)
1745{
1746 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1747 case IP_VS_CONN_F_LOCALNODE:
1748 return "Local";
1749 case IP_VS_CONN_F_TUNNEL:
1750 return "Tunnel";
1751 case IP_VS_CONN_F_DROUTE:
1752 return "Route";
1753 default:
1754 return "Masq";
1755 }
1756}
1757
1758
1759
1760static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1761{
1762 struct ip_vs_iter *iter = seq->private;
1763 int idx;
1764 struct ip_vs_service *svc;
1765
1766
1767 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1768 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1769 if (pos-- == 0){
1770 iter->table = ip_vs_svc_table;
1771 iter->bucket = idx;
1772 return svc;
1773 }
1774 }
1775 }
1776
1777
1778 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1779 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1780 if (pos-- == 0) {
1781 iter->table = ip_vs_svc_fwm_table;
1782 iter->bucket = idx;
1783 return svc;
1784 }
1785 }
1786 }
1787
1788 return NULL;
1789}
1790
1791static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1792__acquires(__ip_vs_svc_lock)
1793{
1794
1795 read_lock_bh(&__ip_vs_svc_lock);
1796 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1797}
1798
1799
1800static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1801{
1802 struct list_head *e;
1803 struct ip_vs_iter *iter;
1804 struct ip_vs_service *svc;
1805
1806 ++*pos;
1807 if (v == SEQ_START_TOKEN)
1808 return ip_vs_info_array(seq,0);
1809
1810 svc = v;
1811 iter = seq->private;
1812
1813 if (iter->table == ip_vs_svc_table) {
1814
1815 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1816 return list_entry(e, struct ip_vs_service, s_list);
1817
1818
1819 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1820 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1821 s_list) {
1822 return svc;
1823 }
1824 }
1825
1826 iter->table = ip_vs_svc_fwm_table;
1827 iter->bucket = -1;
1828 goto scan_fwmark;
1829 }
1830
1831
1832 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1833 return list_entry(e, struct ip_vs_service, f_list);
1834
1835 scan_fwmark:
1836 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1837 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1838 f_list)
1839 return svc;
1840 }
1841
1842 return NULL;
1843}
1844
/* seq_file stop callback: drop the service read lock taken by the start callback. */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1850
1851
1852static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1853{
1854 if (v == SEQ_START_TOKEN) {
1855 seq_printf(seq,
1856 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1857 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1858 seq_puts(seq,
1859 "Prot LocalAddress:Port Scheduler Flags\n");
1860 seq_puts(seq,
1861 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1862 } else {
1863 const struct ip_vs_service *svc = v;
1864 const struct ip_vs_iter *iter = seq->private;
1865 const struct ip_vs_dest *dest;
1866
1867 if (iter->table == ip_vs_svc_table) {
1868#ifdef CONFIG_IP_VS_IPV6
1869 if (svc->af == AF_INET6)
1870 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1871 ip_vs_proto_name(svc->protocol),
1872 NIP6(svc->addr.in6),
1873 ntohs(svc->port),
1874 svc->scheduler->name);
1875 else
1876#endif
1877 seq_printf(seq, "%s %08X:%04X %s ",
1878 ip_vs_proto_name(svc->protocol),
1879 ntohl(svc->addr.ip),
1880 ntohs(svc->port),
1881 svc->scheduler->name);
1882 } else {
1883 seq_printf(seq, "FWM %08X %s ",
1884 svc->fwmark, svc->scheduler->name);
1885 }
1886
1887 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1888 seq_printf(seq, "persistent %d %08X\n",
1889 svc->timeout,
1890 ntohl(svc->netmask));
1891 else
1892 seq_putc(seq, '\n');
1893
1894 list_for_each_entry(dest, &svc->destinations, n_list) {
1895#ifdef CONFIG_IP_VS_IPV6
1896 if (dest->af == AF_INET6)
1897 seq_printf(seq,
1898 " -> [" NIP6_FMT "]:%04X"
1899 " %-7s %-6d %-10d %-10d\n",
1900 NIP6(dest->addr.in6),
1901 ntohs(dest->port),
1902 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1903 atomic_read(&dest->weight),
1904 atomic_read(&dest->activeconns),
1905 atomic_read(&dest->inactconns));
1906 else
1907#endif
1908 seq_printf(seq,
1909 " -> %08X:%04X "
1910 "%-7s %-6d %-10d %-10d\n",
1911 ntohl(dest->addr.ip),
1912 ntohs(dest->port),
1913 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1914 atomic_read(&dest->weight),
1915 atomic_read(&dest->activeconns),
1916 atomic_read(&dest->inactconns));
1917
1918 }
1919 }
1920 return 0;
1921}
1922
/* seq_file callbacks for the service listing. */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1929
/*
 * Open handler for the service listing: sets up the seq_file with a
 * private struct ip_vs_iter sized buffer used by the seq callbacks.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
1935
/* File operations for the service listing; release frees the private iterator. */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
1943
1944#endif
1945
/* Global IPVS statistics; readers in this file take .lock with BHs disabled. */
struct ip_vs_stats ip_vs_stats = {
	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1949
1950#ifdef CONFIG_PROC_FS
1951static int ip_vs_stats_show(struct seq_file *seq, void *v)
1952{
1953
1954
1955 seq_puts(seq,
1956 " Total Incoming Outgoing Incoming Outgoing\n");
1957 seq_printf(seq,
1958 " Conns Packets Packets Bytes Bytes\n");
1959
1960 spin_lock_bh(&ip_vs_stats.lock);
1961 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1962 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1963 (unsigned long long) ip_vs_stats.ustats.inbytes,
1964 (unsigned long long) ip_vs_stats.ustats.outbytes);
1965
1966
1967 seq_puts(seq,
1968 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1969 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1970 ip_vs_stats.ustats.cps,
1971 ip_vs_stats.ustats.inpps,
1972 ip_vs_stats.ustats.outpps,
1973 ip_vs_stats.ustats.inbps,
1974 ip_vs_stats.ustats.outbps);
1975 spin_unlock_bh(&ip_vs_stats.lock);
1976
1977 return 0;
1978}
1979
/* Open handler for the statistics file: single-record seq_file backed by ip_vs_stats_show. */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}
1984
/* File operations for the statistics file. */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1992
1993#endif
1994
1995
1996
1997
1998static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1999{
2000 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2001 u->tcp_timeout,
2002 u->tcp_fin_timeout,
2003 u->udp_timeout);
2004
2005#ifdef CONFIG_IP_VS_PROTO_TCP
2006 if (u->tcp_timeout) {
2007 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2008 = u->tcp_timeout * HZ;
2009 }
2010
2011 if (u->tcp_fin_timeout) {
2012 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2013 = u->tcp_fin_timeout * HZ;
2014 }
2015#endif
2016
2017#ifdef CONFIG_IP_VS_PROTO_UDP
2018 if (u->udp_timeout) {
2019 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2020 = u->udp_timeout * HZ;
2021 }
2022#endif
2023 return 0;
2024}
2025
2026
/* Index of a set-sockopt command within set_arglen[]. */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
/* Argument sizes of the legacy (compat) userspace structures. */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer in do_ip_vs_set_ctl(). */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

/*
 * Exact argument length required for each set-sockopt command;
 * do_ip_vs_set_ctl() rejects any other length.
 */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
2048
2049static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2050 struct ip_vs_service_user *usvc_compat)
2051{
2052 usvc->af = AF_INET;
2053 usvc->protocol = usvc_compat->protocol;
2054 usvc->addr.ip = usvc_compat->addr;
2055 usvc->port = usvc_compat->port;
2056 usvc->fwmark = usvc_compat->fwmark;
2057
2058
2059 usvc->sched_name = usvc_compat->sched_name;
2060
2061 usvc->flags = usvc_compat->flags;
2062 usvc->timeout = usvc_compat->timeout;
2063 usvc->netmask = usvc_compat->netmask;
2064}
2065
2066static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2067 struct ip_vs_dest_user *udest_compat)
2068{
2069 udest->addr.ip = udest_compat->addr;
2070 udest->port = udest_compat->port;
2071 udest->conn_flags = udest_compat->conn_flags;
2072 udest->weight = udest_compat->weight;
2073 udest->u_threshold = udest_compat->u_threshold;
2074 udest->l_threshold = udest_compat->l_threshold;
2075}
2076
2077static int
2078do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2079{
2080 int ret;
2081 unsigned char arg[MAX_ARG_LEN];
2082 struct ip_vs_service_user *usvc_compat;
2083 struct ip_vs_service_user_kern usvc;
2084 struct ip_vs_service *svc;
2085 struct ip_vs_dest_user *udest_compat;
2086 struct ip_vs_dest_user_kern udest;
2087
2088 if (!capable(CAP_NET_ADMIN))
2089 return -EPERM;
2090
2091 if (len != set_arglen[SET_CMDID(cmd)]) {
2092 IP_VS_ERR("set_ctl: len %u != %u\n",
2093 len, set_arglen[SET_CMDID(cmd)]);
2094 return -EINVAL;
2095 }
2096
2097 if (copy_from_user(arg, user, len) != 0)
2098 return -EFAULT;
2099
2100
2101 ip_vs_use_count_inc();
2102
2103 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2104 ret = -ERESTARTSYS;
2105 goto out_dec;
2106 }
2107
2108 if (cmd == IP_VS_SO_SET_FLUSH) {
2109
2110 ret = ip_vs_flush();
2111 goto out_unlock;
2112 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2113
2114 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2115 goto out_unlock;
2116 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2117 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2118 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2119 goto out_unlock;
2120 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2121 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2122 ret = stop_sync_thread(dm->state);
2123 goto out_unlock;
2124 }
2125
2126 usvc_compat = (struct ip_vs_service_user *)arg;
2127 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2128
2129
2130
2131 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2132 ip_vs_copy_udest_compat(&udest, udest_compat);
2133
2134 if (cmd == IP_VS_SO_SET_ZERO) {
2135
2136 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2137 ret = ip_vs_zero_all();
2138 goto out_unlock;
2139 }
2140 }
2141
2142
2143 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2144 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2145 usvc.protocol, NIPQUAD(usvc.addr.ip),
2146 ntohs(usvc.port), usvc.sched_name);
2147 ret = -EFAULT;
2148 goto out_unlock;
2149 }
2150
2151
2152 if (usvc.fwmark == 0)
2153 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2154 &usvc.addr, usvc.port);
2155 else
2156 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2157
2158 if (cmd != IP_VS_SO_SET_ADD
2159 && (svc == NULL || svc->protocol != usvc.protocol)) {
2160 ret = -ESRCH;
2161 goto out_unlock;
2162 }
2163
2164 switch (cmd) {
2165 case IP_VS_SO_SET_ADD:
2166 if (svc != NULL)
2167 ret = -EEXIST;
2168 else
2169 ret = ip_vs_add_service(&usvc, &svc);
2170 break;
2171 case IP_VS_SO_SET_EDIT:
2172 ret = ip_vs_edit_service(svc, &usvc);
2173 break;
2174 case IP_VS_SO_SET_DEL:
2175 ret = ip_vs_del_service(svc);
2176 if (!ret)
2177 goto out_unlock;
2178 break;
2179 case IP_VS_SO_SET_ZERO:
2180 ret = ip_vs_zero_service(svc);
2181 break;
2182 case IP_VS_SO_SET_ADDDEST:
2183 ret = ip_vs_add_dest(svc, &udest);
2184 break;
2185 case IP_VS_SO_SET_EDITDEST:
2186 ret = ip_vs_edit_dest(svc, &udest);
2187 break;
2188 case IP_VS_SO_SET_DELDEST:
2189 ret = ip_vs_del_dest(svc, &udest);
2190 break;
2191 default:
2192 ret = -EINVAL;
2193 }
2194
2195 if (svc)
2196 ip_vs_service_put(svc);
2197
2198 out_unlock:
2199 mutex_unlock(&__ip_vs_mutex);
2200 out_dec:
2201
2202 ip_vs_use_count_dec();
2203
2204 return ret;
2205}
2206
2207
/*
 * Copy the user-visible counters of @src into @dst while holding the
 * statistics lock (BHs disabled), so the copy is one snapshot.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, &src->ustats, sizeof(*dst));
	spin_unlock_bh(&src->lock);
}
2215
2216static void
2217ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2218{
2219 dst->protocol = src->protocol;
2220 dst->addr = src->addr.ip;
2221 dst->port = src->port;
2222 dst->fwmark = src->fwmark;
2223 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2224 dst->flags = src->flags;
2225 dst->timeout = src->timeout / HZ;
2226 dst->netmask = src->netmask;
2227 dst->num_dests = src->num_dests;
2228 ip_vs_copy_stats(&dst->stats, &src->stats);
2229}
2230
2231static inline int
2232__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2233 struct ip_vs_get_services __user *uptr)
2234{
2235 int idx, count=0;
2236 struct ip_vs_service *svc;
2237 struct ip_vs_service_entry entry;
2238 int ret = 0;
2239
2240 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2241 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2242
2243 if (svc->af != AF_INET)
2244 continue;
2245
2246 if (count >= get->num_services)
2247 goto out;
2248 memset(&entry, 0, sizeof(entry));
2249 ip_vs_copy_service(&entry, svc);
2250 if (copy_to_user(&uptr->entrytable[count],
2251 &entry, sizeof(entry))) {
2252 ret = -EFAULT;
2253 goto out;
2254 }
2255 count++;
2256 }
2257 }
2258
2259 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2260 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2261
2262 if (svc->af != AF_INET)
2263 continue;
2264
2265 if (count >= get->num_services)
2266 goto out;
2267 memset(&entry, 0, sizeof(entry));
2268 ip_vs_copy_service(&entry, svc);
2269 if (copy_to_user(&uptr->entrytable[count],
2270 &entry, sizeof(entry))) {
2271 ret = -EFAULT;
2272 goto out;
2273 }
2274 count++;
2275 }
2276 }
2277 out:
2278 return ret;
2279}
2280
2281static inline int
2282__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2283 struct ip_vs_get_dests __user *uptr)
2284{
2285 struct ip_vs_service *svc;
2286 union nf_inet_addr addr = { .ip = get->addr };
2287 int ret = 0;
2288
2289 if (get->fwmark)
2290 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2291 else
2292 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2293 get->port);
2294
2295 if (svc) {
2296 int count = 0;
2297 struct ip_vs_dest *dest;
2298 struct ip_vs_dest_entry entry;
2299
2300 list_for_each_entry(dest, &svc->destinations, n_list) {
2301 if (count >= get->num_dests)
2302 break;
2303
2304 entry.addr = dest->addr.ip;
2305 entry.port = dest->port;
2306 entry.conn_flags = atomic_read(&dest->conn_flags);
2307 entry.weight = atomic_read(&dest->weight);
2308 entry.u_threshold = dest->u_threshold;
2309 entry.l_threshold = dest->l_threshold;
2310 entry.activeconns = atomic_read(&dest->activeconns);
2311 entry.inactconns = atomic_read(&dest->inactconns);
2312 entry.persistconns = atomic_read(&dest->persistconns);
2313 ip_vs_copy_stats(&entry.stats, &dest->stats);
2314 if (copy_to_user(&uptr->entrytable[count],
2315 &entry, sizeof(entry))) {
2316 ret = -EFAULT;
2317 break;
2318 }
2319 count++;
2320 }
2321 ip_vs_service_put(svc);
2322 } else
2323 ret = -ESRCH;
2324 return ret;
2325}
2326
2327static inline void
2328__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2329{
2330#ifdef CONFIG_IP_VS_PROTO_TCP
2331 u->tcp_timeout =
2332 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2333 u->tcp_fin_timeout =
2334 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2335#endif
2336#ifdef CONFIG_IP_VS_PROTO_UDP
2337 u->udp_timeout =
2338 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2339#endif
2340}
2341
2342
/* Index of a get-sockopt command within get_arglen[]. */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
/* Fixed-size part of the argument for each get command. */
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
/* Two records: one for the master state, one for the backup state. */
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)

/*
 * Minimum argument length for each get-sockopt command; this many bytes
 * are copied in from userspace by do_ip_vs_get_ctl().
 */
static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2360
2361static int
2362do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2363{
2364 unsigned char arg[128];
2365 int ret = 0;
2366
2367 if (!capable(CAP_NET_ADMIN))
2368 return -EPERM;
2369
2370 if (*len < get_arglen[GET_CMDID(cmd)]) {
2371 IP_VS_ERR("get_ctl: len %u < %u\n",
2372 *len, get_arglen[GET_CMDID(cmd)]);
2373 return -EINVAL;
2374 }
2375
2376 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2377 return -EFAULT;
2378
2379 if (mutex_lock_interruptible(&__ip_vs_mutex))
2380 return -ERESTARTSYS;
2381
2382 switch (cmd) {
2383 case IP_VS_SO_GET_VERSION:
2384 {
2385 char buf[64];
2386
2387 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2388 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2389 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2390 ret = -EFAULT;
2391 goto out;
2392 }
2393 *len = strlen(buf)+1;
2394 }
2395 break;
2396
2397 case IP_VS_SO_GET_INFO:
2398 {
2399 struct ip_vs_getinfo info;
2400 info.version = IP_VS_VERSION_CODE;
2401 info.size = IP_VS_CONN_TAB_SIZE;
2402 info.num_services = ip_vs_num_services;
2403 if (copy_to_user(user, &info, sizeof(info)) != 0)
2404 ret = -EFAULT;
2405 }
2406 break;
2407
2408 case IP_VS_SO_GET_SERVICES:
2409 {
2410 struct ip_vs_get_services *get;
2411 int size;
2412
2413 get = (struct ip_vs_get_services *)arg;
2414 size = sizeof(*get) +
2415 sizeof(struct ip_vs_service_entry) * get->num_services;
2416 if (*len != size) {
2417 IP_VS_ERR("length: %u != %u\n", *len, size);
2418 ret = -EINVAL;
2419 goto out;
2420 }
2421 ret = __ip_vs_get_service_entries(get, user);
2422 }
2423 break;
2424
2425 case IP_VS_SO_GET_SERVICE:
2426 {
2427 struct ip_vs_service_entry *entry;
2428 struct ip_vs_service *svc;
2429 union nf_inet_addr addr;
2430
2431 entry = (struct ip_vs_service_entry *)arg;
2432 addr.ip = entry->addr;
2433 if (entry->fwmark)
2434 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2435 else
2436 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2437 &addr, entry->port);
2438 if (svc) {
2439 ip_vs_copy_service(entry, svc);
2440 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2441 ret = -EFAULT;
2442 ip_vs_service_put(svc);
2443 } else
2444 ret = -ESRCH;
2445 }
2446 break;
2447
2448 case IP_VS_SO_GET_DESTS:
2449 {
2450 struct ip_vs_get_dests *get;
2451 int size;
2452
2453 get = (struct ip_vs_get_dests *)arg;
2454 size = sizeof(*get) +
2455 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2456 if (*len != size) {
2457 IP_VS_ERR("length: %u != %u\n", *len, size);
2458 ret = -EINVAL;
2459 goto out;
2460 }
2461 ret = __ip_vs_get_dest_entries(get, user);
2462 }
2463 break;
2464
2465 case IP_VS_SO_GET_TIMEOUT:
2466 {
2467 struct ip_vs_timeout_user t;
2468
2469 __ip_vs_get_timeouts(&t);
2470 if (copy_to_user(user, &t, sizeof(t)) != 0)
2471 ret = -EFAULT;
2472 }
2473 break;
2474
2475 case IP_VS_SO_GET_DAEMON:
2476 {
2477 struct ip_vs_daemon_user d[2];
2478
2479 memset(&d, 0, sizeof(d));
2480 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2481 d[0].state = IP_VS_STATE_MASTER;
2482 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2483 d[0].syncid = ip_vs_master_syncid;
2484 }
2485 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2486 d[1].state = IP_VS_STATE_BACKUP;
2487 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2488 d[1].syncid = ip_vs_backup_syncid;
2489 }
2490 if (copy_to_user(user, &d, sizeof(d)) != 0)
2491 ret = -EFAULT;
2492 }
2493 break;
2494
2495 default:
2496 ret = -EINVAL;
2497 }
2498
2499 out:
2500 mutex_unlock(&__ip_vs_mutex);
2501 return ret;
2502}
2503
2504
/*
 * Netfilter sockopt registration: routes PF_INET set/getsockopt calls in
 * the IP_VS_BASE_CTL..IP_VS_SO_{SET,GET}_MAX command ranges (optmax is
 * one past the last command) to the handlers above.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2515
2516
2517
2518
2519
2520
2521static struct genl_family ip_vs_genl_family = {
2522 .id = GENL_ID_GENERATE,
2523 .hdrsize = 0,
2524 .name = IPVS_GENL_NAME,
2525 .version = IPVS_GENL_VERSION,
2526 .maxattr = IPVS_CMD_MAX,
2527};
2528
2529
/* Policy for the top-level attributes of IPVS netlink commands. */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2538
2539
/* Policy for attributes nested inside IPVS_CMD_ATTR_DAEMON. */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2546
2547
/* Policy for attributes nested inside IPVS_CMD_ATTR_SERVICE. */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2563
2564
/* Policy for attributes nested inside IPVS_CMD_ATTR_DEST. */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2578
/*
 * Append a nested statistics attribute of type @container_type to @skb,
 * filled from @stats under its lock.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room; in that
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_stats *stats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
	if (!nl_stats)
		return -EMSGSIZE;

	spin_lock_bh(&stats->lock);

	/* Each NLA_PUT_* jumps to nla_put_failure when the skb is full. */
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);

	spin_unlock_bh(&stats->lock);

	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	/* The failure path is entered with the lock still held. */
	spin_unlock_bh(&stats->lock);
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
2610
/*
 * Append a nested IPVS_CMD_ATTR_SERVICE attribute describing @svc to
 * @skb, including its statistics.
 *
 * Returns 0 on success or -EMSGSIZE if the message has no room; in that
 * case the partially written nest is cancelled.
 */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct nlattr *nl_service;
	/* Report all flag bits as valid. */
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };

	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	/* Each NLA_PUT* jumps to nla_put_failure when the skb is full. */
	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);

	/* A service is identified by fwmark, or by protocol/address/port. */
	if (svc->fwmark) {
		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
	} else {
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
	}

	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
	/* Timeout is reported in seconds. */
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);

	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}
2648
2649static int ip_vs_genl_dump_service(struct sk_buff *skb,
2650 struct ip_vs_service *svc,
2651 struct netlink_callback *cb)
2652{
2653 void *hdr;
2654
2655 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2656 &ip_vs_genl_family, NLM_F_MULTI,
2657 IPVS_CMD_NEW_SERVICE);
2658 if (!hdr)
2659 return -EMSGSIZE;
2660
2661 if (ip_vs_genl_fill_service(skb, svc) < 0)
2662 goto nla_put_failure;
2663
2664 return genlmsg_end(skb, hdr);
2665
2666nla_put_failure:
2667 genlmsg_cancel(skb, hdr);
2668 return -EMSGSIZE;
2669}
2670
2671static int ip_vs_genl_dump_services(struct sk_buff *skb,
2672 struct netlink_callback *cb)
2673{
2674 int idx = 0, i;
2675 int start = cb->args[0];
2676 struct ip_vs_service *svc;
2677
2678 mutex_lock(&__ip_vs_mutex);
2679 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2680 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2681 if (++idx <= start)
2682 continue;
2683 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2684 idx--;
2685 goto nla_put_failure;
2686 }
2687 }
2688 }
2689
2690 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2691 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2692 if (++idx <= start)
2693 continue;
2694 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2695 idx--;
2696 goto nla_put_failure;
2697 }
2698 }
2699 }
2700
2701nla_put_failure:
2702 mutex_unlock(&__ip_vs_mutex);
2703 cb->args[0] = idx;
2704
2705 return skb->len;
2706}
2707
2708static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2709 struct nlattr *nla, int full_entry)
2710{
2711 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2712 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2713
2714
2715 if (nla == NULL ||
2716 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2717 return -EINVAL;
2718
2719 nla_af = attrs[IPVS_SVC_ATTR_AF];
2720 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2721 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2722 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2723 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2724
2725 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2726 return -EINVAL;
2727
2728 usvc->af = nla_get_u16(nla_af);
2729#ifdef CONFIG_IP_VS_IPV6
2730 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2731#else
2732 if (usvc->af != AF_INET)
2733#endif
2734 return -EAFNOSUPPORT;
2735
2736 if (nla_fwmark) {
2737 usvc->protocol = IPPROTO_TCP;
2738 usvc->fwmark = nla_get_u32(nla_fwmark);
2739 } else {
2740 usvc->protocol = nla_get_u16(nla_protocol);
2741 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2742 usvc->port = nla_get_u16(nla_port);
2743 usvc->fwmark = 0;
2744 }
2745
2746
2747 if (full_entry) {
2748 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2749 *nla_netmask;
2750 struct ip_vs_flags flags;
2751 struct ip_vs_service *svc;
2752
2753 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2754 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2755 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2756 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2757
2758 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2759 return -EINVAL;
2760
2761 nla_memcpy(&flags, nla_flags, sizeof(flags));
2762
2763
2764 if (usvc->fwmark)
2765 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2766 else
2767 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2768 &usvc->addr, usvc->port);
2769 if (svc) {
2770 usvc->flags = svc->flags;
2771 ip_vs_service_put(svc);
2772 } else
2773 usvc->flags = 0;
2774
2775
2776 usvc->flags = (usvc->flags & ~flags.mask) |
2777 (flags.flags & flags.mask);
2778 usvc->sched_name = nla_data(nla_sched);
2779 usvc->timeout = nla_get_u32(nla_timeout);
2780 usvc->netmask = nla_get_u32(nla_netmask);
2781 }
2782
2783 return 0;
2784}
2785
2786static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2787{
2788 struct ip_vs_service_user_kern usvc;
2789 int ret;
2790
2791 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2792 if (ret)
2793 return ERR_PTR(ret);
2794
2795 if (usvc.fwmark)
2796 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2797 else
2798 return __ip_vs_service_get(usvc.af, usvc.protocol,
2799 &usvc.addr, usvc.port);
2800}
2801
2802static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2803{
2804 struct nlattr *nl_dest;
2805
2806 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2807 if (!nl_dest)
2808 return -EMSGSIZE;
2809
2810 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2811 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2812
2813 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2814 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2815 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2816 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2817 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2818 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2819 atomic_read(&dest->activeconns));
2820 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2821 atomic_read(&dest->inactconns));
2822 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2823 atomic_read(&dest->persistconns));
2824
2825 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2826 goto nla_put_failure;
2827
2828 nla_nest_end(skb, nl_dest);
2829
2830 return 0;
2831
2832nla_put_failure:
2833 nla_nest_cancel(skb, nl_dest);
2834 return -EMSGSIZE;
2835}
2836
2837static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2838 struct netlink_callback *cb)
2839{
2840 void *hdr;
2841
2842 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2843 &ip_vs_genl_family, NLM_F_MULTI,
2844 IPVS_CMD_NEW_DEST);
2845 if (!hdr)
2846 return -EMSGSIZE;
2847
2848 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2849 goto nla_put_failure;
2850
2851 return genlmsg_end(skb, hdr);
2852
2853nla_put_failure:
2854 genlmsg_cancel(skb, hdr);
2855 return -EMSGSIZE;
2856}
2857
2858static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2859 struct netlink_callback *cb)
2860{
2861 int idx = 0;
2862 int start = cb->args[0];
2863 struct ip_vs_service *svc;
2864 struct ip_vs_dest *dest;
2865 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2866
2867 mutex_lock(&__ip_vs_mutex);
2868
2869
2870 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2871 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2872 goto out_err;
2873
2874 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2875 if (IS_ERR(svc) || svc == NULL)
2876 goto out_err;
2877
2878
2879 list_for_each_entry(dest, &svc->destinations, n_list) {
2880 if (++idx <= start)
2881 continue;
2882 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2883 idx--;
2884 goto nla_put_failure;
2885 }
2886 }
2887
2888nla_put_failure:
2889 cb->args[0] = idx;
2890 ip_vs_service_put(svc);
2891
2892out_err:
2893 mutex_unlock(&__ip_vs_mutex);
2894
2895 return skb->len;
2896}
2897
2898static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2899 struct nlattr *nla, int full_entry)
2900{
2901 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2902 struct nlattr *nla_addr, *nla_port;
2903
2904
2905 if (nla == NULL ||
2906 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2907 return -EINVAL;
2908
2909 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2910 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2911
2912 if (!(nla_addr && nla_port))
2913 return -EINVAL;
2914
2915 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2916 udest->port = nla_get_u16(nla_port);
2917
2918
2919 if (full_entry) {
2920 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2921 *nla_l_thresh;
2922
2923 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2924 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2925 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2926 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2927
2928 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2929 return -EINVAL;
2930
2931 udest->conn_flags = nla_get_u32(nla_fwd)
2932 & IP_VS_CONN_F_FWD_MASK;
2933 udest->weight = nla_get_u32(nla_weight);
2934 udest->u_threshold = nla_get_u32(nla_u_thresh);
2935 udest->l_threshold = nla_get_u32(nla_l_thresh);
2936 }
2937
2938 return 0;
2939}
2940
2941static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2942 const char *mcast_ifn, __be32 syncid)
2943{
2944 struct nlattr *nl_daemon;
2945
2946 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2947 if (!nl_daemon)
2948 return -EMSGSIZE;
2949
2950 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2951 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2952 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2953
2954 nla_nest_end(skb, nl_daemon);
2955
2956 return 0;
2957
2958nla_put_failure:
2959 nla_nest_cancel(skb, nl_daemon);
2960 return -EMSGSIZE;
2961}
2962
2963static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2964 const char *mcast_ifn, __be32 syncid,
2965 struct netlink_callback *cb)
2966{
2967 void *hdr;
2968 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2969 &ip_vs_genl_family, NLM_F_MULTI,
2970 IPVS_CMD_NEW_DAEMON);
2971 if (!hdr)
2972 return -EMSGSIZE;
2973
2974 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2975 goto nla_put_failure;
2976
2977 return genlmsg_end(skb, hdr);
2978
2979nla_put_failure:
2980 genlmsg_cancel(skb, hdr);
2981 return -EMSGSIZE;
2982}
2983
2984static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2985 struct netlink_callback *cb)
2986{
2987 mutex_lock(&__ip_vs_mutex);
2988 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2989 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2990 ip_vs_master_mcast_ifn,
2991 ip_vs_master_syncid, cb) < 0)
2992 goto nla_put_failure;
2993
2994 cb->args[0] = 1;
2995 }
2996
2997 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2998 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2999 ip_vs_backup_mcast_ifn,
3000 ip_vs_backup_syncid, cb) < 0)
3001 goto nla_put_failure;
3002
3003 cb->args[1] = 1;
3004 }
3005
3006nla_put_failure:
3007 mutex_unlock(&__ip_vs_mutex);
3008
3009 return skb->len;
3010}
3011
3012static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3013{
3014 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3015 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3016 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3017 return -EINVAL;
3018
3019 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3020 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3021 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3022}
3023
3024static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3025{
3026 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3027 return -EINVAL;
3028
3029 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3030}
3031
3032static int ip_vs_genl_set_config(struct nlattr **attrs)
3033{
3034 struct ip_vs_timeout_user t;
3035
3036 __ip_vs_get_timeouts(&t);
3037
3038 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3039 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3040
3041 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3042 t.tcp_fin_timeout =
3043 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3044
3045 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3046 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3047
3048 return ip_vs_set_timeout(&t);
3049}
3050
3051static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3052{
3053 struct ip_vs_service *svc = NULL;
3054 struct ip_vs_service_user_kern usvc;
3055 struct ip_vs_dest_user_kern udest;
3056 int ret = 0, cmd;
3057 int need_full_svc = 0, need_full_dest = 0;
3058
3059 cmd = info->genlhdr->cmd;
3060
3061 mutex_lock(&__ip_vs_mutex);
3062
3063 if (cmd == IPVS_CMD_FLUSH) {
3064 ret = ip_vs_flush();
3065 goto out;
3066 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3067 ret = ip_vs_genl_set_config(info->attrs);
3068 goto out;
3069 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3070 cmd == IPVS_CMD_DEL_DAEMON) {
3071
3072 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3073
3074 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3075 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3076 info->attrs[IPVS_CMD_ATTR_DAEMON],
3077 ip_vs_daemon_policy)) {
3078 ret = -EINVAL;
3079 goto out;
3080 }
3081
3082 if (cmd == IPVS_CMD_NEW_DAEMON)
3083 ret = ip_vs_genl_new_daemon(daemon_attrs);
3084 else
3085 ret = ip_vs_genl_del_daemon(daemon_attrs);
3086 goto out;
3087 } else if (cmd == IPVS_CMD_ZERO &&
3088 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3089 ret = ip_vs_zero_all();
3090 goto out;
3091 }
3092
3093
3094
3095
3096 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3097 need_full_svc = 1;
3098
3099 ret = ip_vs_genl_parse_service(&usvc,
3100 info->attrs[IPVS_CMD_ATTR_SERVICE],
3101 need_full_svc);
3102 if (ret)
3103 goto out;
3104
3105
3106 if (usvc.fwmark == 0)
3107 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3108 &usvc.addr, usvc.port);
3109 else
3110 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3111
3112
3113 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3114 ret = -ESRCH;
3115 goto out;
3116 }
3117
3118
3119
3120
3121 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3122 cmd == IPVS_CMD_DEL_DEST) {
3123 if (cmd != IPVS_CMD_DEL_DEST)
3124 need_full_dest = 1;
3125
3126 ret = ip_vs_genl_parse_dest(&udest,
3127 info->attrs[IPVS_CMD_ATTR_DEST],
3128 need_full_dest);
3129 if (ret)
3130 goto out;
3131 }
3132
3133 switch (cmd) {
3134 case IPVS_CMD_NEW_SERVICE:
3135 if (svc == NULL)
3136 ret = ip_vs_add_service(&usvc, &svc);
3137 else
3138 ret = -EEXIST;
3139 break;
3140 case IPVS_CMD_SET_SERVICE:
3141 ret = ip_vs_edit_service(svc, &usvc);
3142 break;
3143 case IPVS_CMD_DEL_SERVICE:
3144 ret = ip_vs_del_service(svc);
3145 break;
3146 case IPVS_CMD_NEW_DEST:
3147 ret = ip_vs_add_dest(svc, &udest);
3148 break;
3149 case IPVS_CMD_SET_DEST:
3150 ret = ip_vs_edit_dest(svc, &udest);
3151 break;
3152 case IPVS_CMD_DEL_DEST:
3153 ret = ip_vs_del_dest(svc, &udest);
3154 break;
3155 case IPVS_CMD_ZERO:
3156 ret = ip_vs_zero_service(svc);
3157 break;
3158 default:
3159 ret = -EINVAL;
3160 }
3161
3162out:
3163 if (svc)
3164 ip_vs_service_put(svc);
3165 mutex_unlock(&__ip_vs_mutex);
3166
3167 return ret;
3168}
3169
3170static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3171{
3172 struct sk_buff *msg;
3173 void *reply;
3174 int ret, cmd, reply_cmd;
3175
3176 cmd = info->genlhdr->cmd;
3177
3178 if (cmd == IPVS_CMD_GET_SERVICE)
3179 reply_cmd = IPVS_CMD_NEW_SERVICE;
3180 else if (cmd == IPVS_CMD_GET_INFO)
3181 reply_cmd = IPVS_CMD_SET_INFO;
3182 else if (cmd == IPVS_CMD_GET_CONFIG)
3183 reply_cmd = IPVS_CMD_SET_CONFIG;
3184 else {
3185 IP_VS_ERR("unknown Generic Netlink command\n");
3186 return -EINVAL;
3187 }
3188
3189 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3190 if (!msg)
3191 return -ENOMEM;
3192
3193 mutex_lock(&__ip_vs_mutex);
3194
3195 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3196 if (reply == NULL)
3197 goto nla_put_failure;
3198
3199 switch (cmd) {
3200 case IPVS_CMD_GET_SERVICE:
3201 {
3202 struct ip_vs_service *svc;
3203
3204 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3205 if (IS_ERR(svc)) {
3206 ret = PTR_ERR(svc);
3207 goto out_err;
3208 } else if (svc) {
3209 ret = ip_vs_genl_fill_service(msg, svc);
3210 ip_vs_service_put(svc);
3211 if (ret)
3212 goto nla_put_failure;
3213 } else {
3214 ret = -ESRCH;
3215 goto out_err;
3216 }
3217
3218 break;
3219 }
3220
3221 case IPVS_CMD_GET_CONFIG:
3222 {
3223 struct ip_vs_timeout_user t;
3224
3225 __ip_vs_get_timeouts(&t);
3226#ifdef CONFIG_IP_VS_PROTO_TCP
3227 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3228 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3229 t.tcp_fin_timeout);
3230#endif
3231#ifdef CONFIG_IP_VS_PROTO_UDP
3232 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3233#endif
3234
3235 break;
3236 }
3237
3238 case IPVS_CMD_GET_INFO:
3239 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3240 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3241 IP_VS_CONN_TAB_SIZE);
3242 break;
3243 }
3244
3245 genlmsg_end(msg, reply);
3246 ret = genlmsg_unicast(msg, info->snd_pid);
3247 goto out;
3248
3249nla_put_failure:
3250 IP_VS_ERR("not enough space in Netlink message\n");
3251 ret = -EMSGSIZE;
3252
3253out_err:
3254 nlmsg_free(msg);
3255out:
3256 mutex_unlock(&__ip_vs_mutex);
3257
3258 return ret;
3259}
3260
3261
3262static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3263 {
3264 .cmd = IPVS_CMD_NEW_SERVICE,
3265 .flags = GENL_ADMIN_PERM,
3266 .policy = ip_vs_cmd_policy,
3267 .doit = ip_vs_genl_set_cmd,
3268 },
3269 {
3270 .cmd = IPVS_CMD_SET_SERVICE,
3271 .flags = GENL_ADMIN_PERM,
3272 .policy = ip_vs_cmd_policy,
3273 .doit = ip_vs_genl_set_cmd,
3274 },
3275 {
3276 .cmd = IPVS_CMD_DEL_SERVICE,
3277 .flags = GENL_ADMIN_PERM,
3278 .policy = ip_vs_cmd_policy,
3279 .doit = ip_vs_genl_set_cmd,
3280 },
3281 {
3282 .cmd = IPVS_CMD_GET_SERVICE,
3283 .flags = GENL_ADMIN_PERM,
3284 .doit = ip_vs_genl_get_cmd,
3285 .dumpit = ip_vs_genl_dump_services,
3286 .policy = ip_vs_cmd_policy,
3287 },
3288 {
3289 .cmd = IPVS_CMD_NEW_DEST,
3290 .flags = GENL_ADMIN_PERM,
3291 .policy = ip_vs_cmd_policy,
3292 .doit = ip_vs_genl_set_cmd,
3293 },
3294 {
3295 .cmd = IPVS_CMD_SET_DEST,
3296 .flags = GENL_ADMIN_PERM,
3297 .policy = ip_vs_cmd_policy,
3298 .doit = ip_vs_genl_set_cmd,
3299 },
3300 {
3301 .cmd = IPVS_CMD_DEL_DEST,
3302 .flags = GENL_ADMIN_PERM,
3303 .policy = ip_vs_cmd_policy,
3304 .doit = ip_vs_genl_set_cmd,
3305 },
3306 {
3307 .cmd = IPVS_CMD_GET_DEST,
3308 .flags = GENL_ADMIN_PERM,
3309 .policy = ip_vs_cmd_policy,
3310 .dumpit = ip_vs_genl_dump_dests,
3311 },
3312 {
3313 .cmd = IPVS_CMD_NEW_DAEMON,
3314 .flags = GENL_ADMIN_PERM,
3315 .policy = ip_vs_cmd_policy,
3316 .doit = ip_vs_genl_set_cmd,
3317 },
3318 {
3319 .cmd = IPVS_CMD_DEL_DAEMON,
3320 .flags = GENL_ADMIN_PERM,
3321 .policy = ip_vs_cmd_policy,
3322 .doit = ip_vs_genl_set_cmd,
3323 },
3324 {
3325 .cmd = IPVS_CMD_GET_DAEMON,
3326 .flags = GENL_ADMIN_PERM,
3327 .dumpit = ip_vs_genl_dump_daemons,
3328 },
3329 {
3330 .cmd = IPVS_CMD_SET_CONFIG,
3331 .flags = GENL_ADMIN_PERM,
3332 .policy = ip_vs_cmd_policy,
3333 .doit = ip_vs_genl_set_cmd,
3334 },
3335 {
3336 .cmd = IPVS_CMD_GET_CONFIG,
3337 .flags = GENL_ADMIN_PERM,
3338 .doit = ip_vs_genl_get_cmd,
3339 },
3340 {
3341 .cmd = IPVS_CMD_GET_INFO,
3342 .flags = GENL_ADMIN_PERM,
3343 .doit = ip_vs_genl_get_cmd,
3344 },
3345 {
3346 .cmd = IPVS_CMD_ZERO,
3347 .flags = GENL_ADMIN_PERM,
3348 .policy = ip_vs_cmd_policy,
3349 .doit = ip_vs_genl_set_cmd,
3350 },
3351 {
3352 .cmd = IPVS_CMD_FLUSH,
3353 .flags = GENL_ADMIN_PERM,
3354 .doit = ip_vs_genl_set_cmd,
3355 },
3356};
3357
3358static int __init ip_vs_genl_register(void)
3359{
3360 int ret, i;
3361
3362 ret = genl_register_family(&ip_vs_genl_family);
3363 if (ret)
3364 return ret;
3365
3366 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3367 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3368 if (ret)
3369 goto err_out;
3370 }
3371 return 0;
3372
3373err_out:
3374 genl_unregister_family(&ip_vs_genl_family);
3375 return ret;
3376}
3377
3378static void ip_vs_genl_unregister(void)
3379{
3380 genl_unregister_family(&ip_vs_genl_family);
3381}
3382
3383
3384
3385
3386int __init ip_vs_control_init(void)
3387{
3388 int ret;
3389 int idx;
3390
3391 EnterFunction(2);
3392
3393 ret = nf_register_sockopt(&ip_vs_sockopts);
3394 if (ret) {
3395 IP_VS_ERR("cannot register sockopt.\n");
3396 return ret;
3397 }
3398
3399 ret = ip_vs_genl_register();
3400 if (ret) {
3401 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3402 nf_unregister_sockopt(&ip_vs_sockopts);
3403 return ret;
3404 }
3405
3406 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3407 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3408
3409 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3410
3411
3412 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3413 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3414 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3415 }
3416 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3417 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3418 }
3419
3420 ip_vs_new_estimator(&ip_vs_stats);
3421
3422
3423 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3424
3425 LeaveFunction(2);
3426 return 0;
3427}
3428
3429
3430void ip_vs_control_cleanup(void)
3431{
3432 EnterFunction(2);
3433 ip_vs_trash_cleanup();
3434 cancel_rearming_delayed_work(&defense_work);
3435 cancel_work_sync(&defense_work.work);
3436 ip_vs_kill_estimator(&ip_vs_stats);
3437 unregister_sysctl_table(sysctl_header);
3438 proc_net_remove(&init_net, "ip_vs_stats");
3439 proc_net_remove(&init_net, "ip_vs");
3440 ip_vs_genl_unregister();
3441 nf_unregister_sockopt(&ip_vs_sockopts);
3442 LeaveFunction(2);
3443}
3444