1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/fs.h>
27#include <linux/sysctl.h>
28#include <linux/proc_fs.h>
29#include <linux/timer.h>
30#include <linux/swap.h>
31#include <linux/proc_fs.h>
32
33#include <linux/netfilter.h>
34#include <linux/netfilter_ipv4.h>
35
36#include <net/ip.h>
37#include <net/sock.h>
38
39#include <asm/uaccess.h>
40
41#include <net/ip_vs.h>
42
43
44static DECLARE_MUTEX(__ip_vs_mutex);
45
46
47rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
48
49
50static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
51
52
53static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
54
55
56static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
57
58
59static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
60
61
62int ip_vs_drop_rate = 0;
63int ip_vs_drop_counter = 0;
64atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
65
66
67static int ip_vs_num_services = 0;
68
69
70static int sysctl_ip_vs_drop_entry = 0;
71static int sysctl_ip_vs_drop_packet = 0;
72static int sysctl_ip_vs_secure_tcp = 0;
73static int sysctl_ip_vs_amemthresh = 2048;
74static int sysctl_ip_vs_am_droprate = 10;
75int sysctl_ip_vs_cache_bypass = 0;
76int sysctl_ip_vs_expire_nodest_conn = 0;
77int sysctl_ip_vs_expire_quiescent_template = 0;
78int sysctl_ip_vs_sync_threshold = 3;
79int sysctl_ip_vs_nat_icmp_send = 0;
80
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; writable through the "debug_level" sysctl entry. */
static int sysctl_ip_vs_debug_level = 0;

/* Accessor returning the current debug verbosity. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
89
90
91
92
93static void update_defense_level(void)
94{
95 struct sysinfo i;
96 int availmem;
97 int nomem;
98
99
100 si_meminfo(&i);
101 availmem = i.freeram + i.bufferram;
102
103 nomem = (availmem < sysctl_ip_vs_amemthresh);
104
105
106 spin_lock(&__ip_vs_dropentry_lock);
107 switch (sysctl_ip_vs_drop_entry) {
108 case 0:
109 atomic_set(&ip_vs_dropentry, 0);
110 break;
111 case 1:
112 if (nomem) {
113 atomic_set(&ip_vs_dropentry, 1);
114 sysctl_ip_vs_drop_entry = 2;
115 } else {
116 atomic_set(&ip_vs_dropentry, 0);
117 }
118 break;
119 case 2:
120 if (nomem) {
121 atomic_set(&ip_vs_dropentry, 1);
122 } else {
123 atomic_set(&ip_vs_dropentry, 0);
124 sysctl_ip_vs_drop_entry = 1;
125 };
126 break;
127 case 3:
128 atomic_set(&ip_vs_dropentry, 1);
129 break;
130 }
131 spin_unlock(&__ip_vs_dropentry_lock);
132
133
134 spin_lock(&__ip_vs_droppacket_lock);
135 switch (sysctl_ip_vs_drop_packet) {
136 case 0:
137 ip_vs_drop_rate = 0;
138 break;
139 case 1:
140 if (nomem) {
141 ip_vs_drop_rate = ip_vs_drop_counter
142 = sysctl_ip_vs_amemthresh /
143 (sysctl_ip_vs_amemthresh - availmem);
144 sysctl_ip_vs_drop_packet = 2;
145 } else {
146 ip_vs_drop_rate = 0;
147 }
148 break;
149 case 2:
150 if (nomem) {
151 ip_vs_drop_rate = ip_vs_drop_counter
152 = sysctl_ip_vs_amemthresh /
153 (sysctl_ip_vs_amemthresh - availmem);
154 } else {
155 ip_vs_drop_rate = 0;
156 sysctl_ip_vs_drop_packet = 1;
157 }
158 break;
159 case 3:
160 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
161 break;
162 }
163 spin_unlock(&__ip_vs_droppacket_lock);
164
165
166 write_lock(&__ip_vs_securetcp_lock);
167 switch (sysctl_ip_vs_secure_tcp) {
168 case 0:
169 ip_vs_secure_tcp_set(0);
170 break;
171 case 1:
172 if (nomem) {
173 ip_vs_secure_tcp_set(1);
174 sysctl_ip_vs_secure_tcp = 2;
175 } else {
176 ip_vs_secure_tcp_set(0);
177 }
178 break;
179 case 2:
180 if (nomem) {
181 ip_vs_secure_tcp_set(1);
182 } else {
183 ip_vs_secure_tcp_set(0);
184 sysctl_ip_vs_secure_tcp = 1;
185 }
186 break;
187 case 3:
188 ip_vs_secure_tcp_set(1);
189 break;
190 }
191 write_unlock(&__ip_vs_securetcp_lock);
192}
193
194
195
196
197
198static struct timer_list defense_timer;
199#define DEFENSE_TIMER_PERIOD 1*HZ
200
201static void defense_timer_handler(unsigned long data)
202{
203 update_defense_level();
204 if (atomic_read(&ip_vs_dropentry))
205 ip_vs_random_dropentry();
206
207 mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
208}
209
210
211
212
213
214#define IP_VS_SVC_TAB_BITS 8
215#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
216#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
217
218
219static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
220
221static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
222
223
224
225
226#define IP_VS_RTAB_BITS 4
227#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
228#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
229
230static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
231
232
233
234
235static LIST_HEAD(ip_vs_dest_trash);
236
237
238
239
240static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
241static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
242
243
244
245
246
247static __inline__ unsigned
248ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port)
249{
250 register unsigned porth = ntohs(port);
251
252 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
253 & IP_VS_SVC_TAB_MASK;
254}
255
256
257
258
259static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
260{
261 return fwmark & IP_VS_SVC_TAB_MASK;
262}
263
264
265
266
267
268
269
270static int ip_vs_svc_hash(struct ip_vs_service *svc)
271{
272 unsigned hash;
273
274 if (svc->flags & IP_VS_SVC_F_HASHED) {
275 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
276 "called from %p\n", __builtin_return_address(0));
277 return 0;
278 }
279
280 if (svc->fwmark == 0) {
281
282
283
284 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port);
285 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
286 } else {
287
288
289
290 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
291 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
292 }
293
294 svc->flags |= IP_VS_SVC_F_HASHED;
295
296 atomic_inc(&svc->refcnt);
297 return 1;
298}
299
300
301
302
303
304
305
306static int ip_vs_svc_unhash(struct ip_vs_service *svc)
307{
308 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
309 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
310 "called from %p\n", __builtin_return_address(0));
311 return 0;
312 }
313
314 if (svc->fwmark == 0) {
315
316
317
318 list_del(&svc->s_list);
319 } else {
320
321
322
323 list_del(&svc->f_list);
324 }
325
326 svc->flags &= ~IP_VS_SVC_F_HASHED;
327 atomic_dec(&svc->refcnt);
328 return 1;
329}
330
331
332
333
334
335static __inline__ struct ip_vs_service *
336__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport)
337{
338 unsigned hash;
339 struct ip_vs_service *svc;
340 struct list_head *l,*e;
341
342
343
344
345 hash = ip_vs_svc_hashkey(protocol, vaddr, vport);
346
347 l = &ip_vs_svc_table[hash];
348 for (e=l->next; e!=l; e=e->next) {
349 svc = list_entry(e, struct ip_vs_service, s_list);
350 if ((svc->addr == vaddr)
351 && (svc->port == vport)
352 && (svc->protocol == protocol)) {
353
354 atomic_inc(&svc->usecnt);
355 return svc;
356 }
357 }
358
359 return NULL;
360}
361
362
363
364
365
366static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark)
367{
368 unsigned hash;
369 struct ip_vs_service *svc;
370 struct list_head *l,*e;
371
372
373
374
375 hash = ip_vs_svc_fwm_hashkey(fwmark);
376
377 l = &ip_vs_svc_fwm_table[hash];
378 for (e=l->next; e!=l; e=e->next) {
379 svc = list_entry(e, struct ip_vs_service, f_list);
380 if (svc->fwmark == fwmark) {
381
382 atomic_inc(&svc->usecnt);
383 return svc;
384 }
385 }
386
387 return NULL;
388}
389
390struct ip_vs_service *
391ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport)
392{
393 struct ip_vs_service *svc;
394
395 read_lock(&__ip_vs_svc_lock);
396
397
398
399
400 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark)))
401 goto out;
402
403
404
405
406
407 svc = __ip_vs_service_get(protocol, vaddr, vport);
408
409 if (svc == NULL
410 && protocol == IPPROTO_TCP
411 && atomic_read(&ip_vs_ftpsvc_counter)
412 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
413
414
415
416
417 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT);
418 }
419
420 if (svc == NULL
421 && atomic_read(&ip_vs_nullsvc_counter)) {
422
423
424
425 svc = __ip_vs_service_get(protocol, vaddr, 0);
426 }
427
428 out:
429 read_unlock(&__ip_vs_svc_lock);
430
431 IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
432 fwmark, ip_vs_proto_name(protocol),
433 NIPQUAD(vaddr), ntohs(vport),
434 svc?"hit":"not hit");
435
436 return svc;
437}
438
439
440static inline void
441__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
442{
443 atomic_inc(&svc->refcnt);
444 dest->svc = svc;
445}
446
447static inline void
448__ip_vs_unbind_svc(struct ip_vs_dest *dest)
449{
450 struct ip_vs_service *svc = dest->svc;
451
452 dest->svc = NULL;
453 if (atomic_dec_and_test(&svc->refcnt))
454 kfree(svc);
455}
456
457
458
459
460static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port)
461{
462 register unsigned porth = ntohs(port);
463
464 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
465 & IP_VS_RTAB_MASK;
466}
467
468
469
470
471
472
473static int ip_vs_rs_hash(struct ip_vs_dest *dest)
474{
475 unsigned hash;
476
477 if (!list_empty(&dest->d_list)) {
478 return 0;
479 }
480
481
482
483
484
485 hash = ip_vs_rs_hashkey(dest->addr, dest->port);
486 list_add(&dest->d_list, &ip_vs_rtable[hash]);
487
488 return 1;
489}
490
491
492
493
494
495
496static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
497{
498
499
500
501 if (!list_empty(&dest->d_list)) {
502 list_del(&dest->d_list);
503 INIT_LIST_HEAD(&dest->d_list);
504 }
505
506 return 1;
507}
508
509
510
511
512struct ip_vs_dest *
513ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport)
514{
515 unsigned hash;
516 struct ip_vs_dest *dest;
517 struct list_head *l,*e;
518
519
520
521
522
523 hash = ip_vs_rs_hashkey(daddr, dport);
524
525 l = &ip_vs_rtable[hash];
526
527 read_lock(&__ip_vs_rs_lock);
528 for (e=l->next; e!=l; e=e->next) {
529 dest = list_entry(e, struct ip_vs_dest, d_list);
530 if ((dest->addr == daddr)
531 && (dest->port == dport)
532 && ((dest->protocol == protocol) ||
533 dest->vfwmark)) {
534
535 read_unlock(&__ip_vs_rs_lock);
536 return dest;
537 }
538 }
539 read_unlock(&__ip_vs_rs_lock);
540
541 return NULL;
542}
543
544
545
546
547static struct ip_vs_dest *
548ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
549{
550 struct ip_vs_dest *dest;
551 struct list_head *l, *e;
552
553
554
555
556 l = &svc->destinations;
557 for (e=l->next; e!=l; e=e->next) {
558 dest = list_entry(e, struct ip_vs_dest, n_list);
559 if ((dest->addr == daddr) && (dest->port == dport)) {
560
561 return dest;
562 }
563 }
564
565 return NULL;
566}
567
568
569
570
571
572
573
574
575
576
577
578
579static struct ip_vs_dest *
580ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport)
581{
582 struct ip_vs_dest *dest;
583 struct list_head *l, *e;
584
585
586
587
588 l = &ip_vs_dest_trash;
589
590 for (e=l->next; e!=l; e=e->next) {
591 dest = list_entry(e, struct ip_vs_dest, n_list);
592 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
593 "refcnt=%d\n",
594 dest->vfwmark,
595 NIPQUAD(dest->addr), ntohs(dest->port),
596 atomic_read(&dest->refcnt));
597 if (dest->addr == daddr &&
598 dest->port == dport &&
599 dest->vfwmark == svc->fwmark &&
600 dest->protocol == svc->protocol &&
601 (svc->fwmark ||
602 (dest->vaddr == svc->addr &&
603 dest->vport == svc->port))) {
604
605 return dest;
606 }
607
608
609
610
611 if (atomic_read(&dest->refcnt) == 1) {
612 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
613 "from trash\n",
614 dest->vfwmark,
615 NIPQUAD(dest->addr), ntohs(dest->port));
616 e = e->prev;
617 list_del(&dest->n_list);
618 __ip_vs_dst_reset(dest);
619 __ip_vs_unbind_svc(dest);
620 kfree(dest);
621 }
622 }
623
624 return NULL;
625}
626
627
628
629
630
631
632
633
634
635
636
637static void ip_vs_trash_cleanup(void)
638{
639 struct ip_vs_dest *dest;
640 struct list_head *l;
641
642 l = &ip_vs_dest_trash;
643
644 while (l->next != l) {
645 dest = list_entry(l->next, struct ip_vs_dest, n_list);
646 list_del(&dest->n_list);
647 __ip_vs_dst_reset(dest);
648 __ip_vs_unbind_svc(dest);
649 kfree(dest);
650 }
651}
652
653
654static inline void
655__ip_vs_zero_stats(struct ip_vs_stats *stats)
656{
657 spin_lock_bh(&stats->lock);
658 memset(stats, 0, (char *)&stats->lock - (char *)stats);
659 spin_unlock_bh(&stats->lock);
660 ip_vs_zero_estimator(stats);
661}
662
663
664
665
666static void __ip_vs_update_dest(struct ip_vs_service *svc,
667 struct ip_vs_dest *dest,
668 struct ip_vs_rule_user *ur)
669{
670 int conn_flags;
671
672
673
674
675 atomic_set(&dest->weight, ur->weight);
676
677 conn_flags = ur->conn_flags | IP_VS_CONN_F_INACTIVE;
678
679
680
681
682 if (inet_addr_type(ur->daddr) == RTN_LOCAL) {
683 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
684 | IP_VS_CONN_F_LOCALNODE;
685 }
686
687
688
689
690 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
691 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
692 } else {
693
694
695
696
697 write_lock_bh(&__ip_vs_rs_lock);
698 ip_vs_rs_hash(dest);
699 write_unlock_bh(&__ip_vs_rs_lock);
700 }
701 atomic_set(&dest->conn_flags, conn_flags);
702
703
704 if (!dest->svc) {
705 __ip_vs_bind_svc(dest, svc);
706 } else {
707 if (dest->svc != svc) {
708 __ip_vs_unbind_svc(dest);
709 __ip_vs_zero_stats(&dest->stats);
710 __ip_vs_bind_svc(dest, svc);
711 }
712 }
713
714
715 dest->flags |= IP_VS_DEST_F_AVAILABLE;
716}
717
718
719
720
721
722static int
723ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_rule_user *ur,
724 struct ip_vs_dest **destp)
725{
726 struct ip_vs_dest *dest;
727 unsigned atype;
728
729 EnterFunction(2);
730
731 atype = inet_addr_type(ur->daddr);
732 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
733 return -EINVAL;
734
735 *destp = dest = (struct ip_vs_dest*)
736 kmalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
737 if (dest == NULL) {
738 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
739 return -ENOMEM;
740 }
741 memset(dest, 0, sizeof(struct ip_vs_dest));
742
743 dest->protocol = svc->protocol;
744 dest->vaddr = svc->addr;
745 dest->vport = svc->port;
746 dest->vfwmark = svc->fwmark;
747 dest->addr = ur->daddr;
748 dest->port = ur->dport;
749
750 atomic_set(&dest->activeconns, 0);
751 atomic_set(&dest->inactconns, 0);
752 atomic_set(&dest->refcnt, 0);
753
754 INIT_LIST_HEAD(&dest->d_list);
755 dest->dst_lock = SPIN_LOCK_UNLOCKED;
756 dest->stats.lock = SPIN_LOCK_UNLOCKED;
757 __ip_vs_update_dest(svc, dest, ur);
758 ip_vs_new_estimator(&dest->stats);
759
760 LeaveFunction(2);
761 return 0;
762}
763
764
765
766
767
768static int ip_vs_add_dest(struct ip_vs_service *svc,
769 struct ip_vs_rule_user *ur)
770{
771 struct ip_vs_dest *dest;
772 __u32 daddr = ur->daddr;
773 __u16 dport = ur->dport;
774 int ret;
775
776 EnterFunction(2);
777
778 if (ur->weight < 0) {
779 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
780 return -ERANGE;
781 }
782
783
784
785
786 dest = ip_vs_lookup_dest(svc, daddr, dport);
787 if (dest != NULL) {
788 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
789 return -EEXIST;
790 }
791
792
793
794
795
796 dest = ip_vs_trash_get_dest(svc, daddr, dport);
797 if (dest != NULL) {
798 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
799 "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
800 NIPQUAD(daddr), ntohs(dport),
801 atomic_read(&dest->refcnt),
802 dest->vfwmark,
803 NIPQUAD(dest->vaddr),
804 ntohs(dest->vport));
805 __ip_vs_update_dest(svc, dest, ur);
806
807
808
809
810 list_del(&dest->n_list);
811
812 ip_vs_new_estimator(&dest->stats);
813
814 write_lock_bh(&__ip_vs_svc_lock);
815
816
817
818
819 while (atomic_read(&svc->usecnt) > 1) {};
820
821 list_add(&dest->n_list, &svc->destinations);
822 svc->num_dests++;
823
824
825 svc->scheduler->update_service(svc);
826
827 write_unlock_bh(&__ip_vs_svc_lock);
828 return 0;
829 }
830
831
832
833
834 ret = ip_vs_new_dest(svc, ur, &dest);
835 if (ret) {
836 return ret;
837 }
838
839
840
841
842 atomic_inc(&dest->refcnt);
843
844 write_lock_bh(&__ip_vs_svc_lock);
845
846
847
848
849 while (atomic_read(&svc->usecnt) > 1) {};
850
851 list_add(&dest->n_list, &svc->destinations);
852 svc->num_dests++;
853
854
855 svc->scheduler->update_service(svc);
856
857 write_unlock_bh(&__ip_vs_svc_lock);
858
859 LeaveFunction(2);
860
861 return 0;
862}
863
864
865
866
867
868static int ip_vs_edit_dest(struct ip_vs_service *svc,
869 struct ip_vs_rule_user *ur)
870{
871 struct ip_vs_dest *dest;
872 __u32 daddr = ur->daddr;
873 __u16 dport = ur->dport;
874
875 EnterFunction(2);
876
877 if (ur->weight < 0) {
878 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
879 return -ERANGE;
880 }
881
882
883
884
885 dest = ip_vs_lookup_dest(svc, daddr, dport);
886 if (dest == NULL) {
887 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
888 return -ENOENT;
889 }
890
891 __ip_vs_update_dest(svc, dest, ur);
892
893 write_lock_bh(&__ip_vs_svc_lock);
894
895
896 while (atomic_read(&svc->usecnt) > 1) {};
897
898
899 svc->scheduler->update_service(svc);
900
901 write_unlock_bh(&__ip_vs_svc_lock);
902
903 LeaveFunction(2);
904
905 return 0;
906}
907
908
909
910
911
912static void __ip_vs_del_dest(struct ip_vs_dest *dest)
913{
914 ip_vs_kill_estimator(&dest->stats);
915
916
917
918
919 write_lock_bh(&__ip_vs_rs_lock);
920 ip_vs_rs_unhash(dest);
921 write_unlock_bh(&__ip_vs_rs_lock);
922
923
924
925
926
927
928 if (atomic_dec_and_test(&dest->refcnt)) {
929 __ip_vs_dst_reset(dest);
930
931
932
933
934
935 atomic_dec(&dest->svc->refcnt);
936 kfree(dest);
937 } else {
938 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
939 NIPQUAD(dest->addr), ntohs(dest->port),
940 atomic_read(&dest->refcnt));
941 list_add(&dest->n_list, &ip_vs_dest_trash);
942 atomic_inc(&dest->refcnt);
943 }
944}
945
946
947
948
949
950static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
951 struct ip_vs_dest *dest,
952 int svcupd)
953{
954 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
955
956
957
958
959 list_del(&dest->n_list);
960 svc->num_dests--;
961 if (svcupd) {
962
963
964
965 svc->scheduler->update_service(svc);
966 }
967}
968
969
970
971
972
973static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_rule_user *ur)
974{
975 struct ip_vs_dest *dest;
976 __u32 daddr = ur->daddr;
977 __u16 dport = ur->dport;
978
979 EnterFunction(2);
980
981 dest = ip_vs_lookup_dest(svc, daddr, dport);
982 if (dest == NULL) {
983 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
984 return -ENOENT;
985 }
986
987 write_lock_bh(&__ip_vs_svc_lock);
988
989
990
991
992 while (atomic_read(&svc->usecnt) > 1) {};
993
994
995
996
997 __ip_vs_unlink_dest(svc, dest, 1);
998
999 write_unlock_bh(&__ip_vs_svc_lock);
1000
1001
1002
1003
1004 __ip_vs_del_dest(dest);
1005
1006 LeaveFunction(2);
1007
1008 return 0;
1009}
1010
1011
1012
1013
1014
1015static int
1016ip_vs_add_service(struct ip_vs_rule_user *ur, struct ip_vs_service **svc_p)
1017{
1018 int ret = 0;
1019 struct ip_vs_scheduler *sched;
1020 struct ip_vs_service *svc = NULL;
1021
1022 MOD_INC_USE_COUNT;
1023
1024
1025
1026
1027 sched = ip_vs_scheduler_get(ur->sched_name);
1028 if (sched == NULL) {
1029 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1030 ur->sched_name);
1031 ret = -ENOENT;
1032 goto out_mod_dec;
1033 }
1034
1035 svc = (struct ip_vs_service*)
1036 kmalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1037 if (svc == NULL) {
1038 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1039 ret = -ENOMEM;
1040 goto out_err;
1041 }
1042 memset(svc, 0, sizeof(struct ip_vs_service));
1043
1044 svc->protocol = ur->protocol;
1045 svc->addr = ur->vaddr;
1046 svc->port = ur->vport;
1047 svc->fwmark = ur->vfwmark;
1048 svc->flags = ur->vs_flags;
1049 svc->timeout = ur->timeout * HZ;
1050 svc->netmask = ur->netmask;
1051
1052 INIT_LIST_HEAD(&svc->destinations);
1053 svc->sched_lock = RW_LOCK_UNLOCKED;
1054 svc->stats.lock = SPIN_LOCK_UNLOCKED;
1055
1056
1057
1058
1059 ret = ip_vs_bind_scheduler(svc, sched);
1060 if (ret) {
1061 goto out_err;
1062 }
1063
1064
1065
1066
1067 if (svc->port == FTPPORT)
1068 atomic_inc(&ip_vs_ftpsvc_counter);
1069 else if (svc->port == 0)
1070 atomic_inc(&ip_vs_nullsvc_counter);
1071
1072
1073
1074
1075 atomic_set(&svc->usecnt, 1);
1076 atomic_set(&svc->refcnt, 0);
1077
1078 ip_vs_new_estimator(&svc->stats);
1079 ip_vs_num_services++;
1080
1081
1082
1083
1084 write_lock_bh(&__ip_vs_svc_lock);
1085 ip_vs_svc_hash(svc);
1086 write_unlock_bh(&__ip_vs_svc_lock);
1087
1088 *svc_p = svc;
1089 return 0;
1090
1091 out_err:
1092 if (svc)
1093 kfree(svc);
1094 ip_vs_scheduler_put(sched);
1095 out_mod_dec:
1096 MOD_DEC_USE_COUNT;
1097 return ret;
1098}
1099
1100
1101
1102
1103
1104static int ip_vs_edit_service(struct ip_vs_service *svc,
1105 struct ip_vs_rule_user *ur)
1106{
1107 struct ip_vs_scheduler *sched, *old_sched;
1108 int ret = 0;
1109
1110
1111
1112
1113 sched = ip_vs_scheduler_get(ur->sched_name);
1114 if (sched == NULL) {
1115 IP_VS_INFO("Scheduler module ip_vs_%s.o not found\n",
1116 ur->sched_name);
1117 return -ENOENT;
1118 }
1119
1120 write_lock_bh(&__ip_vs_svc_lock);
1121
1122
1123
1124
1125 while (atomic_read(&svc->usecnt) > 1) {};
1126
1127
1128
1129
1130 svc->flags = ur->vs_flags | IP_VS_SVC_F_HASHED;
1131 svc->timeout = ur->timeout * HZ;
1132 svc->netmask = ur->netmask;
1133
1134 old_sched = svc->scheduler;
1135 if (sched != old_sched) {
1136
1137
1138
1139 if ((ret = ip_vs_unbind_scheduler(svc))) {
1140 old_sched = sched;
1141 goto out;
1142 }
1143
1144
1145
1146
1147 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158 ip_vs_bind_scheduler(svc, old_sched);
1159 old_sched = sched;
1160 }
1161 }
1162
1163 out:
1164 write_unlock_bh(&__ip_vs_svc_lock);
1165
1166 if (old_sched)
1167 ip_vs_scheduler_put(old_sched);
1168
1169 return ret;
1170}
1171
1172
1173
1174
1175
1176
1177static void __ip_vs_del_service(struct ip_vs_service *svc)
1178{
1179 struct list_head *l;
1180 struct ip_vs_dest *dest;
1181 struct ip_vs_scheduler *old_sched;
1182
1183 ip_vs_num_services--;
1184 ip_vs_kill_estimator(&svc->stats);
1185
1186
1187
1188
1189 old_sched = svc->scheduler;
1190 ip_vs_unbind_scheduler(svc);
1191 if (old_sched && old_sched->module)
1192 __MOD_DEC_USE_COUNT(old_sched->module);
1193
1194
1195
1196
1197 l = &svc->destinations;
1198 while (l->next != l) {
1199 dest = list_entry(l->next, struct ip_vs_dest, n_list);
1200 __ip_vs_unlink_dest(svc, dest, 0);
1201 __ip_vs_del_dest(dest);
1202 }
1203
1204
1205
1206
1207 if (svc->port == FTPPORT)
1208 atomic_dec(&ip_vs_ftpsvc_counter);
1209 else if (svc->port == 0)
1210 atomic_dec(&ip_vs_nullsvc_counter);
1211
1212
1213
1214
1215 if (atomic_read(&svc->refcnt) == 0)
1216 kfree(svc);
1217 MOD_DEC_USE_COUNT;
1218}
1219
1220
1221
1222
1223static int ip_vs_del_service(struct ip_vs_service *svc)
1224{
1225 if (svc == NULL)
1226 return -EEXIST;
1227
1228
1229
1230
1231 write_lock_bh(&__ip_vs_svc_lock);
1232
1233 ip_vs_svc_unhash(svc);
1234
1235
1236
1237
1238 while (atomic_read(&svc->usecnt) > 1) {};
1239
1240 __ip_vs_del_service(svc);
1241
1242 write_unlock_bh(&__ip_vs_svc_lock);
1243
1244 return 0;
1245}
1246
1247
1248
1249
1250
1251static int ip_vs_flush(void)
1252{
1253 int idx;
1254 struct ip_vs_service *svc;
1255 struct list_head *l;
1256
1257
1258
1259
1260 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1261 l = &ip_vs_svc_table[idx];
1262 while (l->next != l) {
1263 svc = list_entry(l->next,struct ip_vs_service,s_list);
1264 write_lock_bh(&__ip_vs_svc_lock);
1265 ip_vs_svc_unhash(svc);
1266
1267
1268
1269 while (atomic_read(&svc->usecnt) > 0) {};
1270 __ip_vs_del_service(svc);
1271 write_unlock_bh(&__ip_vs_svc_lock);
1272 }
1273 }
1274
1275
1276
1277
1278 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1279 l = &ip_vs_svc_fwm_table[idx];
1280 while (l->next != l) {
1281 svc = list_entry(l->next,struct ip_vs_service,f_list);
1282 write_lock_bh(&__ip_vs_svc_lock);
1283 ip_vs_svc_unhash(svc);
1284
1285
1286
1287 while (atomic_read(&svc->usecnt) > 0) {};
1288 __ip_vs_del_service(svc);
1289 write_unlock_bh(&__ip_vs_svc_lock);
1290 }
1291 }
1292
1293 return 0;
1294}
1295
1296
1297
1298
1299
1300static int ip_vs_zero_service(struct ip_vs_service *svc)
1301{
1302 struct list_head *l;
1303 struct ip_vs_dest *dest;
1304
1305 write_lock_bh(&__ip_vs_svc_lock);
1306 list_for_each (l, &svc->destinations) {
1307 dest = list_entry(l, struct ip_vs_dest, n_list);
1308 __ip_vs_zero_stats(&dest->stats);
1309 }
1310 __ip_vs_zero_stats(&svc->stats);
1311 write_unlock_bh(&__ip_vs_svc_lock);
1312 return 0;
1313}
1314
1315static int ip_vs_zero_all(void)
1316{
1317 int idx;
1318 struct list_head *l;
1319 struct ip_vs_service *svc;
1320
1321 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1322 list_for_each (l, &ip_vs_svc_table[idx]) {
1323 svc = list_entry(l, struct ip_vs_service, s_list);
1324 ip_vs_zero_service(svc);
1325 }
1326 }
1327
1328 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1329 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1330 svc = list_entry(l, struct ip_vs_service, f_list);
1331 ip_vs_zero_service(svc);
1332 }
1333 }
1334
1335 __ip_vs_zero_stats(&ip_vs_stats);
1336 return 0;
1337}
1338
1339
1340static int ip_vs_sysctl_defense_mode(ctl_table *ctl, int write,
1341 struct file * filp, void *buffer, size_t *lenp)
1342{
1343 int *valp = ctl->data;
1344 int val = *valp;
1345 int ret;
1346
1347 ret = proc_dointvec(ctl, write, filp, buffer, lenp);
1348 if (write && (*valp != val)) {
1349 if ((*valp < 0) || (*valp > 3)) {
1350
1351 *valp = val;
1352 } else {
1353 local_bh_disable();
1354 update_defense_level();
1355 local_bh_enable();
1356 }
1357 }
1358 return ret;
1359}
1360
1361
1362
1363
1364
1365struct ip_vs_sysctl_table {
1366 struct ctl_table_header *sysctl_header;
1367 ctl_table vs_vars[NET_IPV4_VS_LAST];
1368 ctl_table vs_dir[2];
1369 ctl_table ipv4_dir[2];
1370 ctl_table root_dir[2];
1371};
1372
1373
1374static struct ip_vs_sysctl_table ipv4_vs_table = {
1375 NULL,
1376 {{NET_IPV4_VS_AMEMTHRESH, "amemthresh",
1377 &sysctl_ip_vs_amemthresh, sizeof(int), 0644, NULL,
1378 &proc_dointvec},
1379#ifdef CONFIG_IP_VS_DEBUG
1380 {NET_IPV4_VS_DEBUG_LEVEL, "debug_level",
1381 &sysctl_ip_vs_debug_level, sizeof(int), 0644, NULL,
1382 &proc_dointvec},
1383#endif
1384 {NET_IPV4_VS_AMDROPRATE, "am_droprate",
1385 &sysctl_ip_vs_am_droprate, sizeof(int), 0644, NULL,
1386 &proc_dointvec},
1387 {NET_IPV4_VS_DROP_ENTRY, "drop_entry",
1388 &sysctl_ip_vs_drop_entry, sizeof(int), 0644, NULL,
1389 &ip_vs_sysctl_defense_mode},
1390 {NET_IPV4_VS_DROP_PACKET, "drop_packet",
1391 &sysctl_ip_vs_drop_packet, sizeof(int), 0644, NULL,
1392 &ip_vs_sysctl_defense_mode},
1393 {NET_IPV4_VS_SECURE_TCP, "secure_tcp",
1394 &sysctl_ip_vs_secure_tcp, sizeof(int), 0644, NULL,
1395 &ip_vs_sysctl_defense_mode},
1396 {NET_IPV4_VS_TO_ES, "timeout_established",
1397 &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1398 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1399 {NET_IPV4_VS_TO_SS, "timeout_synsent",
1400 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1401 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1402 {NET_IPV4_VS_TO_SR, "timeout_synrecv",
1403 &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1404 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1405 {NET_IPV4_VS_TO_FW, "timeout_finwait",
1406 &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1407 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1408 {NET_IPV4_VS_TO_TW, "timeout_timewait",
1409 &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1410 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1411 {NET_IPV4_VS_TO_CL, "timeout_close",
1412 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1413 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1414 {NET_IPV4_VS_TO_CW, "timeout_closewait",
1415 &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1416 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1417 {NET_IPV4_VS_TO_LA, "timeout_lastack",
1418 &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1419 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1420 {NET_IPV4_VS_TO_LI, "timeout_listen",
1421 &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1422 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1423 {NET_IPV4_VS_TO_SA, "timeout_synack",
1424 &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1425 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1426 {NET_IPV4_VS_TO_UDP, "timeout_udp",
1427 &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1428 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1429 {NET_IPV4_VS_TO_ICMP, "timeout_icmp",
1430 &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1431 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
1432 {NET_IPV4_VS_CACHE_BYPASS, "cache_bypass",
1433 &sysctl_ip_vs_cache_bypass, sizeof(int), 0644, NULL,
1434 &proc_dointvec},
1435 {NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn",
1436 &sysctl_ip_vs_expire_nodest_conn, sizeof(int), 0644, NULL,
1437 &proc_dointvec},
1438 {NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold",
1439 &sysctl_ip_vs_sync_threshold, sizeof(int), 0644, NULL,
1440 &proc_dointvec},
1441 {NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send",
1442 &sysctl_ip_vs_nat_icmp_send, sizeof(int), 0644, NULL,
1443 &proc_dointvec},
1444 {NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template",
1445 &sysctl_ip_vs_expire_quiescent_template, sizeof(int), 0644, NULL,
1446 &proc_dointvec},
1447 {0}},
1448 {{NET_IPV4_VS, "vs", NULL, 0, 0555, ipv4_vs_table.vs_vars},
1449 {0}},
1450 {{NET_IPV4, "ipv4", NULL, 0, 0555, ipv4_vs_table.vs_dir},
1451 {0}},
1452 {{CTL_NET, "net", NULL, 0, 0555, ipv4_vs_table.ipv4_dir},
1453 {0}}
1454};
1455
1456
1457
1458
1459
1460
1461static inline char *ip_vs_fwd_name(unsigned flags)
1462{
1463 char *fwd;
1464
1465 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1466 case IP_VS_CONN_F_LOCALNODE:
1467 fwd = "Local";
1468 break;
1469 case IP_VS_CONN_F_TUNNEL:
1470 fwd = "Tunnel";
1471 break;
1472 case IP_VS_CONN_F_DROUTE:
1473 fwd = "Route";
1474 break;
1475 default:
1476 fwd = "Masq";
1477 }
1478 return fwd;
1479}
1480
1481static int ip_vs_get_info(char *buf, char **start, off_t offset, int length)
1482{
1483 int len=0;
1484 off_t pos=0;
1485 char temp[64], temp2[32];
1486 int idx;
1487 struct ip_vs_service *svc;
1488 struct ip_vs_dest *dest;
1489 struct list_head *l, *e, *p, *q;
1490
1491
1492
1493
1494
1495
1496 pos = 192;
1497 if (pos > offset) {
1498 sprintf(temp,
1499 "IP Virtual Server version %d.%d.%d (size=%d)",
1500 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1501 len += sprintf(buf+len, "%-63s\n", temp);
1502 len += sprintf(buf+len, "%-63s\n",
1503 "Prot LocalAddress:Port Scheduler Flags");
1504 len += sprintf(buf+len, "%-63s\n",
1505 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn");
1506 }
1507
1508 read_lock_bh(&__ip_vs_svc_lock);
1509
1510
1511 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1512 l = &ip_vs_svc_table[idx];
1513 for (e=l->next; e!=l; e=e->next) {
1514 svc = list_entry(e, struct ip_vs_service, s_list);
1515 pos += 64;
1516 if (pos > offset) {
1517 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1518 sprintf(temp2, "persistent %d %08X",
1519 svc->timeout,
1520 ntohl(svc->netmask));
1521 else
1522 temp2[0] = '\0';
1523
1524 sprintf(temp, "%s %08X:%04X %s %s",
1525 ip_vs_proto_name(svc->protocol),
1526 ntohl(svc->addr),
1527 ntohs(svc->port),
1528 svc->scheduler->name, temp2);
1529 len += sprintf(buf+len, "%-63s\n", temp);
1530 if (len >= length)
1531 goto done;
1532 }
1533
1534 p = &svc->destinations;
1535 for (q=p->next; q!=p; q=q->next) {
1536 dest = list_entry(q, struct ip_vs_dest, n_list);
1537 pos += 64;
1538 if (pos <= offset)
1539 continue;
1540 sprintf(temp,
1541 " -> %08X:%04X %-7s %-6d %-10d %-10d",
1542 ntohl(dest->addr),
1543 ntohs(dest->port),
1544 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1545 atomic_read(&dest->weight),
1546 atomic_read(&dest->activeconns),
1547 atomic_read(&dest->inactconns));
1548 len += sprintf(buf+len, "%-63s\n", temp);
1549 if (len >= length)
1550 goto done;
1551 }
1552 }
1553 }
1554
1555
1556 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1557 l = &ip_vs_svc_fwm_table[idx];
1558 for (e=l->next; e!=l; e=e->next) {
1559 svc = list_entry(e, struct ip_vs_service, f_list);
1560 pos += 64;
1561 if (pos > offset) {
1562 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1563 sprintf(temp2, "persistent %d %08X",
1564 svc->timeout,
1565 ntohl(svc->netmask));
1566 else
1567 temp2[0] = '\0';
1568
1569 sprintf(temp, "FWM %08X %s %s",
1570 svc->fwmark,
1571 svc->scheduler->name, temp2);
1572 len += sprintf(buf+len, "%-63s\n", temp);
1573 if (len >= length)
1574 goto done;
1575 }
1576
1577 p = &svc->destinations;
1578 for (q=p->next; q!=p; q=q->next) {
1579 dest = list_entry(q, struct ip_vs_dest, n_list);
1580 pos += 64;
1581 if (pos <= offset)
1582 continue;
1583 sprintf(temp,
1584 " -> %08X:%04X %-7s %-6d %-10d %-10d",
1585 ntohl(dest->addr),
1586 ntohs(dest->port),
1587 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1588 atomic_read(&dest->weight),
1589 atomic_read(&dest->activeconns),
1590 atomic_read(&dest->inactconns));
1591 len += sprintf(buf+len, "%-63s\n", temp);
1592 if (len >= length)
1593 goto done;
1594 }
1595 }
1596 }
1597
1598 done:
1599 read_unlock_bh(&__ip_vs_svc_lock);
1600
1601 *start = buf+len-(pos-offset);
1602 len = pos-offset;
1603 if (len > length)
1604 len = length;
1605 if (len < 0)
1606 len = 0;
1607 return len;
1608}
1609
1610
/*
 *	Global IPVS statistics object.  It is reported through the
 *	"ip_vs_stats" proc entry and its counters are read under the
 *	embedded ip_vs_stats.lock spinlock.
 */
struct ip_vs_stats ip_vs_stats;
1612
1613static int
1614ip_vs_stats_get_info(char *buf, char **start, off_t offset, int length)
1615{
1616 int len=0;
1617 off_t pos=0;
1618 char temp[64];
1619
1620 pos += 320;
1621 if (pos > offset) {
1622 len += sprintf(buf+len, "%-63s\n%-63s\n",
1623
1624 " Total Incoming Outgoing Incoming Outgoing",
1625 " Conns Packets Packets Bytes Bytes");
1626
1627 spin_lock_bh(&ip_vs_stats.lock);
1628 sprintf(temp, "%8X %8X %8X %8X%08X %8X%08X",
1629 ip_vs_stats.conns,
1630 ip_vs_stats.inpkts,
1631 ip_vs_stats.outpkts,
1632 (__u32)(ip_vs_stats.inbytes>>32),
1633 (__u32)ip_vs_stats.inbytes,
1634 (__u32)(ip_vs_stats.outbytes>>32),
1635 (__u32)ip_vs_stats.outbytes);
1636 len += sprintf(buf+len, "%-62s\n\n", temp);
1637
1638 len += sprintf(buf+len, "%-63s\n",
1639
1640 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s");
1641 sprintf(temp, "%8X %8X %8X %16X %16X",
1642 ip_vs_stats.cps,
1643 ip_vs_stats.inpps,
1644 ip_vs_stats.outpps,
1645 ip_vs_stats.inbps,
1646 ip_vs_stats.outbps);
1647 len += sprintf(buf+len, "%-63s\n", temp);
1648
1649 spin_unlock_bh(&ip_vs_stats.lock);
1650 }
1651
1652 *start = buf+len-(pos-offset);
1653 len = pos-offset;
1654 if (len > length)
1655 len = length;
1656 if (len < 0)
1657 len = 0;
1658 return len;
1659}
1660
1661
1662
1663
1664
1665static int ip_vs_set_timeouts(struct ip_vs_rule_user *u)
1666{
1667 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1668 u->tcp_timeout,
1669 u->tcp_fin_timeout,
1670 u->udp_timeout);
1671
1672 if (u->tcp_timeout) {
1673 vs_timeout_table.timeout[IP_VS_S_ESTABLISHED]
1674 = u->tcp_timeout * HZ;
1675 }
1676
1677 if (u->tcp_fin_timeout) {
1678 vs_timeout_table.timeout[IP_VS_S_FIN_WAIT]
1679 = u->tcp_fin_timeout * HZ;
1680 }
1681
1682 if (u->udp_timeout) {
1683 vs_timeout_table.timeout[IP_VS_S_UDP]
1684 = u->udp_timeout * HZ;
1685 }
1686 return 0;
1687}
1688
1689
1690static int
1691do_ip_vs_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1692{
1693 int ret;
1694 struct ip_vs_rule_user *urule;
1695 struct ip_vs_service *svc = NULL;
1696
1697 if (!capable(CAP_NET_ADMIN))
1698 return -EPERM;
1699
1700
1701
1702
1703
1704 if (len < sizeof(struct ip_vs_rule_user)) {
1705 IP_VS_ERR("set_ctl: len %u < %Zu\n",
1706 len, sizeof(struct ip_vs_rule_user));
1707 return -EINVAL;
1708 } else if (len > 128000) {
1709 IP_VS_ERR("set_ctl: len %u > 128000\n", len);
1710 return -EINVAL;
1711 } else if ((urule = kmalloc(len, GFP_KERNEL)) == NULL) {
1712 IP_VS_ERR("set_ctl: no mem for len %u\n", len);
1713 return -ENOMEM;
1714 } else if (copy_from_user(urule, user, len) != 0) {
1715 ret = -EFAULT;
1716 goto out_free;
1717 }
1718
1719 MOD_INC_USE_COUNT;
1720 if (down_interruptible(&__ip_vs_mutex)) {
1721 ret = -ERESTARTSYS;
1722 goto out_dec;
1723 }
1724
1725 if (cmd == IP_VS_SO_SET_FLUSH) {
1726
1727 ret = ip_vs_flush();
1728 goto out_unlock;
1729 } else if (cmd == IP_VS_SO_SET_TIMEOUTS) {
1730
1731 ret = ip_vs_set_timeouts(urule);
1732 goto out_unlock;
1733 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1734 ret = start_sync_thread(urule->state, urule->mcast_ifn,
1735 urule->syncid);
1736 goto out_unlock;
1737 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1738 ret = stop_sync_thread(urule->state);
1739 goto out_unlock;
1740 } else if (cmd == IP_VS_SO_SET_ZERO) {
1741
1742 if (!urule->vfwmark && !urule->vaddr && !urule->vport) {
1743 ret = ip_vs_zero_all();
1744 goto out_unlock;
1745 }
1746 }
1747
1748
1749
1750
1751 if (urule->protocol!=IPPROTO_TCP && urule->protocol!=IPPROTO_UDP) {
1752 IP_VS_ERR("set_ctl: invalid protocol %d %d.%d.%d.%d:%d %s\n",
1753 urule->protocol, NIPQUAD(urule->vaddr),
1754 ntohs(urule->vport), urule->sched_name);
1755 ret = -EFAULT;
1756 goto out_unlock;
1757 }
1758
1759
1760
1761
1762 if (urule->vfwmark == 0)
1763 svc = __ip_vs_service_get(urule->protocol,
1764 urule->vaddr, urule->vport);
1765 else
1766 svc = __ip_vs_svc_fwm_get(urule->vfwmark);
1767
1768 if (cmd != IP_VS_SO_SET_ADD
1769 && (svc == NULL || svc->protocol != urule->protocol)) {
1770 ret = -ESRCH;
1771 goto out_unlock;
1772 }
1773
1774 switch (cmd) {
1775 case IP_VS_SO_SET_ADD:
1776 if (svc != NULL)
1777 ret = -EEXIST;
1778 else
1779 ret = ip_vs_add_service(urule, &svc);
1780 break;
1781 case IP_VS_SO_SET_EDIT:
1782 ret = ip_vs_edit_service(svc, urule);
1783 break;
1784 case IP_VS_SO_SET_DEL:
1785 ret = ip_vs_del_service(svc);
1786 if (!ret)
1787 goto out_unlock;
1788 break;
1789 case IP_VS_SO_SET_ADDDEST:
1790 ret = ip_vs_add_dest(svc, urule);
1791 break;
1792 case IP_VS_SO_SET_EDITDEST:
1793 ret = ip_vs_edit_dest(svc, urule);
1794 break;
1795 case IP_VS_SO_SET_DELDEST:
1796 ret = ip_vs_del_dest(svc, urule);
1797 break;
1798 case IP_VS_SO_SET_ZERO:
1799 ret = ip_vs_zero_service(svc);
1800 break;
1801 default:
1802 ret = -EINVAL;
1803 }
1804
1805 if (svc)
1806 ip_vs_service_put(svc);
1807
1808 out_unlock:
1809 up(&__ip_vs_mutex);
1810 out_dec:
1811 MOD_DEC_USE_COUNT;
1812 out_free:
1813 kfree(urule);
1814 return ret;
1815}
1816
1817
1818static inline void
1819__ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
1820{
1821 spin_lock_bh(&src->lock);
1822 memcpy(dst, src, (char*)&src->lock - (char*)src);
1823 spin_unlock_bh(&src->lock);
1824}
1825
1826static inline int
1827__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
1828 struct ip_vs_get_services *uptr)
1829{
1830 int idx, count=0;
1831 struct ip_vs_service *svc;
1832 struct list_head *l;
1833 struct ip_vs_service_user entry;
1834 int ret = 0;
1835
1836 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1837 list_for_each (l, &ip_vs_svc_table[idx]) {
1838 if (count >= get->num_services)
1839 goto out;
1840 svc = list_entry(l, struct ip_vs_service, s_list);
1841 entry.protocol = svc->protocol;
1842 entry.addr = svc->addr;
1843 entry.port = svc->port;
1844 entry.fwmark = svc->fwmark;
1845 strcpy(entry.sched_name, svc->scheduler->name);
1846 entry.flags = svc->flags;
1847 entry.timeout = svc->timeout / HZ;
1848 entry.netmask = svc->netmask;
1849 entry.num_dests = svc->num_dests;
1850 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1851 if (copy_to_user(&uptr->entrytable[count],
1852 &entry, sizeof(entry))) {
1853 ret = -EFAULT;
1854 goto out;
1855 }
1856 count++;
1857 }
1858 }
1859
1860 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1861 list_for_each (l, &ip_vs_svc_fwm_table[idx]) {
1862 if (count >= get->num_services)
1863 goto out;
1864 svc = list_entry(l, struct ip_vs_service, f_list);
1865 entry.protocol = svc->protocol;
1866 entry.addr = svc->addr;
1867 entry.port = svc->port;
1868 entry.fwmark = svc->fwmark;
1869 strcpy(entry.sched_name, svc->scheduler->name);
1870 entry.flags = svc->flags;
1871 entry.timeout = svc->timeout / HZ;
1872 entry.netmask = svc->netmask;
1873 entry.num_dests = svc->num_dests;
1874 __ip_vs_copy_stats(&entry.stats, &svc->stats);
1875 if (copy_to_user(&uptr->entrytable[count],
1876 &entry, sizeof(entry))) {
1877 ret = -EFAULT;
1878 goto out;
1879 }
1880 count++;
1881 }
1882 }
1883 out:
1884 return ret;
1885}
1886
1887static inline int
1888__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
1889 struct ip_vs_get_dests *uptr)
1890{
1891 struct ip_vs_service *svc;
1892 int ret = 0;
1893
1894 if (get->fwmark)
1895 svc = __ip_vs_svc_fwm_get(get->fwmark);
1896 else
1897 svc = __ip_vs_service_get(get->protocol,
1898 get->addr, get->port);
1899 if (svc) {
1900 int count = 0;
1901 struct ip_vs_dest *dest;
1902 struct list_head *l, *e;
1903 struct ip_vs_dest_user entry;
1904
1905 l = &svc->destinations;
1906 for (e=l->next; e!=l; e=e->next) {
1907 if (count >= get->num_dests)
1908 break;
1909 dest = list_entry(e, struct ip_vs_dest, n_list);
1910 entry.addr = dest->addr;
1911 entry.port = dest->port;
1912 entry.flags = atomic_read(&dest->conn_flags);
1913 entry.weight = atomic_read(&dest->weight);
1914 entry.activeconns = atomic_read(&dest->activeconns);
1915 entry.inactconns = atomic_read(&dest->inactconns);
1916 __ip_vs_copy_stats(&entry.stats, &dest->stats);
1917 if (copy_to_user(&uptr->entrytable[count],
1918 &entry, sizeof(entry))) {
1919 ret = -EFAULT;
1920 break;
1921 }
1922 count++;
1923 }
1924 ip_vs_service_put(svc);
1925 } else
1926 ret = -ESRCH;
1927 return ret;
1928}
1929
1930static inline void
1931__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
1932{
1933 u->tcp_timeout = vs_timeout_table.timeout[IP_VS_S_ESTABLISHED] / HZ;
1934 u->tcp_fin_timeout = vs_timeout_table.timeout[IP_VS_S_FIN_WAIT] / HZ;
1935 u->udp_timeout = vs_timeout_table.timeout[IP_VS_S_UDP] / HZ;
1936}
1937
1938static int
1939do_ip_vs_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1940{
1941 int ret = 0;
1942
1943 if (!capable(CAP_NET_ADMIN))
1944 return -EPERM;
1945
1946 if (down_interruptible(&__ip_vs_mutex))
1947 return -ERESTARTSYS;
1948
1949 switch (cmd) {
1950 case IP_VS_SO_GET_VERSION:
1951 {
1952 char buf[64];
1953
1954 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
1955 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1956 if (*len < strlen(buf)+1) {
1957 ret = -EINVAL;
1958 goto out;
1959 }
1960 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
1961 ret = -EFAULT;
1962 goto out;
1963 }
1964 *len = strlen(buf)+1;
1965 }
1966 break;
1967
1968 case IP_VS_SO_GET_INFO:
1969 {
1970 struct ip_vs_getinfo info;
1971 info.version = IP_VS_VERSION_CODE;
1972 info.size = IP_VS_CONN_TAB_SIZE;
1973 info.num_services = ip_vs_num_services;
1974 if (copy_to_user(user, &info, sizeof(info)) != 0)
1975 ret = -EFAULT;
1976 }
1977 break;
1978
1979 case IP_VS_SO_GET_SERVICES:
1980 {
1981 struct ip_vs_get_services get;
1982
1983 if (*len < sizeof(get)) {
1984 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
1985 ret = -EINVAL;
1986 goto out;
1987 }
1988 if (copy_from_user(&get, user, sizeof(get))) {
1989 ret = -EFAULT;
1990 goto out;
1991 }
1992 if (*len != (sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services)) {
1993 IP_VS_ERR("length: %u != %Zu\n", *len,
1994 sizeof(get)+sizeof(struct ip_vs_service_user)*get.num_services);
1995 ret = -EINVAL;
1996 goto out;
1997 }
1998 ret = __ip_vs_get_service_entries(&get, user);
1999 }
2000 break;
2001
2002 case IP_VS_SO_GET_SERVICE:
2003 {
2004 struct ip_vs_service_user get;
2005 struct ip_vs_service *svc;
2006
2007 if (*len != sizeof(get)) {
2008 IP_VS_ERR("length: %u != %Zu\n", *len, sizeof(get));
2009 ret = -EINVAL;
2010 goto out;
2011 }
2012 if (copy_from_user(&get, user, sizeof(get))) {
2013 ret = -EFAULT;
2014 goto out;
2015 }
2016
2017 if (get.fwmark)
2018 svc = __ip_vs_svc_fwm_get(get.fwmark);
2019 else
2020 svc = __ip_vs_service_get(get.protocol,
2021 get.addr, get.port);
2022 if (svc) {
2023 strcpy(get.sched_name, svc->scheduler->name);
2024 get.flags = svc->flags;
2025 get.timeout = svc->timeout / HZ;
2026 get.netmask = svc->netmask;
2027 get.num_dests = svc->num_dests;
2028 __ip_vs_copy_stats(&get.stats, &svc->stats);
2029 if (copy_to_user(user, &get, *len) != 0)
2030 ret = -EFAULT;
2031 ip_vs_service_put(svc);
2032 } else
2033 ret = -ESRCH;
2034 }
2035 break;
2036
2037 case IP_VS_SO_GET_DESTS:
2038 {
2039 struct ip_vs_get_dests get;
2040
2041 if (*len < sizeof(get)) {
2042 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(get));
2043 ret = -EINVAL;
2044 goto out;
2045 }
2046 if (copy_from_user(&get, user, sizeof(get))) {
2047 ret = -EFAULT;
2048 goto out;
2049 }
2050 if (*len != (sizeof(get) +
2051 sizeof(struct ip_vs_dest_user)*get.num_dests)) {
2052 IP_VS_ERR("length: %u != %Zu\n", *len,
2053 sizeof(get)+sizeof(struct ip_vs_dest_user)*get.num_dests);
2054 ret = -EINVAL;
2055 goto out;
2056 }
2057 ret = __ip_vs_get_dest_entries(&get, user);
2058 }
2059 break;
2060
2061 case IP_VS_SO_GET_TIMEOUTS:
2062 {
2063 struct ip_vs_timeout_user u;
2064
2065 if (*len < sizeof(u)) {
2066 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2067 ret = -EINVAL;
2068 goto out;
2069 }
2070 __ip_vs_get_timeouts(&u);
2071 if (copy_to_user(user, &u, sizeof(u)) != 0)
2072 ret = -EFAULT;
2073 }
2074 break;
2075
2076 case IP_VS_SO_GET_DAEMON:
2077 {
2078 struct ip_vs_daemon_user u;
2079
2080 if (*len < sizeof(u)) {
2081 IP_VS_ERR("length: %u < %Zu\n", *len, sizeof(u));
2082 ret = -EINVAL;
2083 goto out;
2084 }
2085 u.state = ip_vs_sync_state;
2086 if (ip_vs_sync_state & IP_VS_STATE_MASTER)
2087 strcpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn);
2088 if (ip_vs_sync_state & IP_VS_STATE_BACKUP)
2089 strcpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn);
2090 if (copy_to_user(user, &u, sizeof(u)) != 0)
2091 ret = -EFAULT;
2092 }
2093 break;
2094
2095 default:
2096 ret = -EINVAL;
2097 }
2098
2099 out:
2100 up(&__ip_vs_mutex);
2101 return ret;
2102}
2103
2104
/*
 *	Netfilter socket-option registration for IPVS; passed to
 *	nf_register_sockopt()/nf_unregister_sockopt().  The initialiser is
 *	positional.
 *
 *	NOTE(review): the field meanings below are inferred from the values
 *	(list head, protocol family, then <first option, one past the last
 *	option, handler> for set and for get) -- confirm against
 *	struct nf_sockopt_ops in <linux/netfilter.h>.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	{ NULL, NULL }, PF_INET,
	/* setsockopt side: handled by do_ip_vs_set_ctl */
	IP_VS_BASE_CTL, IP_VS_SO_SET_MAX+1, do_ip_vs_set_ctl,
	/* getsockopt side: handled by do_ip_vs_get_ctl */
	IP_VS_BASE_CTL, IP_VS_SO_GET_MAX+1, do_ip_vs_get_ctl
};
2110
2111
2112int ip_vs_control_init(void)
2113{
2114 int ret;
2115 int idx;
2116
2117 EnterFunction(2);
2118
2119 ret = nf_register_sockopt(&ip_vs_sockopts);
2120 if (ret) {
2121 IP_VS_ERR("cannot register sockopt.\n");
2122 return ret;
2123 }
2124
2125 proc_net_create("ip_vs", 0, ip_vs_get_info);
2126 proc_net_create("ip_vs_stats", 0, ip_vs_stats_get_info);
2127
2128 ipv4_vs_table.sysctl_header =
2129 register_sysctl_table(ipv4_vs_table.root_dir, 0);
2130
2131
2132
2133
2134 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2135 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
2136 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
2137 }
2138 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
2139 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
2140 }
2141
2142 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
2143 ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
2144 ip_vs_new_estimator(&ip_vs_stats);
2145
2146
2147 init_timer(&defense_timer);
2148 defense_timer.function = defense_timer_handler;
2149 defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
2150 add_timer(&defense_timer);
2151
2152 LeaveFunction(2);
2153 return 0;
2154}
2155
/*
 *	Tear down what ip_vs_control_init() set up: stop the defense timer
 *	and the global statistics estimator, then unregister the sysctl
 *	table, the two proc entries and the sockopt interface.  The
 *	destination trash is cleaned up first.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* _sync variant: NOTE(review) presumably waits for a running handler -- confirm */
	del_timer_sync(&defense_timer);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(ipv4_vs_table.sysctl_header);
	/* proc entries were created as "ip_vs" then "ip_vs_stats"; remove in reverse */
	proc_net_remove("ip_vs_stats");
	proc_net_remove("ip_vs");
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}
2168