1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Log prefix: pr_fmt() prepends KMSG_COMPONENT and ": " to the format string
 * it is given.  Both macros are defined ahead of the #include lines.
 */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
36
37#include <linux/module.h>
38#include <linux/slab.h>
39#include <linux/inetdevice.h>
40#include <linux/net.h>
41#include <linux/completion.h>
42#include <linux/delay.h>
43#include <linux/skbuff.h>
44#include <linux/in.h>
45#include <linux/igmp.h>
46#include <linux/udp.h>
47#include <linux/err.h>
48#include <linux/kthread.h>
49#include <linux/wait.h>
50#include <linux/kernel.h>
51
52#include <asm/unaligned.h>
53
54#include <net/ip.h>
55#include <net/sock.h>
56
57#include <net/ip_vs.h>
58
59#define IP_VS_SYNC_GROUP 0xe0000051
60#define IP_VS_SYNC_PORT 8848
61
62#define SYNC_PROTO_VER 1
63
64static struct lock_class_key __ipvs_sync_key;
65
66
67
68
69struct ip_vs_sync_conn_v0 {
70 __u8 reserved;
71
72
73 __u8 protocol;
74 __be16 cport;
75 __be16 vport;
76 __be16 dport;
77 __be32 caddr;
78 __be32 vaddr;
79 __be32 daddr;
80
81
82 __be16 flags;
83 __be16 state;
84
85
86};
87
88struct ip_vs_sync_conn_options {
89 struct ip_vs_seq in_seq;
90 struct ip_vs_seq out_seq;
91};
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131struct ip_vs_sync_v4 {
132 __u8 type;
133 __u8 protocol;
134 __be16 ver_size;
135
136 __be32 flags;
137 __be16 state;
138
139 __be16 cport;
140 __be16 vport;
141 __be16 dport;
142 __be32 fwmark;
143 __be32 timeout;
144 __be32 caddr;
145 __be32 vaddr;
146 __be32 daddr;
147
148
149};
150
151
152
153struct ip_vs_sync_v6 {
154 __u8 type;
155 __u8 protocol;
156 __be16 ver_size;
157
158 __be32 flags;
159 __be16 state;
160
161 __be16 cport;
162 __be16 vport;
163 __be16 dport;
164 __be32 fwmark;
165 __be32 timeout;
166 struct in6_addr caddr;
167 struct in6_addr vaddr;
168 struct in6_addr daddr;
169
170
171};
172
173union ip_vs_sync_conn {
174 struct ip_vs_sync_v4 v4;
175 struct ip_vs_sync_v6 v6;
176};
177
178
/* Bit number / mask in the entry's type byte marking an IPv6 entry */
#define STYPE_INET6		0
#define STYPE_F_INET6		(1 << STYPE_INET6)

/* Split of ver_size: version above bit 12, entry length in the low 12 bits */
#define SVER_SHIFT		12
#define SVER_MASK		0x0fff

/* Option type codes that follow the fixed part of a version 1 entry */
#define IPVS_OPT_SEQ_DATA	1
#define IPVS_OPT_PE_DATA	2
#define IPVS_OPT_PE_NAME	3
#define IPVS_OPT_PARAM		7

/*
 * Bit flags derived from the codes above.  The first three record which
 * options were already seen while parsing one entry.  IPVS_OPT_F_PARAM is
 * masked off the received option type before dispatch; an unknown option
 * type without this bit makes the receiver drop the entry.
 */
#define IPVS_OPT_F_SEQ_DATA	(1 << (IPVS_OPT_SEQ_DATA-1))
#define IPVS_OPT_F_PE_DATA	(1 << (IPVS_OPT_PE_DATA-1))
#define IPVS_OPT_F_PE_NAME	(1 << (IPVS_OPT_PE_NAME-1))
#define IPVS_OPT_F_PARAM	(1 << (IPVS_OPT_PARAM-1))
194
/*
 * Argument block handed to the sync kthreads.  The thread releases the
 * socket and frees the block when it exits.
 */
struct ip_vs_sync_thread_data {
	struct net		*net;	/* namespace the thread works for */
	struct socket		*sock;	/* multicast send or receive socket */
	char			*buf;	/* receive buffer of the backup thread;
					 * the master thread does not touch it */
};
200
201
/* Size of a version 0 entry without and with the trailing seq options */
#define SIMPLE_CONN_SIZE	(sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE		\
(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
/*
 * Header length and per-message entry limit used by set_sync_mesg_maxlen()
 * when sizing the send buffer (nr_conns in the headers is a __u8).
 */
#define SYNC_MESG_HEADER_LEN	4
#define MAX_CONNS_PER_SYNCBUFF	255
243
244
245struct ip_vs_sync_mesg_v0 {
246 __u8 nr_conns;
247 __u8 syncid;
248 __u16 size;
249
250
251};
252
253
254struct ip_vs_sync_mesg {
255 __u8 reserved;
256 __u8 syncid;
257 __u16 size;
258 __u8 nr_conns;
259 __s8 version;
260 __u16 spare;
261
262};
263
264struct ip_vs_sync_buff {
265 struct list_head list;
266 unsigned long firstuse;
267
268
269 struct ip_vs_sync_mesg *mesg;
270 unsigned char *head;
271 unsigned char *end;
272};
273
274
275static struct sockaddr_in mcast_addr = {
276 .sin_family = AF_INET,
277 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
278 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
279};
280
281
282
283
284
285static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
286{
287 ho->init_seq = get_unaligned_be32(&no->init_seq);
288 ho->delta = get_unaligned_be32(&no->delta);
289 ho->previous_delta = get_unaligned_be32(&no->previous_delta);
290}
291
292
293
294
295
296static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
297{
298 put_unaligned_be32(ho->init_seq, &no->init_seq);
299 put_unaligned_be32(ho->delta, &no->delta);
300 put_unaligned_be32(ho->previous_delta, &no->previous_delta);
301}
302
303static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
304{
305 struct ip_vs_sync_buff *sb;
306
307 spin_lock_bh(&ipvs->sync_lock);
308 if (list_empty(&ipvs->sync_queue)) {
309 sb = NULL;
310 } else {
311 sb = list_entry(ipvs->sync_queue.next,
312 struct ip_vs_sync_buff,
313 list);
314 list_del(&sb->list);
315 }
316 spin_unlock_bh(&ipvs->sync_lock);
317
318 return sb;
319}
320
321
322
323
324static inline struct ip_vs_sync_buff *
325ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
326{
327 struct ip_vs_sync_buff *sb;
328
329 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
330 return NULL;
331
332 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
333 if (!sb->mesg) {
334 kfree(sb);
335 return NULL;
336 }
337 sb->mesg->reserved = 0;
338 sb->mesg->version = SYNC_PROTO_VER;
339 sb->mesg->syncid = ipvs->master_syncid;
340 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
341 sb->mesg->nr_conns = 0;
342 sb->mesg->spare = 0;
343 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
344 sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
345
346 sb->firstuse = jiffies;
347 return sb;
348}
349
350static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
351{
352 kfree(sb->mesg);
353 kfree(sb);
354}
355
356static inline void sb_queue_tail(struct netns_ipvs *ipvs)
357{
358 struct ip_vs_sync_buff *sb = ipvs->sync_buff;
359
360 spin_lock(&ipvs->sync_lock);
361 if (ipvs->sync_state & IP_VS_STATE_MASTER)
362 list_add_tail(&sb->list, &ipvs->sync_queue);
363 else
364 ip_vs_sync_buff_release(sb);
365 spin_unlock(&ipvs->sync_lock);
366}
367
368
369
370
371
372static inline struct ip_vs_sync_buff *
373get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
374{
375 struct ip_vs_sync_buff *sb;
376
377 spin_lock_bh(&ipvs->sync_buff_lock);
378 if (ipvs->sync_buff &&
379 time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
380 sb = ipvs->sync_buff;
381 ipvs->sync_buff = NULL;
382 } else
383 sb = NULL;
384 spin_unlock_bh(&ipvs->sync_buff_lock);
385 return sb;
386}
387
388
389
390
391
392void ip_vs_sync_switch_mode(struct net *net, int mode)
393{
394 struct netns_ipvs *ipvs = net_ipvs(net);
395
396 if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
397 return;
398 if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
399 return;
400
401 spin_lock_bh(&ipvs->sync_buff_lock);
402
403 if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
404 kfree(ipvs->sync_buff);
405 ipvs->sync_buff = NULL;
406 } else {
407 spin_lock_bh(&ipvs->sync_lock);
408 if (ipvs->sync_state & IP_VS_STATE_MASTER)
409 list_add_tail(&ipvs->sync_buff->list,
410 &ipvs->sync_queue);
411 else
412 ip_vs_sync_buff_release(ipvs->sync_buff);
413 spin_unlock_bh(&ipvs->sync_lock);
414 }
415 spin_unlock_bh(&ipvs->sync_buff_lock);
416}
417
418
419
420
421static inline struct ip_vs_sync_buff *
422ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
423{
424 struct ip_vs_sync_buff *sb;
425 struct ip_vs_sync_mesg_v0 *mesg;
426
427 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
428 return NULL;
429
430 sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
431 if (!sb->mesg) {
432 kfree(sb);
433 return NULL;
434 }
435 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
436 mesg->nr_conns = 0;
437 mesg->syncid = ipvs->master_syncid;
438 mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
439 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
440 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
441 sb->firstuse = jiffies;
442 return sb;
443}
444
445
446
447
448
449void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
450{
451 struct netns_ipvs *ipvs = net_ipvs(net);
452 struct ip_vs_sync_mesg_v0 *m;
453 struct ip_vs_sync_conn_v0 *s;
454 int len;
455
456 if (unlikely(cp->af != AF_INET))
457 return;
458
459 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
460 return;
461
462 spin_lock(&ipvs->sync_buff_lock);
463 if (!ipvs->sync_buff) {
464 ipvs->sync_buff =
465 ip_vs_sync_buff_create_v0(ipvs);
466 if (!ipvs->sync_buff) {
467 spin_unlock(&ipvs->sync_buff_lock);
468 pr_err("ip_vs_sync_buff_create failed.\n");
469 return;
470 }
471 }
472
473 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
474 SIMPLE_CONN_SIZE;
475 m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
476 s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
477
478
479 s->reserved = 0;
480 s->protocol = cp->protocol;
481 s->cport = cp->cport;
482 s->vport = cp->vport;
483 s->dport = cp->dport;
484 s->caddr = cp->caddr.ip;
485 s->vaddr = cp->vaddr.ip;
486 s->daddr = cp->daddr.ip;
487 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
488 s->state = htons(cp->state);
489 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
490 struct ip_vs_sync_conn_options *opt =
491 (struct ip_vs_sync_conn_options *)&s[1];
492 memcpy(opt, &cp->in_seq, sizeof(*opt));
493 }
494
495 m->nr_conns++;
496 m->size += len;
497 ipvs->sync_buff->head += len;
498
499
500 if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
501 sb_queue_tail(ipvs);
502 ipvs->sync_buff = NULL;
503 }
504 spin_unlock(&ipvs->sync_buff_lock);
505
506
507 if (cp->control)
508 ip_vs_sync_conn(net, cp->control);
509}
510
511
512
513
514
515
516void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
517{
518 struct netns_ipvs *ipvs = net_ipvs(net);
519 struct ip_vs_sync_mesg *m;
520 union ip_vs_sync_conn *s;
521 __u8 *p;
522 unsigned int len, pe_name_len, pad;
523
524
525 if (sysctl_sync_ver(ipvs) == 0) {
526 ip_vs_sync_conn_v0(net, cp);
527 return;
528 }
529
530 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
531 goto control;
532sloop:
533
534 pe_name_len = 0;
535 if (cp->pe_data_len) {
536 if (!cp->pe_data || !cp->dest) {
537 IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
538 return;
539 }
540 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
541 }
542
543 spin_lock(&ipvs->sync_buff_lock);
544
545#ifdef CONFIG_IP_VS_IPV6
546 if (cp->af == AF_INET6)
547 len = sizeof(struct ip_vs_sync_v6);
548 else
549#endif
550 len = sizeof(struct ip_vs_sync_v4);
551
552 if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
553 len += sizeof(struct ip_vs_sync_conn_options) + 2;
554
555 if (cp->pe_data_len)
556 len += cp->pe_data_len + 2;
557 if (pe_name_len)
558 len += pe_name_len + 2;
559
560
561 pad = 0;
562 if (ipvs->sync_buff) {
563 pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
564 if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
565 sb_queue_tail(ipvs);
566 ipvs->sync_buff = NULL;
567 pad = 0;
568 }
569 }
570
571 if (!ipvs->sync_buff) {
572 ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
573 if (!ipvs->sync_buff) {
574 spin_unlock(&ipvs->sync_buff_lock);
575 pr_err("ip_vs_sync_buff_create failed.\n");
576 return;
577 }
578 }
579
580 m = ipvs->sync_buff->mesg;
581 p = ipvs->sync_buff->head;
582 ipvs->sync_buff->head += pad + len;
583 m->size += pad + len;
584
585 while (pad--)
586 *(p++) = 0;
587
588 s = (union ip_vs_sync_conn *)p;
589
590
591 s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
592 s->v4.ver_size = htons(len & SVER_MASK);
593 s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
594 s->v4.state = htons(cp->state);
595 s->v4.protocol = cp->protocol;
596 s->v4.cport = cp->cport;
597 s->v4.vport = cp->vport;
598 s->v4.dport = cp->dport;
599 s->v4.fwmark = htonl(cp->fwmark);
600 s->v4.timeout = htonl(cp->timeout / HZ);
601 m->nr_conns++;
602
603#ifdef CONFIG_IP_VS_IPV6
604 if (cp->af == AF_INET6) {
605 p += sizeof(struct ip_vs_sync_v6);
606 s->v6.caddr = cp->caddr.in6;
607 s->v6.vaddr = cp->vaddr.in6;
608 s->v6.daddr = cp->daddr.in6;
609 } else
610#endif
611 {
612 p += sizeof(struct ip_vs_sync_v4);
613 s->v4.caddr = cp->caddr.ip;
614 s->v4.vaddr = cp->vaddr.ip;
615 s->v4.daddr = cp->daddr.ip;
616 }
617 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
618 *(p++) = IPVS_OPT_SEQ_DATA;
619 *(p++) = sizeof(struct ip_vs_sync_conn_options);
620 hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
621 p += sizeof(struct ip_vs_seq);
622 hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
623 p += sizeof(struct ip_vs_seq);
624 }
625
626 if (cp->pe_data_len && cp->pe_data) {
627 *(p++) = IPVS_OPT_PE_DATA;
628 *(p++) = cp->pe_data_len;
629 memcpy(p, cp->pe_data, cp->pe_data_len);
630 p += cp->pe_data_len;
631 if (pe_name_len) {
632
633 *(p++) = IPVS_OPT_PE_NAME;
634 *(p++) = pe_name_len;
635 memcpy(p, cp->pe->name, pe_name_len);
636 p += pe_name_len;
637 }
638 }
639
640 spin_unlock(&ipvs->sync_buff_lock);
641
642control:
643
644 cp = cp->control;
645 if (!cp)
646 return;
647
648
649
650
651 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
652 int pkts = atomic_add_return(1, &cp->in_pkts);
653
654 if (pkts % sysctl_sync_period(ipvs) != 1)
655 return;
656 }
657 goto sloop;
658}
659
660
661
662
663static inline int
664ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
665 struct ip_vs_conn_param *p,
666 __u8 *pe_data, unsigned int pe_data_len,
667 __u8 *pe_name, unsigned int pe_name_len)
668{
669#ifdef CONFIG_IP_VS_IPV6
670 if (af == AF_INET6)
671 ip_vs_conn_fill_param(net, af, sc->v6.protocol,
672 (const union nf_inet_addr *)&sc->v6.caddr,
673 sc->v6.cport,
674 (const union nf_inet_addr *)&sc->v6.vaddr,
675 sc->v6.vport, p);
676 else
677#endif
678 ip_vs_conn_fill_param(net, af, sc->v4.protocol,
679 (const union nf_inet_addr *)&sc->v4.caddr,
680 sc->v4.cport,
681 (const union nf_inet_addr *)&sc->v4.vaddr,
682 sc->v4.vport, p);
683
684 if (pe_data_len) {
685 if (pe_name_len) {
686 char buff[IP_VS_PENAME_MAXLEN+1];
687
688 memcpy(buff, pe_name, pe_name_len);
689 buff[pe_name_len]=0;
690 p->pe = __ip_vs_pe_getbyname(buff);
691 if (!p->pe) {
692 IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
693 buff);
694 return 1;
695 }
696 } else {
697 IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
698 return 1;
699 }
700
701 p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
702 if (!p->pe_data) {
703 if (p->pe->module)
704 module_put(p->pe->module);
705 return -ENOMEM;
706 }
707 p->pe_data_len = pe_data_len;
708 }
709 return 0;
710}
711
712
713
714
715
716
717
718static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
719 unsigned int flags, unsigned int state,
720 unsigned int protocol, unsigned int type,
721 const union nf_inet_addr *daddr, __be16 dport,
722 unsigned long timeout, __u32 fwmark,
723 struct ip_vs_sync_conn_options *opt)
724{
725 struct ip_vs_dest *dest;
726 struct ip_vs_conn *cp;
727 struct netns_ipvs *ipvs = net_ipvs(net);
728
729 if (!(flags & IP_VS_CONN_F_TEMPLATE))
730 cp = ip_vs_conn_in_get(param);
731 else
732 cp = ip_vs_ct_in_get(param);
733
734 if (cp && param->pe_data)
735 kfree(param->pe_data);
736 if (!cp) {
737
738
739
740
741
742 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
743 param->vport, protocol, fwmark, flags);
744
745
746 if (protocol == IPPROTO_TCP) {
747 if (state != IP_VS_TCP_S_ESTABLISHED)
748 flags |= IP_VS_CONN_F_INACTIVE;
749 else
750 flags &= ~IP_VS_CONN_F_INACTIVE;
751 } else if (protocol == IPPROTO_SCTP) {
752 if (state != IP_VS_SCTP_S_ESTABLISHED)
753 flags |= IP_VS_CONN_F_INACTIVE;
754 else
755 flags &= ~IP_VS_CONN_F_INACTIVE;
756 }
757 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
758 if (dest)
759 atomic_dec(&dest->refcnt);
760 if (!cp) {
761 if (param->pe_data)
762 kfree(param->pe_data);
763 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
764 return;
765 }
766 } else if (!cp->dest) {
767 dest = ip_vs_try_bind_dest(cp);
768 if (dest)
769 atomic_dec(&dest->refcnt);
770 } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
771 (cp->state != state)) {
772
773 dest = cp->dest;
774 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
775 (state != IP_VS_TCP_S_ESTABLISHED)) {
776 atomic_dec(&dest->activeconns);
777 atomic_inc(&dest->inactconns);
778 cp->flags |= IP_VS_CONN_F_INACTIVE;
779 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
780 (state == IP_VS_TCP_S_ESTABLISHED)) {
781 atomic_inc(&dest->activeconns);
782 atomic_dec(&dest->inactconns);
783 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
784 }
785 } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
786 (cp->state != state)) {
787 dest = cp->dest;
788 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
789 (state != IP_VS_SCTP_S_ESTABLISHED)) {
790 atomic_dec(&dest->activeconns);
791 atomic_inc(&dest->inactconns);
792 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
793 }
794 }
795
796 if (opt)
797 memcpy(&cp->in_seq, opt, sizeof(*opt));
798 atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
799 cp->state = state;
800 cp->old_state = cp->state;
801
802
803
804
805
806
807
808
809
810 if (timeout) {
811 if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
812 timeout = MAX_SCHEDULE_TIMEOUT / HZ;
813 cp->timeout = timeout*HZ;
814 } else {
815 struct ip_vs_proto_data *pd;
816
817 pd = ip_vs_proto_data_get(net, protocol);
818 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
819 cp->timeout = pd->timeout_table[state];
820 else
821 cp->timeout = (3*60*HZ);
822 }
823 ip_vs_conn_put(cp);
824}
825
826
827
828
829static void ip_vs_process_message_v0(struct net *net, const char *buffer,
830 const size_t buflen)
831{
832 struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
833 struct ip_vs_sync_conn_v0 *s;
834 struct ip_vs_sync_conn_options *opt;
835 struct ip_vs_protocol *pp;
836 struct ip_vs_conn_param param;
837 char *p;
838 int i;
839
840 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
841 for (i=0; i<m->nr_conns; i++) {
842 unsigned flags, state;
843
844 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
845 IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
846 return;
847 }
848 s = (struct ip_vs_sync_conn_v0 *) p;
849 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
850 flags &= ~IP_VS_CONN_F_HASHED;
851 if (flags & IP_VS_CONN_F_SEQ_MASK) {
852 opt = (struct ip_vs_sync_conn_options *)&s[1];
853 p += FULL_CONN_SIZE;
854 if (p > buffer+buflen) {
855 IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
856 return;
857 }
858 } else {
859 opt = NULL;
860 p += SIMPLE_CONN_SIZE;
861 }
862
863 state = ntohs(s->state);
864 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
865 pp = ip_vs_proto_get(s->protocol);
866 if (!pp) {
867 IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
868 s->protocol);
869 continue;
870 }
871 if (state >= pp->num_states) {
872 IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
873 pp->name, state);
874 continue;
875 }
876 } else {
877
878 if (state > 0) {
879 IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
880 state);
881 state = 0;
882 }
883 }
884
885 ip_vs_conn_fill_param(net, AF_INET, s->protocol,
886 (const union nf_inet_addr *)&s->caddr,
887 s->cport,
888 (const union nf_inet_addr *)&s->vaddr,
889 s->vport, ¶m);
890
891
892 ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
893 (union nf_inet_addr *)&s->daddr, s->dport,
894 0, 0, opt);
895 }
896}
897
898
899
900
901static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
902 __u32 *opt_flags,
903 struct ip_vs_sync_conn_options *opt)
904{
905 struct ip_vs_sync_conn_options *topt;
906
907 topt = (struct ip_vs_sync_conn_options *)p;
908
909 if (plen != sizeof(struct ip_vs_sync_conn_options)) {
910 IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
911 return -EINVAL;
912 }
913 if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
914 IP_VS_DBG(2, "BACKUP, conn options found twice\n");
915 return -EINVAL;
916 }
917 ntoh_seq(&topt->in_seq, &opt->in_seq);
918 ntoh_seq(&topt->out_seq, &opt->out_seq);
919 *opt_flags |= IPVS_OPT_F_SEQ_DATA;
920 return 0;
921}
922
923static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
924 __u8 **data, unsigned int maxlen,
925 __u32 *opt_flags, __u32 flag)
926{
927 if (plen > maxlen) {
928 IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
929 return -EINVAL;
930 }
931 if (*opt_flags & flag) {
932 IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
933 return -EINVAL;
934 }
935 *data_len = plen;
936 *data = p;
937 *opt_flags |= flag;
938 return 0;
939}
940
941
942
943static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
944{
945 struct ip_vs_sync_conn_options opt;
946 union ip_vs_sync_conn *s;
947 struct ip_vs_protocol *pp;
948 struct ip_vs_conn_param param;
949 __u32 flags;
950 unsigned int af, state, pe_data_len=0, pe_name_len=0;
951 __u8 *pe_data=NULL, *pe_name=NULL;
952 __u32 opt_flags=0;
953 int retc=0;
954
955 s = (union ip_vs_sync_conn *) p;
956
957 if (s->v6.type & STYPE_F_INET6) {
958#ifdef CONFIG_IP_VS_IPV6
959 af = AF_INET6;
960 p += sizeof(struct ip_vs_sync_v6);
961#else
962 IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
963 retc = 10;
964 goto out;
965#endif
966 } else if (!s->v4.type) {
967 af = AF_INET;
968 p += sizeof(struct ip_vs_sync_v4);
969 } else {
970 return -10;
971 }
972 if (p > msg_end)
973 return -20;
974
975
976 while (p < msg_end) {
977 int ptype;
978 int plen;
979
980 if (p+2 > msg_end)
981 return -30;
982 ptype = *(p++);
983 plen = *(p++);
984
985 if (!plen || ((p + plen) > msg_end))
986 return -40;
987
988 switch (ptype & ~IPVS_OPT_F_PARAM) {
989 case IPVS_OPT_SEQ_DATA:
990 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
991 return -50;
992 break;
993
994 case IPVS_OPT_PE_DATA:
995 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
996 IP_VS_PEDATA_MAXLEN, &opt_flags,
997 IPVS_OPT_F_PE_DATA))
998 return -60;
999 break;
1000
1001 case IPVS_OPT_PE_NAME:
1002 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1003 IP_VS_PENAME_MAXLEN, &opt_flags,
1004 IPVS_OPT_F_PE_NAME))
1005 return -70;
1006 break;
1007
1008 default:
1009
1010 if (!(ptype & IPVS_OPT_F_PARAM)) {
1011 IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
1012 ptype & ~IPVS_OPT_F_PARAM);
1013 retc = 20;
1014 goto out;
1015 }
1016 }
1017 p += plen;
1018 }
1019
1020
1021 flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
1022 flags |= IP_VS_CONN_F_SYNC;
1023 state = ntohs(s->v4.state);
1024
1025 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1026 pp = ip_vs_proto_get(s->v4.protocol);
1027 if (!pp) {
1028 IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
1029 s->v4.protocol);
1030 retc = 30;
1031 goto out;
1032 }
1033 if (state >= pp->num_states) {
1034 IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
1035 pp->name, state);
1036 retc = 40;
1037 goto out;
1038 }
1039 } else {
1040
1041 if (state > 0) {
1042 IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
1043 state);
1044 state = 0;
1045 }
1046 }
1047 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1048 pe_data_len, pe_name, pe_name_len)) {
1049 retc = 50;
1050 goto out;
1051 }
1052
1053 if (af == AF_INET)
1054 ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
1055 (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
1056 ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
1057 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1058 );
1059#ifdef CONFIG_IP_VS_IPV6
1060 else
1061 ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
1062 (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
1063 ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
1064 (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
1065 );
1066#endif
1067 return 0;
1068
1069out:
1070 IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
1071 return retc;
1072
1073}
1074
1075
1076
1077
1078
1079static void ip_vs_process_message(struct net *net, __u8 *buffer,
1080 const size_t buflen)
1081{
1082 struct netns_ipvs *ipvs = net_ipvs(net);
1083 struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
1084 __u8 *p, *msg_end;
1085 int i, nr_conns;
1086
1087 if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
1088 IP_VS_DBG(2, "BACKUP, message header too short\n");
1089 return;
1090 }
1091
1092 m2->size = ntohs(m2->size);
1093
1094 if (buflen != m2->size) {
1095 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1096 return;
1097 }
1098
1099 if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
1100 IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
1101 return;
1102 }
1103
1104 if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
1105 && (m2->spare == 0)) {
1106
1107 msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
1108 nr_conns = m2->nr_conns;
1109
1110 for (i=0; i<nr_conns; i++) {
1111 union ip_vs_sync_conn *s;
1112 unsigned size;
1113 int retc;
1114
1115 p = msg_end;
1116 if (p + sizeof(s->v4) > buffer+buflen) {
1117 IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
1118 return;
1119 }
1120 s = (union ip_vs_sync_conn *)p;
1121 size = ntohs(s->v4.ver_size) & SVER_MASK;
1122 msg_end = p + size;
1123
1124 if (msg_end > buffer+buflen) {
1125 IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
1126 return;
1127 }
1128 if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
1129 IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
1130 ntohs(s->v4.ver_size) >> SVER_SHIFT);
1131 return;
1132 }
1133
1134 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1135 if (retc < 0) {
1136 IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
1137 retc);
1138 return;
1139 }
1140
1141 msg_end = p + ((size + 3) & ~3);
1142 }
1143 } else {
1144
1145 ip_vs_process_message_v0(net, buffer, buflen);
1146 return;
1147 }
1148}
1149
1150
1151
1152
1153
1154static void set_mcast_loop(struct sock *sk, u_char loop)
1155{
1156 struct inet_sock *inet = inet_sk(sk);
1157
1158
1159 lock_sock(sk);
1160 inet->mc_loop = loop ? 1 : 0;
1161 release_sock(sk);
1162}
1163
1164
1165
1166
1167static void set_mcast_ttl(struct sock *sk, u_char ttl)
1168{
1169 struct inet_sock *inet = inet_sk(sk);
1170
1171
1172 lock_sock(sk);
1173 inet->mc_ttl = ttl;
1174 release_sock(sk);
1175}
1176
1177
1178
1179
1180static int set_mcast_if(struct sock *sk, char *ifname)
1181{
1182 struct net_device *dev;
1183 struct inet_sock *inet = inet_sk(sk);
1184 struct net *net = sock_net(sk);
1185
1186 dev = __dev_get_by_name(net, ifname);
1187 if (!dev)
1188 return -ENODEV;
1189
1190 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1191 return -EINVAL;
1192
1193 lock_sock(sk);
1194 inet->mc_index = dev->ifindex;
1195
1196 release_sock(sk);
1197
1198 return 0;
1199}
1200
1201
1202
1203
1204
1205
1206static int set_sync_mesg_maxlen(struct net *net, int sync_state)
1207{
1208 struct netns_ipvs *ipvs = net_ipvs(net);
1209 struct net_device *dev;
1210 int num;
1211
1212 if (sync_state == IP_VS_STATE_MASTER) {
1213 dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
1214 if (!dev)
1215 return -ENODEV;
1216
1217 num = (dev->mtu - sizeof(struct iphdr) -
1218 sizeof(struct udphdr) -
1219 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
1220 ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
1221 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
1222 IP_VS_DBG(7, "setting the maximum length of sync sending "
1223 "message %d.\n", ipvs->send_mesg_maxlen);
1224 } else if (sync_state == IP_VS_STATE_BACKUP) {
1225 dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
1226 if (!dev)
1227 return -ENODEV;
1228
1229 ipvs->recv_mesg_maxlen = dev->mtu -
1230 sizeof(struct iphdr) - sizeof(struct udphdr);
1231 IP_VS_DBG(7, "setting the maximum length of sync receiving "
1232 "message %d.\n", ipvs->recv_mesg_maxlen);
1233 }
1234
1235 return 0;
1236}
1237
1238
1239
1240
1241
1242
1243
1244static int
1245join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
1246{
1247 struct net *net = sock_net(sk);
1248 struct ip_mreqn mreq;
1249 struct net_device *dev;
1250 int ret;
1251
1252 memset(&mreq, 0, sizeof(mreq));
1253 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
1254
1255 dev = __dev_get_by_name(net, ifname);
1256 if (!dev)
1257 return -ENODEV;
1258 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
1259 return -EINVAL;
1260
1261 mreq.imr_ifindex = dev->ifindex;
1262
1263 lock_sock(sk);
1264 ret = ip_mc_join_group(sk, &mreq);
1265 release_sock(sk);
1266
1267 return ret;
1268}
1269
1270
1271static int bind_mcastif_addr(struct socket *sock, char *ifname)
1272{
1273 struct net *net = sock_net(sock->sk);
1274 struct net_device *dev;
1275 __be32 addr;
1276 struct sockaddr_in sin;
1277
1278 dev = __dev_get_by_name(net, ifname);
1279 if (!dev)
1280 return -ENODEV;
1281
1282 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
1283 if (!addr)
1284 pr_err("You probably need to specify IP address on "
1285 "multicast interface.\n");
1286
1287 IP_VS_DBG(7, "binding socket with (%s) %pI4\n",
1288 ifname, &addr);
1289
1290
1291 sin.sin_family = AF_INET;
1292 sin.sin_addr.s_addr = addr;
1293 sin.sin_port = 0;
1294
1295 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
1296}
1297
1298
1299
1300
1301static struct socket *make_send_sock(struct net *net)
1302{
1303 struct netns_ipvs *ipvs = net_ipvs(net);
1304 struct socket *sock;
1305 int result;
1306
1307
1308 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1309 if (result < 0) {
1310 pr_err("Error during creation of socket; terminating\n");
1311 return ERR_PTR(result);
1312 }
1313
1314
1315
1316
1317
1318 sk_change_net(sock->sk, net);
1319 result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
1320 if (result < 0) {
1321 pr_err("Error setting outbound mcast interface\n");
1322 goto error;
1323 }
1324
1325 set_mcast_loop(sock->sk, 0);
1326 set_mcast_ttl(sock->sk, 1);
1327
1328 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
1329 if (result < 0) {
1330 pr_err("Error binding address of the mcast interface\n");
1331 goto error;
1332 }
1333
1334 result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
1335 sizeof(struct sockaddr), 0);
1336 if (result < 0) {
1337 pr_err("Error connecting to the multicast addr\n");
1338 goto error;
1339 }
1340
1341 return sock;
1342
1343error:
1344 sk_release_kernel(sock->sk);
1345 return ERR_PTR(result);
1346}
1347
1348
1349
1350
1351
1352static struct socket *make_receive_sock(struct net *net)
1353{
1354 struct netns_ipvs *ipvs = net_ipvs(net);
1355 struct socket *sock;
1356 int result;
1357
1358
1359 result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1360 if (result < 0) {
1361 pr_err("Error during creation of socket; terminating\n");
1362 return ERR_PTR(result);
1363 }
1364
1365
1366
1367
1368
1369 sk_change_net(sock->sk, net);
1370
1371 sock->sk->sk_reuse = 1;
1372
1373 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
1374 sizeof(struct sockaddr));
1375 if (result < 0) {
1376 pr_err("Error binding to the multicast addr\n");
1377 goto error;
1378 }
1379
1380
1381 result = join_mcast_group(sock->sk,
1382 (struct in_addr *) &mcast_addr.sin_addr,
1383 ipvs->backup_mcast_ifn);
1384 if (result < 0) {
1385 pr_err("Error joining to the multicast group\n");
1386 goto error;
1387 }
1388
1389 return sock;
1390
1391error:
1392 sk_release_kernel(sock->sk);
1393 return ERR_PTR(result);
1394}
1395
1396
1397static int
1398ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
1399{
1400 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL};
1401 struct kvec iov;
1402 int len;
1403
1404 EnterFunction(7);
1405 iov.iov_base = (void *)buffer;
1406 iov.iov_len = length;
1407
1408 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length));
1409
1410 LeaveFunction(7);
1411 return len;
1412}
1413
1414static void
1415ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1416{
1417 int msize;
1418
1419 msize = msg->size;
1420
1421
1422 msg->size = htons(msg->size);
1423
1424 if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
1425 pr_err("ip_vs_send_async error\n");
1426}
1427
1428static int
1429ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
1430{
1431 struct msghdr msg = {NULL,};
1432 struct kvec iov;
1433 int len;
1434
1435 EnterFunction(7);
1436
1437
1438 iov.iov_base = buffer;
1439 iov.iov_len = (size_t)buflen;
1440
1441 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);
1442
1443 if (len < 0)
1444 return -1;
1445
1446 LeaveFunction(7);
1447 return len;
1448}
1449
1450
1451static int sync_thread_master(void *data)
1452{
1453 struct ip_vs_sync_thread_data *tinfo = data;
1454 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1455 struct ip_vs_sync_buff *sb;
1456
1457 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
1458 "syncid = %d\n",
1459 ipvs->master_mcast_ifn, ipvs->master_syncid);
1460
1461 while (!kthread_should_stop()) {
1462 while ((sb = sb_dequeue(ipvs))) {
1463 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
1464 ip_vs_sync_buff_release(sb);
1465 }
1466
1467
1468 sb = get_curr_sync_buff(ipvs, 2 * HZ);
1469 if (sb) {
1470 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
1471 ip_vs_sync_buff_release(sb);
1472 }
1473
1474 schedule_timeout_interruptible(HZ);
1475 }
1476
1477
1478 while ((sb = sb_dequeue(ipvs)))
1479 ip_vs_sync_buff_release(sb);
1480
1481
1482 sb = get_curr_sync_buff(ipvs, 0);
1483 if (sb)
1484 ip_vs_sync_buff_release(sb);
1485
1486
1487 sk_release_kernel(tinfo->sock->sk);
1488 kfree(tinfo);
1489
1490 return 0;
1491}
1492
1493
1494static int sync_thread_backup(void *data)
1495{
1496 struct ip_vs_sync_thread_data *tinfo = data;
1497 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
1498 int len;
1499
1500 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
1501 "syncid = %d\n",
1502 ipvs->backup_mcast_ifn, ipvs->backup_syncid);
1503
1504 while (!kthread_should_stop()) {
1505 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
1506 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
1507 || kthread_should_stop());
1508
1509
1510 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
1511 len = ip_vs_receive(tinfo->sock, tinfo->buf,
1512 ipvs->recv_mesg_maxlen);
1513 if (len <= 0) {
1514 pr_err("receiving message error\n");
1515 break;
1516 }
1517
1518
1519
1520 local_bh_disable();
1521 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1522 local_bh_enable();
1523 }
1524 }
1525
1526
1527 sk_release_kernel(tinfo->sock->sk);
1528 kfree(tinfo->buf);
1529 kfree(tinfo);
1530
1531 return 0;
1532}
1533
1534
1535int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
1536{
1537 struct ip_vs_sync_thread_data *tinfo;
1538 struct task_struct **realtask, *task;
1539 struct socket *sock;
1540 struct netns_ipvs *ipvs = net_ipvs(net);
1541 char *name, *buf = NULL;
1542 int (*threadfn)(void *data);
1543 int result = -ENOMEM;
1544
1545 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1546 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
1547 sizeof(struct ip_vs_sync_conn_v0));
1548
1549
1550 if (state == IP_VS_STATE_MASTER) {
1551 if (ipvs->master_thread)
1552 return -EEXIST;
1553
1554 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
1555 sizeof(ipvs->master_mcast_ifn));
1556 ipvs->master_syncid = syncid;
1557 realtask = &ipvs->master_thread;
1558 name = "ipvs_master:%d";
1559 threadfn = sync_thread_master;
1560 sock = make_send_sock(net);
1561 } else if (state == IP_VS_STATE_BACKUP) {
1562 if (ipvs->backup_thread)
1563 return -EEXIST;
1564
1565 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
1566 sizeof(ipvs->backup_mcast_ifn));
1567 ipvs->backup_syncid = syncid;
1568 realtask = &ipvs->backup_thread;
1569 name = "ipvs_backup:%d";
1570 threadfn = sync_thread_backup;
1571 sock = make_receive_sock(net);
1572 } else {
1573 return -EINVAL;
1574 }
1575
1576 if (IS_ERR(sock)) {
1577 result = PTR_ERR(sock);
1578 goto out;
1579 }
1580
1581 set_sync_mesg_maxlen(net, state);
1582 if (state == IP_VS_STATE_BACKUP) {
1583 buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
1584 if (!buf)
1585 goto outsocket;
1586 }
1587
1588 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
1589 if (!tinfo)
1590 goto outbuf;
1591
1592 tinfo->net = net;
1593 tinfo->sock = sock;
1594 tinfo->buf = buf;
1595
1596 task = kthread_run(threadfn, tinfo, name, ipvs->gen);
1597 if (IS_ERR(task)) {
1598 result = PTR_ERR(task);
1599 goto outtinfo;
1600 }
1601
1602
1603 *realtask = task;
1604 ipvs->sync_state |= state;
1605
1606
1607 ip_vs_use_count_inc();
1608
1609 return 0;
1610
1611outtinfo:
1612 kfree(tinfo);
1613outbuf:
1614 kfree(buf);
1615outsocket:
1616 sk_release_kernel(sock->sk);
1617out:
1618 return result;
1619}
1620
1621
1622int stop_sync_thread(struct net *net, int state)
1623{
1624 struct netns_ipvs *ipvs = net_ipvs(net);
1625 int retc = -EINVAL;
1626
1627 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
1628
1629 if (state == IP_VS_STATE_MASTER) {
1630 if (!ipvs->master_thread)
1631 return -ESRCH;
1632
1633 pr_info("stopping master sync thread %d ...\n",
1634 task_pid_nr(ipvs->master_thread));
1635
1636
1637
1638
1639
1640
1641
1642 spin_lock_bh(&ipvs->sync_lock);
1643 ipvs->sync_state &= ~IP_VS_STATE_MASTER;
1644 spin_unlock_bh(&ipvs->sync_lock);
1645 retc = kthread_stop(ipvs->master_thread);
1646 ipvs->master_thread = NULL;
1647 } else if (state == IP_VS_STATE_BACKUP) {
1648 if (!ipvs->backup_thread)
1649 return -ESRCH;
1650
1651 pr_info("stopping backup sync thread %d ...\n",
1652 task_pid_nr(ipvs->backup_thread));
1653
1654 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
1655 retc = kthread_stop(ipvs->backup_thread);
1656 ipvs->backup_thread = NULL;
1657 }
1658
1659
1660 ip_vs_use_count_dec();
1661
1662 return retc;
1663}
1664
1665
1666
1667
1668int __net_init ip_vs_sync_net_init(struct net *net)
1669{
1670 struct netns_ipvs *ipvs = net_ipvs(net);
1671
1672 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
1673 INIT_LIST_HEAD(&ipvs->sync_queue);
1674 spin_lock_init(&ipvs->sync_lock);
1675 spin_lock_init(&ipvs->sync_buff_lock);
1676
1677 ipvs->sync_mcast_addr.sin_family = AF_INET;
1678 ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
1679 ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
1680 return 0;
1681}
1682
1683void ip_vs_sync_net_cleanup(struct net *net)
1684{
1685 int retc;
1686 struct netns_ipvs *ipvs = net_ipvs(net);
1687
1688 mutex_lock(&ipvs->sync_mutex);
1689 retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
1690 if (retc && retc != -ESRCH)
1691 pr_err("Failed to stop Master Daemon\n");
1692
1693 retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
1694 if (retc && retc != -ESRCH)
1695 pr_err("Failed to stop Backup Daemon\n");
1696 mutex_unlock(&ipvs->sync_mutex);
1697}
1698