1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44#include <linux/module.h>
45#include <linux/kernel.h>
46
47
48#include <linux/fs.h>
49#include <linux/sysctl.h>
50
51#include <net/ip_vs.h>
52
53
54
55
56
57
/* Period, in jiffies, at which the table's expiry timer is re-armed. */
#define CHECK_EXPIRE_INTERVAL	(60 * HZ)
/* Idle time, in jiffies, after which the over-capacity trim may drop an entry. */
#define ENTRY_TIMEOUT		(6 * 60 * HZ)
60
61
62
63
64
65
66
67#define COUNT_FOR_FULL_EXPIRATION 30
68static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
69
70
71
72
73
/* Hash table geometry: 2^BITS buckets, defaulting to 10 bits when unconfigured. */
#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
#define CONFIG_IP_VS_LBLC_TAB_BITS	10
#endif
#define IP_VS_LBLC_TAB_BITS	CONFIG_IP_VS_LBLC_TAB_BITS
#define IP_VS_LBLC_TAB_SIZE	(1 << IP_VS_LBLC_TAB_BITS)
#define IP_VS_LBLC_TAB_MASK	(IP_VS_LBLC_TAB_SIZE - 1)
80
81
82
83
84
85
86struct ip_vs_lblc_entry {
87 struct list_head list;
88 __u32 addr;
89 struct ip_vs_dest *dest;
90 unsigned long lastuse;
91};
92
93
94
95
96
97struct ip_vs_lblc_table {
98 rwlock_t lock;
99 struct list_head bucket[IP_VS_LBLC_TAB_SIZE];
100 atomic_t entries;
101 int max_size;
102 struct timer_list periodic_timer;
103 int rover;
104 int counter;
105};
106
107
108
109
110
111struct ip_vs_lblc_sysctl_table {
112 struct ctl_table_header *sysctl_header;
113 ctl_table vs_vars[2];
114 ctl_table vs_dir[2];
115 ctl_table ipv4_dir[2];
116 ctl_table root_dir[2];
117};
118
119
120static struct ip_vs_lblc_sysctl_table lblc_sysctl_table = {
121 NULL,
122 {{NET_IPV4_VS_LBLC_EXPIRE, "lblc_expiration",
123 &sysctl_ip_vs_lblc_expiration,
124 sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
125 {0}},
126 {{NET_IPV4_VS, "vs", NULL, 0, 0555, lblc_sysctl_table.vs_vars},
127 {0}},
128 {{NET_IPV4, "ipv4", NULL, 0, 0555, lblc_sysctl_table.vs_dir},
129 {0}},
130 {{CTL_NET, "net", NULL, 0, 0555, lblc_sysctl_table.ipv4_dir},
131 {0}}
132};
133
134
135
136
137
138
139static inline struct ip_vs_lblc_entry *
140ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
141{
142 struct ip_vs_lblc_entry *en;
143
144 en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
145 if (en == NULL) {
146 IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
147 return NULL;
148 }
149
150 INIT_LIST_HEAD(&en->list);
151 en->addr = daddr;
152
153 atomic_inc(&dest->refcnt);
154 en->dest = dest;
155
156 return en;
157}
158
159
160static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
161{
162 list_del(&en->list);
163
164
165
166
167 atomic_dec(&en->dest->refcnt);
168 kfree(en);
169}
170
171
172
173
174
175static inline unsigned ip_vs_lblc_hashkey(__u32 addr)
176{
177 return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
178}
179
180
181
182
183
184
185static int
186ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
187{
188 unsigned hash;
189
190 if (!list_empty(&en->list)) {
191 IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
192 "called from %p\n", __builtin_return_address(0));
193 return 0;
194 }
195
196
197
198
199 hash = ip_vs_lblc_hashkey(en->addr);
200
201 write_lock(&tbl->lock);
202 list_add(&en->list, &tbl->bucket[hash]);
203 atomic_inc(&tbl->entries);
204 write_unlock(&tbl->lock);
205
206 return 1;
207}
208
209
#if 0000
/*
 * Currently compiled out.
 *
 * Remove @en from @tbl under the table write lock and reset its list
 * node so that it reads as unhashed.  Returns 1 on success, or 0 (after
 * logging an error) if the entry was not hashed.  Note that the table's
 * entry counter is not adjusted here.
 */
static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
			     struct ip_vs_lblc_entry *en)
{
	if (!list_empty(&en->list)) {
		write_lock(&tbl->lock);
		list_del(&en->list);
		INIT_LIST_HEAD(&en->list);
		write_unlock(&tbl->lock);

		return 1;
	}

	IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
		  "called from %p\n", __builtin_return_address(0));
	return 0;
}
#endif
235
236
237
238
239
240static inline struct ip_vs_lblc_entry *
241ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr)
242{
243 unsigned hash;
244 struct ip_vs_lblc_entry *en;
245 struct list_head *l,*e;
246
247 hash = ip_vs_lblc_hashkey(addr);
248 l = &tbl->bucket[hash];
249
250 read_lock(&tbl->lock);
251
252 for (e=l->next; e!=l; e=e->next) {
253 en = list_entry(e, struct ip_vs_lblc_entry, list);
254 if (en->addr == addr) {
255
256 read_unlock(&tbl->lock);
257 return en;
258 }
259 }
260
261 read_unlock(&tbl->lock);
262
263 return NULL;
264}
265
266
267
268
269
270static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
271{
272 int i;
273 struct list_head *l;
274 struct ip_vs_lblc_entry *en;
275
276 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
277 write_lock(&tbl->lock);
278 for (l=&tbl->bucket[i]; l->next!=l; ) {
279 en = list_entry(l->next,
280 struct ip_vs_lblc_entry, list);
281 ip_vs_lblc_free(en);
282 atomic_dec(&tbl->entries);
283 }
284 write_unlock(&tbl->lock);
285 }
286}
287
288
289static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
290{
291 unsigned long now = jiffies;
292 int i, j;
293 struct list_head *l, *e;
294 struct ip_vs_lblc_entry *en;
295
296 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
297 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
298 e = l = &tbl->bucket[j];
299 write_lock(&tbl->lock);
300 while (e->next != l) {
301 en = list_entry(e->next,
302 struct ip_vs_lblc_entry, list);
303 if ((now - en->lastuse) <
304 sysctl_ip_vs_lblc_expiration) {
305 e = e->next;
306 continue;
307 }
308 ip_vs_lblc_free(en);
309 atomic_dec(&tbl->entries);
310 }
311 write_unlock(&tbl->lock);
312 }
313 tbl->rover = j;
314}
315
316
317
318
319
320
321
322
323
324
325
326
327
328static void ip_vs_lblc_check_expire(unsigned long data)
329{
330 struct ip_vs_lblc_table *tbl;
331 unsigned long now = jiffies;
332 int goal;
333 int i, j;
334 struct list_head *l, *e;
335 struct ip_vs_lblc_entry *en;
336
337 tbl = (struct ip_vs_lblc_table *)data;
338
339 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
340
341 ip_vs_lblc_full_check(tbl);
342 tbl->counter = 1;
343 goto out;
344 }
345
346 if (atomic_read(&tbl->entries) <= tbl->max_size) {
347 tbl->counter++;
348 goto out;
349 }
350
351 goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
352 if (goal > tbl->max_size/2)
353 goal = tbl->max_size/2;
354
355 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
356 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
357 e = l = &tbl->bucket[j];
358 write_lock(&tbl->lock);
359 while (e->next != l) {
360 en = list_entry(e->next,
361 struct ip_vs_lblc_entry, list);
362 if ((now - en->lastuse) < ENTRY_TIMEOUT) {
363 e = e->next;
364 continue;
365 }
366 ip_vs_lblc_free(en);
367 atomic_dec(&tbl->entries);
368 goal--;
369 }
370 write_unlock(&tbl->lock);
371 if (goal <= 0)
372 break;
373 }
374 tbl->rover = j;
375
376 out:
377 mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
378}
379
380
381static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
382{
383 int i;
384 struct ip_vs_lblc_table *tbl;
385
386
387
388
389 tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
390 if (tbl == NULL) {
391 IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
392 return -ENOMEM;
393 }
394 svc->sched_data = tbl;
395 IP_VS_DBG(6, "LBLC hash table (memory=%dbytes) allocated for "
396 "current service\n",
397 sizeof(struct ip_vs_lblc_table));
398
399
400
401
402 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
403 INIT_LIST_HEAD(&tbl->bucket[i]);
404 }
405 tbl->lock = RW_LOCK_UNLOCKED;
406 tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
407 tbl->rover = 0;
408 tbl->counter = 1;
409
410
411
412
413 init_timer(&tbl->periodic_timer);
414 tbl->periodic_timer.data = (unsigned long)tbl;
415 tbl->periodic_timer.function = ip_vs_lblc_check_expire;
416 tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
417 add_timer(&tbl->periodic_timer);
418
419 return 0;
420}
421
422
423static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
424{
425 struct ip_vs_lblc_table *tbl = svc->sched_data;
426
427
428 del_timer_sync(&tbl->periodic_timer);
429
430
431 ip_vs_lblc_flush(tbl);
432
433
434 kfree(svc->sched_data);
435 IP_VS_DBG(6, "LBLC hash table (memory=%dbytes) released\n",
436 sizeof(struct ip_vs_lblc_table));
437
438 return 0;
439}
440
441
/*
 * Scheduler update hook.  LBLC keeps no state that depends on service
 * updates, so this does nothing and reports success (0).
 */
static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
{
	return 0;
}
446
447
448static inline struct ip_vs_dest *
449__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
450{
451 register struct list_head *l, *e;
452 struct ip_vs_dest *dest, *least;
453 int loh, doh;
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 l = &svc->destinations;
474 for (e=l->next; e!=l; e=e->next) {
475 least = list_entry(e, struct ip_vs_dest, n_list);
476 if (atomic_read(&least->weight) > 0) {
477 loh = atomic_read(&least->activeconns) * 50
478 + atomic_read(&least->inactconns);
479 goto nextstage;
480 }
481 }
482 return NULL;
483
484
485
486
487 nextstage:
488 for (e=e->next; e!=l; e=e->next) {
489 dest = list_entry(e, struct ip_vs_dest, n_list);
490 doh = atomic_read(&dest->activeconns) * 50
491 + atomic_read(&dest->inactconns);
492 if (loh * atomic_read(&dest->weight) >
493 doh * atomic_read(&least->weight)) {
494 least = dest;
495 loh = doh;
496 }
497 }
498
499 IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
500 "activeconns %d refcnt %d weight %d overhead %d\n",
501 NIPQUAD(least->addr), ntohs(least->port),
502 atomic_read(&least->activeconns),
503 atomic_read(&least->refcnt),
504 atomic_read(&least->weight), loh);
505
506 return least;
507}
508
509
510
511
512
513
514static inline int
515is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
516{
517 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
518 register struct list_head *l, *e;
519 struct ip_vs_dest *d;
520
521 l = &svc->destinations;
522 for (e=l->next; e!=l; e=e->next) {
523 d = list_entry(e, struct ip_vs_dest, n_list);
524 if (atomic_read(&d->activeconns)*2
525 < atomic_read(&d->weight)) {
526 return 1;
527 }
528 }
529 }
530 return 0;
531}
532
533
534
535
536
537static struct ip_vs_dest *
538ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
539{
540 struct ip_vs_dest *dest;
541 struct ip_vs_lblc_table *tbl;
542 struct ip_vs_lblc_entry *en;
543
544 IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
545
546 tbl = (struct ip_vs_lblc_table *)svc->sched_data;
547 en = ip_vs_lblc_get(tbl, iph->daddr);
548 if (en == NULL) {
549 dest = __ip_vs_wlc_schedule(svc, iph);
550 if (dest == NULL) {
551 IP_VS_DBG(1, "no destination available\n");
552 return NULL;
553 }
554 en = ip_vs_lblc_new(iph->daddr, dest);
555 if (en == NULL) {
556 return NULL;
557 }
558 ip_vs_lblc_hash(tbl, en);
559 } else {
560 dest = en->dest;
561 if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
562 || atomic_read(&dest->weight) <= 0
563 || is_overloaded(dest, svc)) {
564 dest = __ip_vs_wlc_schedule(svc, iph);
565 if (dest == NULL) {
566 IP_VS_DBG(1, "no destination available\n");
567 return NULL;
568 }
569 atomic_dec(&en->dest->refcnt);
570 atomic_inc(&dest->refcnt);
571 en->dest = dest;
572 }
573 }
574 en->lastuse = jiffies;
575
576 IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
577 "--> server %u.%u.%u.%u:%d\n",
578 NIPQUAD(en->addr),
579 NIPQUAD(dest->addr),
580 ntohs(dest->port));
581
582 return dest;
583}
584
585
586
587
588
589static struct ip_vs_scheduler ip_vs_lblc_scheduler =
590{
591 {0},
592 "lblc",
593 ATOMIC_INIT(0),
594 THIS_MODULE,
595 ip_vs_lblc_init_svc,
596 ip_vs_lblc_done_svc,
597 ip_vs_lblc_update_svc,
598 ip_vs_lblc_schedule,
599};
600
601
602static int __init ip_vs_lblc_init(void)
603{
604 INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
605 lblc_sysctl_table.sysctl_header =
606 register_sysctl_table(lblc_sysctl_table.root_dir, 0);
607 return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
608}
609
610
611static void __exit ip_vs_lblc_cleanup(void)
612{
613 unregister_sysctl_table(lblc_sysctl_table.sysctl_header);
614 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
615}
616
617
618module_init(ip_vs_lblc_init);
619module_exit(ip_vs_lblc_cleanup);
620MODULE_LICENSE("GPL");
621