/*
 * IPVS:        Locality-Based Least-Connection scheduling module
 *
 * The lblc algorithm is as follows (pseudo code):
 *
 *       if cachenode[dest_ip] is null then
 *               n, cachenode[dest_ip] <- {weighted least-conn node};
 *       else
 *               n <- cachenode[dest_ip];
 *               if (n is dead) OR
 *                  (n.conns > n.weight AND
 *                   there is a node m with m.conns < m.weight/2) then
 *                       n, cachenode[dest_ip] <- {weighted least-conn node};
 *
 *       return n;
 */

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>

#include <net/ip_vs.h>


/*
 *    It is for garbage collection of stale IPVS lblc entries,
 *    when the table is full.
 */
#define CHECK_EXPIRE_INTERVAL   (60*HZ)
#define ENTRY_TIMEOUT           (6*60*HZ)

/*
 *    It is for full expiration check.
 *    When there is no partial expiration check (garbage collection)
 *    in a half hour, do a full expiration check to collect stale
 *    entries that haven't been touched for a day even (by default).
 */
#define COUNT_FOR_FULL_EXPIRATION   30
static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
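
/*
 * Note: 24*60*60*HZ is one day expressed in jiffies.  The variable is
 * registered below under net_vs_ctl_path as "lblc_expiration" (typically
 * /proc/sys/net/ipv4/vs/lblc_expiration), and proc_dointvec_jiffies
 * converts between seconds in userspace and jiffies in the kernel.
 */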

/*
 *     for IPVS lblc entry hash table
 */
#ifndef CONFIG_IP_VS_LBLC_TAB_BITS
#define CONFIG_IP_VS_LBLC_TAB_BITS      10
#endif
#define IP_VS_LBLC_TAB_BITS     CONFIG_IP_VS_LBLC_TAB_BITS
#define IP_VS_LBLC_TAB_SIZE     (1 << IP_VS_LBLC_TAB_BITS)
#define IP_VS_LBLC_TAB_MASK     (IP_VS_LBLC_TAB_SIZE - 1)
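
/*
 * Sizing example: with the default CONFIG_IP_VS_LBLC_TAB_BITS of 10, the
 * table has 1 << 10 = 1024 buckets and IP_VS_LBLC_TAB_MASK is 0x3ff.
 * ip_vs_lblc_init_svc() below sets the soft limit tbl->max_size to
 * IP_VS_LBLC_TAB_SIZE*16 = 16384 entries before garbage collection
 * starts trimming the table.
 */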

/*
 *      IPVS lblc entry represents an association between destination
 *      IP address and its destination server
 */
struct ip_vs_lblc_entry {
        struct list_head        list;
        __be32                  addr;           /* destination IP address */
        struct ip_vs_dest       *dest;          /* real server (cache) */
        unsigned long           lastuse;        /* last time the entry was used */
};


/*
 *      IPVS lblc hash table
 */
struct ip_vs_lblc_table {
        struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash buckets */
        atomic_t                entries;        /* number of entries */
        int                     max_size;       /* maximum size of entries */
        struct timer_list       periodic_timer; /* collect stale entries */
        int                     rover;          /* rover for expire check */
        int                     counter;        /* counter of no-expire rounds */
};


/*
 *      IPVS LBLC sysctl table
 */
static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblc_expiration",
                .data           = &sysctl_ip_vs_lblc_expiration,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_jiffies,
        },
        { .ctl_name = 0 }
};

static struct ctl_table_header *sysctl_header;

static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
        list_del(&en->list);
        /*
         * We don't kfree dest because it is referenced and protected
         * by IPVS rather than this lblc entry.
         */
        atomic_dec(&en->dest->refcnt);
        kfree(en);
}


/*
 *      Returns hash value for IPVS LBLC entry
 */
static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
{
        return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
}
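
/*
 * The multiplier 2654435761 (0x9e3779b1) above is a 32-bit prime close to
 * 2^32 divided by the golden ratio, i.e. Knuth's multiplicative hashing
 * constant, so nearby destination addresses are spread across buckets.
 */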

/*
 *      Hash an entry in the ip_vs_lblc_table.
 */
static void
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
        unsigned hash = ip_vs_lblc_hashkey(en->addr);

        list_add(&en->list, &tbl->bucket[hash]);
        atomic_inc(&tbl->entries);
}


/*
 *      Get ip_vs_lblc_entry associated with supplied parameters.
 *      Called under read lock.
 */
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
{
        unsigned hash = ip_vs_lblc_hashkey(addr);
        struct ip_vs_lblc_entry *en;

        list_for_each_entry(en, &tbl->bucket[hash], list)
                if (en->addr == addr)
                        return en;

        return NULL;
}


/*
 *      Create or update an ip_vs_lblc_entry, which is a mapping of a
 *      destination IP address to a server.  Called under write lock.
 */
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
               struct ip_vs_dest *dest)
{
        struct ip_vs_lblc_entry *en;

        en = ip_vs_lblc_get(tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
                if (!en) {
                        IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
                        return NULL;
                }

                en->addr = daddr;
                en->lastuse = jiffies;

                atomic_inc(&dest->refcnt);
                en->dest = dest;

                ip_vs_lblc_hash(tbl, en);
        } else if (en->dest != dest) {
                /* the entry is cached to another server, repoint it */
                atomic_dec(&en->dest->refcnt);
                atomic_inc(&dest->refcnt);
                en->dest = dest;
        }

        return en;
}
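
/*
 * Note: the kmalloc() above uses GFP_ATOMIC because ip_vs_lblc_new() runs
 * under the service's write lock in the packet scheduling path, where
 * sleeping is not allowed.
 */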

/*
 *      Flush all the entries of the specified table.
 */
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
{
        struct ip_vs_lblc_entry *en, *nxt;
        int i;

        for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
                list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
                        ip_vs_lblc_free(en);
                        atomic_dec(&tbl->entries);
                }
        }
}


/*
 *      Full expiration check: walk the whole table and drop entries that
 *      have not been used for sysctl_ip_vs_lblc_expiration (one day by
 *      default).
 */
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
        struct ip_vs_lblc_table *tbl = svc->sched_data;
        struct ip_vs_lblc_entry *en, *nxt;
        unsigned long now = jiffies;
        int i, j;

        for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLC_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now,
                                        en->lastuse + sysctl_ip_vs_lblc_expiration))
                                continue;

                        ip_vs_lblc_free(en);
                        atomic_dec(&tbl->entries);
                }
                write_unlock(&svc->sched_lock);
        }
        tbl->rover = j;
}
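
/*
 * tbl->rover remembers the last bucket scanned, so the partial expiration
 * pass in ip_vs_lblc_check_expire() below resumes where the previous one
 * stopped instead of always rescanning the first buckets, and the write
 * lock is dropped between buckets to keep scheduling latency low.
 */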

/*
 *      Periodical timer handler for the IPVS lblc table.
 *      It is used to collect stale entries when the number of entries
 *      exceeds the maximum size of the table (tbl->max_size).
 *
 *      Every COUNT_FOR_FULL_EXPIRATION ticks a full expiration check is
 *      run instead, to also collect entries that have not been used for
 *      a long time even when the table is not full.
 */
static void ip_vs_lblc_check_expire(unsigned long data)
{
        struct ip_vs_service *svc = (struct ip_vs_service *) data;
        struct ip_vs_lblc_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int goal;
        int i, j;
        struct ip_vs_lblc_entry *en, *nxt;

        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
                /* do full expiration check */
                ip_vs_lblc_full_check(svc);
                tbl->counter = 1;
                goto out;
        }

        if (atomic_read(&tbl->entries) <= tbl->max_size) {
                tbl->counter++;
                goto out;
        }

        /*
         * Aim to free a third more than the excess over max_size, but at
         * most half of the table's soft limit in one pass.
         */
        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
        if (goal > tbl->max_size/2)
                goal = tbl->max_size/2;

        for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLC_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
                                continue;

                        ip_vs_lblc_free(en);
                        atomic_dec(&tbl->entries);
                        goal--;
                }
                write_unlock(&svc->sched_lock);
                if (goal <= 0)
                        break;
        }
        tbl->rover = j;

  out:
        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}
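
/*
 * The timer always re-arms itself via mod_timer() above, so collection
 * keeps running for the lifetime of the service; it is only stopped by
 * del_timer_sync() in ip_vs_lblc_done_svc().
 */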

static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
{
        int i;
        struct ip_vs_lblc_table *tbl;

        /*
         *    Allocate the ip_vs_lblc_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
        if (tbl == NULL) {
                IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
                return -ENOMEM;
        }
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));

        /*
         *    Initialize the hash buckets
         */
        for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
                INIT_LIST_HEAD(&tbl->bucket[i]);
        }
        tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
        tbl->rover = 0;
        tbl->counter = 1;

        /*
         *    Hook periodic timer for garbage collection
         */
        setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
                    (unsigned long)svc);
        mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

        return 0;
}
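
/*
 * Note: with the default 1024 buckets, the table allocated above is
 * roughly 16 KB on 64-bit (mostly the bucket list heads), a fairly large
 * GFP_ATOMIC allocation, which is why the NULL check above matters.
 */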

static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
{
        struct ip_vs_lblc_table *tbl = svc->sched_data;

        /* remove periodic timer */
        del_timer_sync(&tbl->periodic_timer);

        /* got to clean up table entries here */
        ip_vs_lblc_flush(tbl);

        /* release the table itself */
        kfree(tbl);
        IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
                  sizeof(*tbl));

        return 0;
}


static inline struct ip_vs_dest *
__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        /*
         * We think the overhead of processing active connections is fifty
         * times higher than that of inactive connections in average. (This
         * fifty times might not be accurate, we will change it later.)  We
         * use the following formula to estimate the overhead:
         *                dest->activeconns*50 + dest->inactconns
         * and the load:
         *                (dest overhead) / dest->weight
         *
         * Remember -- no floats in kernel mode!!!
         * The comparison of h1*w2 > h2*w1 is equivalent to that of
         *                h1/w1 > h2/w2
         * if every weight is larger than zero.
         *
         * The server with weight=0 is quiesced and will not receive any
         * new connection.
         */
        list_for_each_entry(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;
                if (atomic_read(&dest->weight) > 0) {
                        least = dest;
                        loh = atomic_read(&least->activeconns) * 50
                                + atomic_read(&least->inactconns);
                        goto nextstage;
                }
        }
        return NULL;

        /*
         *    Find the destination with the least load.
         */
  nextstage:
        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = atomic_read(&dest->activeconns) * 50
                        + atomic_read(&dest->inactconns);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d "
                  "activeconns %d refcnt %d weight %d overhead %d\n",
                  NIPQUAD(least->addr.ip), ntohs(least->port),
                  atomic_read(&least->activeconns),
                  atomic_read(&least->refcnt),
                  atomic_read(&least->weight), loh);

        return least;
}
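
/*
 * Worked example for the cross-multiplied comparison above: if the current
 * best server has overhead loh = 100 with weight 2 (load 50) and a
 * candidate has overhead doh = 120 with weight 3 (load 40), then
 * loh*3 = 300 > doh*2 = 240, so the less loaded candidate wins without
 * any floating-point division.
 */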

/*
 *      If this destination server is overloaded and there is a less
 *      loaded server, then return true.
 */
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
        if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
                struct ip_vs_dest *d;

                list_for_each_entry(d, &svc->destinations, n_list) {
                        if (atomic_read(&d->activeconns)*2
                            < atomic_read(&d->weight)) {
                                return 1;
                        }
                }
        }
        return 0;
}
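
/*
 * This is the "n.conns > n.weight AND there is a node m with
 * m.conns < m.weight/2" test from the lblc pseudo code in the header
 * comment (written as m.conns*2 < m.weight to stay in integers).
 */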

/*
 *      Locality-Based (weighted) Least-Connection scheduling
 */
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
        struct ip_vs_lblc_table *tbl = svc->sched_data;
        struct iphdr *iph = ip_hdr(skb);
        struct ip_vs_dest *dest = NULL;
        struct ip_vs_lblc_entry *en;

        IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");

        /* First look in our cache */
        read_lock(&svc->sched_lock);
        en = ip_vs_lblc_get(tbl, iph->daddr);
        if (en) {
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;

                /*
                 * If the destination is not available, i.e. it's in the
                 * trash, we must ignore it, as it may be removed from
                 * under our feet if someone drops our reference count.
                 */
                if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
                        dest = en->dest;
        }
        read_unlock(&svc->sched_lock);

        /* If the destination has a weight and is not overloaded, use it */
        if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
                goto out;

        /* No cache entry or it is invalid, time to schedule */
        dest = __ip_vs_lblc_schedule(svc, iph);
        if (!dest) {
                IP_VS_DBG(1, "no destination available\n");
                return NULL;
        }

        /* If we fail to create a cache entry, we'll just use the valid dest */
        write_lock(&svc->sched_lock);
        ip_vs_lblc_new(tbl, iph->daddr, dest);
        write_unlock(&svc->sched_lock);

out:
        IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
                  "--> server %u.%u.%u.%u:%d\n",
                  NIPQUAD(iph->daddr),
                  NIPQUAD(dest->addr.ip),
                  ntohs(dest->port));

        return dest;
}
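
/*
 * Locking pattern above: the common case (cache hit on a usable server)
 * takes only the read side of svc->sched_lock; the write lock is taken
 * only when a cache entry has to be created or repointed.
 */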

/*
 *      IPVS LBLC Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblc_scheduler =
{
        .name =                 "lblc",
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
#ifdef CONFIG_IP_VS_IPV6
        .supports_ipv6 =        0,
#endif
        .init_service =         ip_vs_lblc_init_svc,
        .done_service =         ip_vs_lblc_done_svc,
        .schedule =             ip_vs_lblc_schedule,
};
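
/*
 * Usage sketch (assuming the standard ipvsadm userspace tool): the
 * scheduler is selected per virtual service by the name above, e.g.
 *
 *      ipvsadm -A -t 192.168.0.1:80 -s lblc
 *      ipvsadm -a -t 192.168.0.1:80 -r 10.0.0.1:80 -g -w 2
 *
 * Note that .supports_ipv6 is 0: even when IPVS is built with IPv6
 * support, this scheduler handles IPv4 services only (it hashes on the
 * IPv4 destination address).
 */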

static int __init ip_vs_lblc_init(void)
{
        int ret;

        /*
         * Register the sysctl table first; drop it again if the
         * scheduler registration fails.
         */
        sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
        ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
        if (ret)
                unregister_sysctl_table(sysctl_header);
        return ret;
}


static void __exit ip_vs_lblc_cleanup(void)
{
        unregister_sysctl_table(sysctl_header);
        unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
}


module_init(ip_vs_lblc_init);
module_exit(ip_vs_lblc_cleanup);
MODULE_LICENSE("GPL");