/*
 * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
 *
 * CUBIC is the congestion control algorithm described by Sangtae Ha,
 * Injong Rhee and Lisong Xu in "CUBIC: A New TCP-Friendly High-Speed
 * TCP Variant".  It grows the congestion window along a cubic function
 * of the time since the last congestion event, and integrates HyStart,
 * a hybrid slow start that exits slow start early based on ACK-train
 * length and RTT increase.
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <net/tcp.h>

#define BICTCP_BETA_SCALE	1024	/* Scale factor for beta calculation:
					 * max_cwnd = snd_cwnd * beta
					 */
#define BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */

/* Two methods of hybrid slow start */
#define HYSTART_ACK_TRAIN	0x1
#define HYSTART_DELAY		0x2

/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES	8
#define HYSTART_DELAY_MIN	(4U<<3)		/* 4 ms, in 1/8 ms units */
#define HYSTART_DELAY_MAX	(16U<<3)	/* 16 ms, in 1/8 ms units */
#define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)

static int fast_convergence __read_mostly = 1;
static int beta __read_mostly = 717;	/* = 717/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh __read_mostly;
static int bic_scale __read_mostly = 41;
static int tcp_friendliness __read_mostly = 1;

static int hystart __read_mostly = 1;
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
static int hystart_low_window __read_mostly = 16;
static int hystart_ack_delta __read_mostly = 2;

/* Scaling factors precomputed at module load from the parameters above */
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
static u64 cube_factor __read_mostly;

module_param(fast_convergence, int, 0644);
MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
module_param(beta, int, 0644);
MODULE_PARM_DESC(beta, "beta for multiplicative decrease");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
module_param(bic_scale, int, 0444);
MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
module_param(hystart, int, 0644);
MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
module_param(hystart_detect, int, 0644);
MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
		 " 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
module_param(hystart_ack_delta, int, 0644);
MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");

/* BIC TCP Parameters */
struct bictcp {
	u32	cnt;		/* increase cwnd by 1 after this many ACKs */
	u32	last_max_cwnd;	/* last maximum snd_cwnd */
	u32	loss_cwnd;	/* congestion window at last loss */
	u32	last_cwnd;	/* the last snd_cwnd */
	u32	last_time;	/* time when last_cwnd was updated */
	u32	bic_origin_point;/* origin point of the bic function */
	u32	bic_K;		/* time to origin point from the beginning
				 * of the current epoch */
	u32	delay_min;	/* min delay (msec << 3) */
	u32	epoch_start;	/* beginning of an epoch */
	u32	ack_cnt;	/* number of acks */
	u32	tcp_cwnd;	/* estimated tcp (Reno-equivalent) cwnd */
#define ACK_RATIO_SHIFT	4
#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
	u16	delayed_ack;	/* estimate of the packets/ACK ratio << 4 */
	u8	sample_cnt;	/* number of samples used for curr_rtt */
	u8	found;		/* slow start exit point found? */
	u32	round_start;	/* beginning of each round */
	u32	end_seq;	/* end_seq of the round */
	u32	last_ack;	/* last time the ACK spacing was close */
	u32	curr_rtt;	/* minimum rtt of the current round */
};

static inline void bictcp_reset(struct bictcp *ca)
{
	ca->cnt = 0;
	ca->last_max_cwnd = 0;
	ca->last_cwnd = 0;
	ca->last_time = 0;
	ca->bic_origin_point = 0;
	ca->bic_K = 0;
	ca->delay_min = 0;
	ca->epoch_start = 0;
	ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
	ca->ack_cnt = 0;
	ca->tcp_cwnd = 0;
	ca->found = 0;
}

static inline u32 bictcp_clock(void)
{
#if HZ < 1000
	return ktime_to_ms(ktime_get_real());
#else
	return jiffies_to_msecs(jiffies);
#endif
}
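
/*
 * Note on granularity (illustrative, not from the original source):
 * bictcp_clock() must report milliseconds.  With HZ = 100 a jiffy is
 * 10 ms, coarser than the default hystart_ack_delta of 2 ms, so ACK-train
 * spacing could not be resolved from jiffies alone; that is presumably why
 * ktime_get_real() is used when HZ < 1000.
 */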

static inline void bictcp_hystart_reset(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);

	ca->round_start = ca->last_ack = bictcp_clock();
	ca->end_seq = tp->snd_nxt;
	ca->curr_rtt = 0;
	ca->sample_cnt = 0;
}

static void bictcp_init(struct sock *sk)
{
	struct bictcp *ca = inet_csk_ca(sk);

	bictcp_reset(ca);
	ca->loss_cwnd = 0;

	if (hystart)
		bictcp_hystart_reset(sk);

	if (!hystart && initial_ssthresh)
		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}

/* calculate the cubic root of x using a table lookup followed by one
 * Newton-Raphson iteration.
 */
static u32 cubic_root(u64 a)
{
	u32 x, b, shift;
	/*
	 * Precomputed lookup table used to seed the cube root estimate
	 * from the most significant bits of the argument (indices 0..63).
	 */
	static const u8 v[] = {
		/* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
		/* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
		/* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
		/* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
		/* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
		/* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
		/* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
		/* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
	};

	b = fls64(a);
	if (b < 7) {
		/* a in [0..63] */
		return ((u32)v[(u32)a] + 35) >> 6;
	}

	b = ((b * 84) >> 8) - 1;
	shift = (a >> (b * 3));

	x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;

	/*
	 * One Newton-Raphson iteration for cbrt(a):
	 *   x_{k+1} = (2 * x_k + a / x_k^2) / 3
	 * done in integer arithmetic: the divisor x*(x-1) compensates for
	 * rounding and (x * 341) >> 10 approximates x / 3.
	 */
	x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
	x = ((x * 341) >> 10);
	return x;
}
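
/*
 * Worked example (illustrative, not part of the original source):
 * cubic_root(1000):
 *   fls64(1000) = 10, so b = ((10 * 84) >> 8) - 1 = 2
 *   shift = 1000 >> (2 * 3) = 15, v[15] = 156
 *   first estimate x = ((156 + 10) << 2) >> 6 = 10
 *   Newton step: x = 2*10 + 1000 / (10 * 9) = 31, then (31 * 341) >> 10 = 10
 * which matches cbrt(1000) = 10 exactly.
 */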

/*
 * Compute congestion window to use.
 */
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
	u64 offs;
	u32 delta, t, bic_target, max_cnt;

	ca->ack_cnt++;	/* count the number of ACKs */

	if (ca->last_cwnd == cwnd &&
	    (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
		return;

	ca->last_cwnd = cwnd;
	ca->last_time = tcp_time_stamp;

	if (ca->epoch_start == 0) {
		ca->epoch_start = tcp_time_stamp;	/* beginning of an epoch */
		ca->ack_cnt = 1;			/* start counting */
		ca->tcp_cwnd = cwnd;			/* sync with cubic */

		if (ca->last_max_cwnd <= cwnd) {
			ca->bic_K = 0;
			ca->bic_origin_point = cwnd;
		} else {
			/* Compute new K so that the cubic curve passes
			 * through the current cwnd at t = 0:
			 *   (wmax - cwnd) = C * K^3
			 * K is in 2^BICTCP_HZ units; cube_factor folds in
			 * 1/C and the assumed 100 ms RTT.
			 */
			ca->bic_K = cubic_root(cube_factor
					       * (ca->last_max_cwnd - cwnd));
			ca->bic_origin_point = ca->last_max_cwnd;
		}
	}

	/*
	 * cubic function:
	 *   bic_target = origin_point +/- C * |t - K|^3
	 * computed in fixed point: t and K are in 2^-BICTCP_HZ second units
	 * and C is cube_rtt_scale / 2^10.  time^3 is done in 64 bit to
	 * avoid overflow for realistic window sizes.
	 */

	/* change the unit from HZ to bictcp_HZ */
	t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
	      - ca->epoch_start) << BICTCP_HZ) / HZ;

	if (t < ca->bic_K)		/* |t - K| */
		offs = ca->bic_K - t;
	else
		offs = t - ca->bic_K;

	/* C * (t - K)^3 */
	delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
	if (t < ca->bic_K)					/* below origin */
		bic_target = ca->bic_origin_point - delta;
	else							/* above origin */
		bic_target = ca->bic_origin_point + delta;

	/* cubic function - calc bictcp_cnt */
	if (bic_target > cwnd) {
		ca->cnt = cwnd / (bic_target - cwnd);
	} else {
		ca->cnt = 100 * cwnd;		/* very small increment */
	}

	/*
	 * The initial growth of the cubic function may be too conservative
	 * when the available bandwidth is still unknown.
	 */
	if (ca->last_max_cwnd == 0 && ca->cnt > 20)
		ca->cnt = 20;	/* increase cwnd 5% per RTT */

	/* TCP Friendly */
	if (tcp_friendliness) {
		u32 scale = beta_scale;
		delta = (cwnd * scale) >> 3;
		while (ca->ack_cnt > delta) {		/* update tcp cwnd */
			ca->ack_cnt -= delta;
			ca->tcp_cwnd++;
		}

		if (ca->tcp_cwnd > cwnd) {	/* if bic is slower than tcp */
			delta = ca->tcp_cwnd - cwnd;
			max_cnt = cwnd / delta;
			if (ca->cnt > max_cnt)
				ca->cnt = max_cnt;
		}
	}

	/* scale the increment by the measured delayed-ACK ratio */
	ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
	if (ca->cnt == 0)			/* cannot be zero */
		ca->cnt = 1;
}
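
/*
 * Worked example (illustrative, not part of the original source): with the
 * default bic_scale = 41, cube_rtt_scale = 410, i.e. 410/1024 ~= 0.4, the
 * CUBIC "C" coefficient with the assumed 100 ms SRTT folded in.  If cwnd is
 * 100 segments below last_max_cwnd at the start of an epoch, then
 *   bic_K = cubic_root(cube_factor * 100)
 *         = cubic_root((2^40 / 410) * 100) ~= 6449
 * in 2^-BICTCP_HZ (1/1024) second units, i.e. roughly 6.3 seconds -- the
 * same answer as the analytic K = cbrt(100 / 0.4) = cbrt(250) ~= 6.3 s.
 */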

static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		if (hystart && after(ack, ca->end_seq))
			bictcp_hystart_reset(sk);
		tcp_slow_start(tp);
	} else {
		bictcp_update(ca, tp->snd_cwnd);
		tcp_cong_avoid_ai(tp, ca->cnt);
	}
}
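
/*
 * Illustration (not part of the original source): tcp_cong_avoid_ai() grows
 * snd_cwnd by one segment for every ca->cnt ACKed segments.  E.g. if
 * bictcp_update() computed bic_target = 110 with cwnd = 100, then
 * ca->cnt = 100 / (110 - 100) = 10, so cwnd climbs by roughly 10 segments
 * over the next RTT, closing most of the gap to the cubic target.
 */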

static u32 bictcp_recalc_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);

	ca->epoch_start = 0;	/* end of epoch */

	/* Wmax and fast convergence */
	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
			/ (2 * BICTCP_BETA_SCALE);
	else
		ca->last_max_cwnd = tp->snd_cwnd;

	ca->loss_cwnd = tp->snd_cwnd;

	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}

static u32 bictcp_undo_cwnd(struct sock *sk)
{
	struct bictcp *ca = inet_csk_ca(sk);

	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}

static void bictcp_state(struct sock *sk, u8 new_state)
{
	if (new_state == TCP_CA_Loss) {
		bictcp_reset(inet_csk_ca(sk));
		bictcp_hystart_reset(sk);
	}
}

static void hystart_update(struct sock *sk, u32 delay)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);

	if (!(ca->found & hystart_detect)) {
		u32 now = bictcp_clock();

		/* first detection parameter - ack-train detection */
		if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
			ca->last_ack = now;
			if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
				ca->found |= HYSTART_ACK_TRAIN;
		}

		/* second detection parameter - delay increase:
		 * take the minimum delay over the sampling packets
		 */
		if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
			if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
				ca->curr_rtt = delay;

			ca->sample_cnt++;
		} else {
			if (ca->curr_rtt > ca->delay_min +
			    HYSTART_DELAY_THRESH(ca->delay_min >> 4))
				ca->found |= HYSTART_DELAY;
		}

		/*
		 * If either of the two conditions is met,
		 * exit from slow start immediately.
		 */
		if (ca->found & hystart_detect)
			tp->snd_ssthresh = tp->snd_cwnd;
	}
}
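
/*
 * Worked example (illustrative, not part of the original source): suppose
 * delay_min corresponds to 80 ms, i.e. ca->delay_min = 640 in 1/8 ms units.
 *  - ACK train: ca->delay_min >> 4 = 40, so a run of ACKs spaced at most
 *    hystart_ack_delta (2 ms) apart that stretches past 40 ms (half the
 *    minimum RTT) sets HYSTART_ACK_TRAIN.
 *  - Delay increase: HYSTART_DELAY_THRESH(640 >> 4) = clamp(40, 32, 128) = 40,
 *    i.e. 5 ms, so HYSTART_DELAY fires once the minimum of 8 RTT samples in
 *    the current round exceeds 85 ms.
 */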

/* Track the delayed-ACK ratio as a sliding average, scaled by 16:
 * ratio = ratio - ratio/16 + cnt
 */
static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	u32 delay;

	if (icsk->icsk_ca_state == TCP_CA_Open) {
		u32 ratio = ca->delayed_ack;

		ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
		ratio += cnt;

		ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
	}

	/* Some calls are for duplicates without timestamps */
	if (rtt_us < 0)
		return;

	/* Discard delay samples right after fast recovery */
	if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
		return;

	delay = (rtt_us << 3) / USEC_PER_MSEC;	/* RTT in 1/8 ms units */
	if (delay == 0)
		delay = 1;

	/* first time call or link delay decreases */
	if (ca->delay_min == 0 || ca->delay_min > delay)
		ca->delay_min = delay;

	/* hystart triggers when cwnd is larger than some threshold */
	if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
	    tp->snd_cwnd >= hystart_low_window)
		hystart_update(sk, delay);
}
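
/*
 * Illustration (not part of the original source): delayed_ack is an EWMA of
 * 16 times the packets-per-ACK count, since ratio = ratio - ratio/16 + cnt
 * settles at 16 * cnt.  With a receiver acking every other segment, cnt is
 * typically 2 and the average settles near 2 << 4 = 32; bictcp_update() then
 * halves ca->cnt ((cnt << 4) / 32), so cwnd still advances by the intended
 * amount per RTT despite seeing half as many ACKs.
 */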

static struct tcp_congestion_ops cubictcp __read_mostly = {
	.init		= bictcp_init,
	.ssthresh	= bictcp_recalc_ssthresh,
	.cong_avoid	= bictcp_cong_avoid,
	.set_state	= bictcp_state,
	.undo_cwnd	= bictcp_undo_cwnd,
	.pkts_acked	= bictcp_acked,
	.owner		= THIS_MODULE,
	.name		= "cubic",
};

static int __init cubictcp_register(void)
{
	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);

	/* Precompute the scaling factors that are used per-packet,
	 * based on an SRTT of 100 ms.
	 */

	beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 / (BICTCP_BETA_SCALE - beta);

	cube_rtt_scale = (bic_scale * 10);	/* 1024*c/rtt (rtt = 100ms) */

	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3,
	 * so K = cubic_root( (wmax-cwnd)*rtt/c )
	 * the unit of K is BICTCP_HZ = 2^10, not HZ
	 *
	 *  c = bic_scale >> 10
	 *  rtt = 100 ms
	 *
	 * the following code has been designed and tested for
	 * cwnd < 1 million packets and RTT < 100 seconds
	 */

	/* cube_factor = 2^(10+3*BICTCP_HZ) / (bic_scale * 10) */
	cube_factor = 1ull << (10+3*BICTCP_HZ);

	/* divide by bic_scale and by the constant SRTT (100 ms) */
	do_div(cube_factor, bic_scale * 10);

	/* hystart needs ms clock resolution */
	if (hystart && HZ < 1000)
		cubictcp.flags |= TCP_CONG_RTT_STAMP;

	return tcp_register_congestion_control(&cubictcp);
}
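
/*
 * Worked defaults (illustrative, not part of the original source): with
 * beta = 717 and bic_scale = 41,
 *   beta_scale     = 8 * (1024 + 717) / 3 / (1024 - 717) = 15
 *   cube_rtt_scale = 410
 *   cube_factor    = 2^40 / 410 ~= 2.68e9
 * beta_scale / 8 ~= 1.9 is the reciprocal of the Reno-friendly additive
 * increase alpha = 3 * (1 - beta') / (1 + beta') ~= 0.53, with
 * beta' = 717/1024 ~= 0.7; this is what the tcp_friendliness logic in
 * bictcp_update() uses to track an equivalent Reno window.
 */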

static void __exit cubictcp_unregister(void)
{
	tcp_unregister_congestion_control(&cubictcp);
}

module_init(cubictcp_register);
module_exit(cubictcp_unregister);

MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
MODULE_VERSION("2.3");