1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/mm.h>
27#include <linux/module.h>
28#include <linux/math64.h>
29#include <net/tcp.h>
30
31#define BICTCP_BETA_SCALE 1024
32
33
34#define BICTCP_HZ 10
35
36
37#define HYSTART_ACK_TRAIN 0x1
38#define HYSTART_DELAY 0x2
39
40
41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (2U<<3)
43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45
46static int fast_convergence __read_mostly = 1;
47static int beta __read_mostly = 717;
48static int initial_ssthresh __read_mostly;
49static int bic_scale __read_mostly = 41;
50static int tcp_friendliness __read_mostly = 1;
51
52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16;
55
56static u32 cube_rtt_scale __read_mostly;
57static u32 beta_scale __read_mostly;
58static u64 cube_factor __read_mostly;
59
60
61module_param(fast_convergence, int, 0644);
62MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
63module_param(beta, int, 0644);
64MODULE_PARM_DESC(beta, "beta for multiplicative increase");
65module_param(initial_ssthresh, int, 0644);
66MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
67module_param(bic_scale, int, 0444);
68MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
69module_param(tcp_friendliness, int, 0644);
70MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
71module_param(hystart, int, 0644);
72MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm");
73module_param(hystart_detect, int, 0644);
74MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
75 " 1: packet-train 2: delay 3: both packet-train and delay");
76module_param(hystart_low_window, int, 0644);
77MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
78
79
80struct bictcp {
81 u32 cnt;
82 u32 last_max_cwnd;
83 u32 loss_cwnd;
84 u32 last_cwnd;
85 u32 last_time;
86 u32 bic_origin_point;
87 u32 bic_K;
88 u32 delay_min;
89 u32 epoch_start;
90 u32 ack_cnt;
91 u32 tcp_cwnd;
92#define ACK_RATIO_SHIFT 4
93 u16 delayed_ack;
94 u8 sample_cnt;
95 u8 found;
96 u32 round_start;
97 u32 end_seq;
98 u32 last_jiffies;
99 u32 curr_rtt;
100};
101
102static inline void bictcp_reset(struct bictcp *ca)
103{
104 ca->cnt = 0;
105 ca->last_max_cwnd = 0;
106 ca->loss_cwnd = 0;
107 ca->last_cwnd = 0;
108 ca->last_time = 0;
109 ca->bic_origin_point = 0;
110 ca->bic_K = 0;
111 ca->delay_min = 0;
112 ca->epoch_start = 0;
113 ca->delayed_ack = 2 << ACK_RATIO_SHIFT;
114 ca->ack_cnt = 0;
115 ca->tcp_cwnd = 0;
116 ca->found = 0;
117}
118
119static inline void bictcp_hystart_reset(struct sock *sk)
120{
121 struct tcp_sock *tp = tcp_sk(sk);
122 struct bictcp *ca = inet_csk_ca(sk);
123
124 ca->round_start = ca->last_jiffies = jiffies;
125 ca->end_seq = tp->snd_nxt;
126 ca->curr_rtt = 0;
127 ca->sample_cnt = 0;
128}
129
130static void bictcp_init(struct sock *sk)
131{
132 bictcp_reset(inet_csk_ca(sk));
133
134 if (hystart)
135 bictcp_hystart_reset(sk);
136
137 if (!hystart && initial_ssthresh)
138 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
139}
140
141
142
143
144
145static u32 cubic_root(u64 a)
146{
147 u32 x, b, shift;
148
149
150
151
152
153
154
155
156 static const u8 v[] = {
157 0, 54, 54, 54, 118, 118, 118, 118,
158 123, 129, 134, 138, 143, 147, 151, 156,
159 157, 161, 164, 168, 170, 173, 176, 179,
160 181, 185, 187, 190, 192, 194, 197, 199,
161 200, 202, 204, 206, 209, 211, 213, 215,
162 217, 219, 221, 222, 224, 225, 227, 229,
163 231, 232, 234, 236, 237, 239, 240, 242,
164 244, 245, 246, 248, 250, 251, 252, 254,
165 };
166
167 b = fls64(a);
168 if (b < 7) {
169
170 return ((u32)v[(u32)a] + 35) >> 6;
171 }
172
173 b = ((b * 84) >> 8) - 1;
174 shift = (a >> (b * 3));
175
176 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
177
178
179
180
181
182
183
184 x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1)));
185 x = ((x * 341) >> 10);
186 return x;
187}
188
189
190
191
192static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
193{
194 u64 offs;
195 u32 delta, t, bic_target, max_cnt;
196
197 ca->ack_cnt++;
198
199 if (ca->last_cwnd == cwnd &&
200 (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
201 return;
202
203 ca->last_cwnd = cwnd;
204 ca->last_time = tcp_time_stamp;
205
206 if (ca->epoch_start == 0) {
207 ca->epoch_start = tcp_time_stamp;
208 ca->ack_cnt = 1;
209 ca->tcp_cwnd = cwnd;
210
211 if (ca->last_max_cwnd <= cwnd) {
212 ca->bic_K = 0;
213 ca->bic_origin_point = cwnd;
214 } else {
215
216
217
218 ca->bic_K = cubic_root(cube_factor
219 * (ca->last_max_cwnd - cwnd));
220 ca->bic_origin_point = ca->last_max_cwnd;
221 }
222 }
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239 t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
240 << BICTCP_HZ) / HZ;
241
242 if (t < ca->bic_K)
243 offs = ca->bic_K - t;
244 else
245 offs = t - ca->bic_K;
246
247
248 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
249 if (t < ca->bic_K)
250 bic_target = ca->bic_origin_point - delta;
251 else
252 bic_target = ca->bic_origin_point + delta;
253
254
255 if (bic_target > cwnd) {
256 ca->cnt = cwnd / (bic_target - cwnd);
257 } else {
258 ca->cnt = 100 * cwnd;
259 }
260
261
262 if (tcp_friendliness) {
263 u32 scale = beta_scale;
264 delta = (cwnd * scale) >> 3;
265 while (ca->ack_cnt > delta) {
266 ca->ack_cnt -= delta;
267 ca->tcp_cwnd++;
268 }
269
270 if (ca->tcp_cwnd > cwnd){
271 delta = ca->tcp_cwnd - cwnd;
272 max_cnt = cwnd / delta;
273 if (ca->cnt > max_cnt)
274 ca->cnt = max_cnt;
275 }
276 }
277
278 ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
279 if (ca->cnt == 0)
280 ca->cnt = 1;
281}
282
283static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
284{
285 struct tcp_sock *tp = tcp_sk(sk);
286 struct bictcp *ca = inet_csk_ca(sk);
287
288 if (!tcp_is_cwnd_limited(sk, in_flight))
289 return;
290
291 if (tp->snd_cwnd <= tp->snd_ssthresh) {
292 if (hystart && after(ack, ca->end_seq))
293 bictcp_hystart_reset(sk);
294 tcp_slow_start(tp);
295 } else {
296 bictcp_update(ca, tp->snd_cwnd);
297 tcp_cong_avoid_ai(tp, ca->cnt);
298 }
299
300}
301
302static u32 bictcp_recalc_ssthresh(struct sock *sk)
303{
304 const struct tcp_sock *tp = tcp_sk(sk);
305 struct bictcp *ca = inet_csk_ca(sk);
306
307 ca->epoch_start = 0;
308
309
310 if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
311 ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
312 / (2 * BICTCP_BETA_SCALE);
313 else
314 ca->last_max_cwnd = tp->snd_cwnd;
315
316 ca->loss_cwnd = tp->snd_cwnd;
317
318 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
319}
320
321static u32 bictcp_undo_cwnd(struct sock *sk)
322{
323 struct bictcp *ca = inet_csk_ca(sk);
324
325 return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd);
326}
327
328static void bictcp_state(struct sock *sk, u8 new_state)
329{
330 if (new_state == TCP_CA_Loss) {
331 bictcp_reset(inet_csk_ca(sk));
332 bictcp_hystart_reset(sk);
333 }
334}
335
336static void hystart_update(struct sock *sk, u32 delay)
337{
338 struct tcp_sock *tp = tcp_sk(sk);
339 struct bictcp *ca = inet_csk_ca(sk);
340
341 if (!(ca->found & hystart_detect)) {
342 u32 curr_jiffies = jiffies;
343
344
345 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) {
346 ca->last_jiffies = curr_jiffies;
347 if (curr_jiffies - ca->round_start >= ca->delay_min>>4)
348 ca->found |= HYSTART_ACK_TRAIN;
349 }
350
351
352 if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
353 if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
354 ca->curr_rtt = delay;
355
356 ca->sample_cnt++;
357 } else {
358 if (ca->curr_rtt > ca->delay_min +
359 HYSTART_DELAY_THRESH(ca->delay_min>>4))
360 ca->found |= HYSTART_DELAY;
361 }
362
363
364
365
366 if (ca->found & hystart_detect)
367 tp->snd_ssthresh = tp->snd_cwnd;
368 }
369}
370
371
372
373
374static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
375{
376 const struct inet_connection_sock *icsk = inet_csk(sk);
377 const struct tcp_sock *tp = tcp_sk(sk);
378 struct bictcp *ca = inet_csk_ca(sk);
379 u32 delay;
380
381 if (icsk->icsk_ca_state == TCP_CA_Open) {
382 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
383 ca->delayed_ack += cnt;
384 }
385
386
387 if (rtt_us < 0)
388 return;
389
390
391 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
392 return;
393
394 delay = usecs_to_jiffies(rtt_us) << 3;
395 if (delay == 0)
396 delay = 1;
397
398
399 if (ca->delay_min == 0 || ca->delay_min > delay)
400 ca->delay_min = delay;
401
402
403 if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
404 tp->snd_cwnd >= hystart_low_window)
405 hystart_update(sk, delay);
406}
407
408static struct tcp_congestion_ops cubictcp = {
409 .init = bictcp_init,
410 .ssthresh = bictcp_recalc_ssthresh,
411 .cong_avoid = bictcp_cong_avoid,
412 .set_state = bictcp_state,
413 .undo_cwnd = bictcp_undo_cwnd,
414 .pkts_acked = bictcp_acked,
415 .owner = THIS_MODULE,
416 .name = "cubic",
417};
418
419static int __init cubictcp_register(void)
420{
421 BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
422
423
424
425
426
427 beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
428
429 cube_rtt_scale = (bic_scale * 10);
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445 cube_factor = 1ull << (10+3*BICTCP_HZ);
446
447
448 do_div(cube_factor, bic_scale * 10);
449
450 return tcp_register_congestion_control(&cubictcp);
451}
452
453static void __exit cubictcp_unregister(void)
454{
455 tcp_unregister_congestion_control(&cubictcp);
456}
457
458module_init(cubictcp_register);
459module_exit(cubictcp_unregister);
460
461MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
462MODULE_LICENSE("GPL");
463MODULE_DESCRIPTION("CUBIC TCP");
464MODULE_VERSION("2.3");
465