linux/net/ipv4/tcp_metrics.c
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tcp.h>
#include <linux/hash.h>

#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>

int sysctl_tcp_nometrics_save __read_mostly;
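/* Indices into tcpm_vals[] for the metrics cached per destination.
 * They correspond one-to-one to the RTAX_* route metrics copied in
 * tcpm_suck_dst().
 */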
enum tcp_metric_index {
        TCP_METRIC_RTT,
        TCP_METRIC_RTTVAR,
        TCP_METRIC_SSTHRESH,
        TCP_METRIC_CWND,
        TCP_METRIC_REORDERING,

        /* Always last.  */
        TCP_METRIC_MAX,
};

struct tcp_fastopen_metrics {
        u16     mss;
        u16     syn_loss:10;            /* Recurring Fast Open SYN losses */
        unsigned long   last_syn_loss;  /* Last Fast Open SYN loss */
        struct  tcp_fastopen_cookie     cookie;
};

struct tcp_metrics_block {
        struct tcp_metrics_block __rcu  *tcpm_next;
        struct inetpeer_addr            tcpm_addr;
        unsigned long                   tcpm_stamp;
        u32                             tcpm_ts;
        u32                             tcpm_ts_stamp;
        u32                             tcpm_lock;
        u32                             tcpm_vals[TCP_METRIC_MAX];
        struct tcp_fastopen_metrics     tcpm_fastopen;
};
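/* tcpm_lock is a bitmask over tcp_metric_index: a set bit means the
 * corresponding metric was locked in the route (dst_metric_locked())
 * and must not be overwritten with values learned from the session.
 */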
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
                              enum tcp_metric_index idx)
{
        return tm->tcpm_lock & (1 << idx);
}

static u32 tcp_metric_get(struct tcp_metrics_block *tm,
                          enum tcp_metric_index idx)
{
        return tm->tcpm_vals[idx];
}

static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
                                  enum tcp_metric_index idx)
{
        return msecs_to_jiffies(tm->tcpm_vals[idx]);
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
                           enum tcp_metric_index idx,
                           u32 val)
{
        tm->tcpm_vals[idx] = val;
}

static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
                                 enum tcp_metric_index idx,
                                 u32 val)
{
        tm->tcpm_vals[idx] = jiffies_to_msecs(val);
}

static bool addr_same(const struct inetpeer_addr *a,
                      const struct inetpeer_addr *b)
{
        const struct in6_addr *a6, *b6;

        if (a->family != b->family)
                return false;
        if (a->family == AF_INET)
                return a->addr.a4 == b->addr.a4;

        a6 = (const struct in6_addr *) &a->addr.a6[0];
        b6 = (const struct in6_addr *) &b->addr.a6[0];

        return ipv6_addr_equal(a6, b6);
}

struct tcpm_hash_bucket {
        struct tcp_metrics_block __rcu  *chain;
};

static DEFINE_SPINLOCK(tcp_metrics_lock);
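/* (Re)initialize a metrics block from the route: record which metrics
 * are locked in the dst, copy the raw RTAX_* values, and clear the
 * cached timestamp and Fast Open state.
 */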
static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
        u32 val;

        tm->tcpm_stamp = jiffies;

        val = 0;
        if (dst_metric_locked(dst, RTAX_RTT))
                val |= 1 << TCP_METRIC_RTT;
        if (dst_metric_locked(dst, RTAX_RTTVAR))
                val |= 1 << TCP_METRIC_RTTVAR;
        if (dst_metric_locked(dst, RTAX_SSTHRESH))
                val |= 1 << TCP_METRIC_SSTHRESH;
        if (dst_metric_locked(dst, RTAX_CWND))
                val |= 1 << TCP_METRIC_CWND;
        if (dst_metric_locked(dst, RTAX_REORDERING))
                val |= 1 << TCP_METRIC_REORDERING;
        tm->tcpm_lock = val;

        tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
        tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
        tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
        tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
        tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
        tm->tcpm_ts = 0;
        tm->tcpm_ts_stamp = 0;
        tm->tcpm_fastopen.mss = 0;
        tm->tcpm_fastopen.syn_loss = 0;
        tm->tcpm_fastopen.cookie.len = 0;
}

static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
                                          struct inetpeer_addr *addr,
                                          unsigned int hash,
                                          bool reclaim)
{
        struct tcp_metrics_block *tm;
        struct net *net;

        spin_lock_bh(&tcp_metrics_lock);
        net = dev_net(dst->dev);
        if (unlikely(reclaim)) {
                struct tcp_metrics_block *oldest;

                oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain);
                for (tm = rcu_dereference(oldest->tcpm_next); tm;
                     tm = rcu_dereference(tm->tcpm_next)) {
                        if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
                                oldest = tm;
                }
                tm = oldest;
        } else {
                tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
                if (!tm)
                        goto out_unlock;
        }
        tm->tcpm_addr = *addr;

        tcpm_suck_dst(tm, dst);

        if (likely(!reclaim)) {
                tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
                rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm);
        }

out_unlock:
        spin_unlock_bh(&tcp_metrics_lock);
        return tm;
}

#define TCP_METRICS_TIMEOUT             (60 * 60 * HZ)
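/* An entry that has not been written for an hour is considered stale
 * and is refreshed from the current route metrics before it is used.
 */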
static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
        if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
                tcpm_suck_dst(tm, dst);
}

#define TCP_METRICS_RECLAIM_DEPTH       5
#define TCP_METRICS_RECLAIM_PTR         (struct tcp_metrics_block *) 0x1UL
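/* Encode the result of a chain walk: the entry itself if found, NULL if
 * not found and the chain is still short, or TCP_METRICS_RECLAIM_PTR if
 * the chain is already more than TCP_METRICS_RECLAIM_DEPTH deep and the
 * caller should recycle the oldest entry instead of allocating a new one.
 */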
static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth)
{
        if (tm)
                return tm;
        if (depth > TCP_METRICS_RECLAIM_DEPTH)
                return TCP_METRICS_RECLAIM_PTR;
        return NULL;
}

static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
                                                   struct net *net, unsigned int hash)
{
        struct tcp_metrics_block *tm;
        int depth = 0;

        for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
             tm = rcu_dereference(tm->tcpm_next)) {
                if (addr_same(&tm->tcpm_addr, addr))
                        break;
                depth++;
        }
        return tcp_get_encode(tm, depth);
}

static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
                                                       struct dst_entry *dst)
{
        struct tcp_metrics_block *tm;
        struct inetpeer_addr addr;
        unsigned int hash;
        struct net *net;

        addr.family = req->rsk_ops->family;
        switch (addr.family) {
        case AF_INET:
                addr.addr.a4 = inet_rsk(req)->rmt_addr;
                hash = (__force unsigned int) addr.addr.a4;
                break;
        case AF_INET6:
                *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr;
                hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr);
                break;
        default:
                return NULL;
        }

        net = dev_net(dst->dev);
        hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

        for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
             tm = rcu_dereference(tm->tcpm_next)) {
                if (addr_same(&tm->tcpm_addr, &addr))
                        break;
        }
        tcpm_check_stamp(tm, dst);
        return tm;
}

static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw)
{
        struct inet6_timewait_sock *tw6;
        struct tcp_metrics_block *tm;
        struct inetpeer_addr addr;
        unsigned int hash;
        struct net *net;

        addr.family = tw->tw_family;
        switch (addr.family) {
        case AF_INET:
                addr.addr.a4 = tw->tw_daddr;
                hash = (__force unsigned int) addr.addr.a4;
                break;
        case AF_INET6:
                tw6 = inet6_twsk((struct sock *)tw);
                *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr;
                hash = ipv6_addr_hash(&tw6->tw_v6_daddr);
                break;
        default:
                return NULL;
        }

        net = twsk_net(tw);
        hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

        for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
             tm = rcu_dereference(tm->tcpm_next)) {
                if (addr_same(&tm->tcpm_addr, &addr))
                        break;
        }
        return tm;
}
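/* Look up the metrics block for the peer of a full socket, hashing its
 * destination address into the per-netns table.  With create == true a
 * missing entry is allocated, or the oldest entry of an over-long chain
 * is recycled.
 */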
static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
                                                 struct dst_entry *dst,
                                                 bool create)
{
        struct tcp_metrics_block *tm;
        struct inetpeer_addr addr;
        unsigned int hash;
        struct net *net;
        bool reclaim;

        addr.family = sk->sk_family;
        switch (addr.family) {
        case AF_INET:
                addr.addr.a4 = inet_sk(sk)->inet_daddr;
                hash = (__force unsigned int) addr.addr.a4;
                break;
        case AF_INET6:
                *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr;
                hash = ipv6_addr_hash(&inet6_sk(sk)->daddr);
                break;
        default:
                return NULL;
        }

        net = dev_net(dst->dev);
        hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);

        tm = __tcp_get_metrics(&addr, net, hash);
        reclaim = false;
        if (tm == TCP_METRICS_RECLAIM_PTR) {
                reclaim = true;
                tm = NULL;
        }
        if (!tm && create)
                tm = tcpm_new(dst, &addr, hash, reclaim);
        else
                tcpm_check_stamp(tm, dst);

        return tm;
}
/* Save metrics learned by this TCP session.  This function is called
 * only when TCP finishes successfully, i.e. when it enters TIME-WAIT
 * or goes from LAST-ACK to CLOSE.
 */
void tcp_update_metrics(struct sock *sk)
{
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct dst_entry *dst = __sk_dst_get(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_metrics_block *tm;
        unsigned long rtt;
        u32 val;
        int m;

        if (sysctl_tcp_nometrics_save || !dst)
                return;

        if (dst->flags & DST_HOST)
                dst_confirm(dst);

        rcu_read_lock();
        if (icsk->icsk_backoff || !tp->srtt) {
                /* This session failed to estimate rtt. Why?
                 * Probably, no packets returned in time.  Reset our
                 * results.
                 */
                tm = tcp_get_metrics(sk, dst, false);
                if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT))
                        tcp_metric_set(tm, TCP_METRIC_RTT, 0);
                goto out_unlock;
        } else
                tm = tcp_get_metrics(sk, dst, true);

        if (!tm)
                goto out_unlock;

        rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
        m = rtt - tp->srtt;
        /* If the newly calculated RTT is larger than the stored one,
         * store the new one.  Otherwise, use EWMA.  Remember, RTT
         * overestimation is always better than underestimation.
         */
        if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
                if (m <= 0)
                        rtt = tp->srtt;
                else
                        rtt -= (m >> 3);
                tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
        }

        if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
                unsigned long var;

                if (m < 0)
                        m = -m;

                /* Scale deviation to rttvar fixed point */
                m >>= 1;
                if (m < tp->mdev)
                        m = tp->mdev;

                var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
                if (m >= var)
                        var = m;
                else
                        var -= (var - m) >> 2;

                tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
        }

        if (tcp_in_initial_slowstart(tp)) {
                /* Slow start still did not finish. */
                if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
                        val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
                        if (val && (tp->snd_cwnd >> 1) > val)
                                tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
                                               tp->snd_cwnd >> 1);
                }
                if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
                        val = tcp_metric_get(tm, TCP_METRIC_CWND);
                        if (tp->snd_cwnd > val)
                                tcp_metric_set(tm, TCP_METRIC_CWND,
                                               tp->snd_cwnd);
                }
        } else if (tp->snd_cwnd > tp->snd_ssthresh &&
                   icsk->icsk_ca_state == TCP_CA_Open) {
                /* Cong. avoidance phase, cwnd is reliable. */
                if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
                        tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
                                       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
                if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
                        val = tcp_metric_get(tm, TCP_METRIC_CWND);
                        tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
                }
        } else {
                /* Else slow start did not finish, cwnd is meaningless
                 * and ssthresh may also be invalid.
                 */
                if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
                        val = tcp_metric_get(tm, TCP_METRIC_CWND);
                        tcp_metric_set(tm, TCP_METRIC_CWND,
                                       (val + tp->snd_ssthresh) >> 1);
                }
                if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
                        val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
                        if (val && tp->snd_ssthresh > val)
                                tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
                                               tp->snd_ssthresh);
                }
                if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
                        val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
                        if (val < tp->reordering &&
                            tp->reordering != sysctl_tcp_reordering)
                                tcp_metric_set(tm, TCP_METRIC_REORDERING,
                                               tp->reordering);
                }
        }
        tm->tcpm_stamp = jiffies;
out_unlock:
        rcu_read_unlock();
}

/* Initialize metrics on socket. */

void tcp_init_metrics(struct sock *sk)
{
        struct dst_entry *dst = __sk_dst_get(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct tcp_metrics_block *tm;
        u32 val;

        if (dst == NULL)
                goto reset;

        dst_confirm(dst);

        rcu_read_lock();
        tm = tcp_get_metrics(sk, dst, true);
        if (!tm) {
                rcu_read_unlock();
                goto reset;
        }

        if (tcp_metric_locked(tm, TCP_METRIC_CWND))
                tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);

        val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
        if (val) {
                tp->snd_ssthresh = val;
                if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
                        tp->snd_ssthresh = tp->snd_cwnd_clamp;
        } else {
                /* ssthresh may have been reduced unnecessarily during
                 * the 3WHS.  Restore it back to its initial default.
                 */
                tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        }
        val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
        if (val && tp->reordering != val) {
                tcp_disable_fack(tp);
                tcp_disable_early_retrans(tp);
                tp->reordering = val;
        }

        val = tcp_metric_get(tm, TCP_METRIC_RTT);
        if (val == 0 || tp->srtt == 0) {
                rcu_read_unlock();
                goto reset;
        }
        /* The initial RTT is determined from the SYN,SYN-ACK exchange.
         * Those segments are small, so the measured RTT may appear much
         * smaller than the real one.  Use the per-destination memory
         * to make it more realistic.
         *
         * A bit of theory.  RTT is the time that passes after a "normal"
         * sized packet is sent until it is ACKed.  In normal circumstances
         * sending small packets forces the peer to delay ACKs, so the
         * calculation is correct there too.  The algorithm is adaptive
         * and, provided we follow the specs, it NEVER underestimates RTT.
         * BUT!  If the peer plays tricks, sort of "quick acks", for long
         * enough to push the RTT down to a low value, and then abruptly
         * stops doing so and starts delaying ACKs, expect trouble.
         */
        val = msecs_to_jiffies(val);
        if (val > tp->srtt) {
                tp->srtt = val;
                tp->rtt_seq = tp->snd_nxt;
        }
        val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
        if (val > tp->mdev) {
                tp->mdev = val;
                tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
        }
        rcu_read_unlock();

        tcp_set_rto(sk);
reset:
        if (tp->srtt == 0) {
                /* RFC 6298, section 5.7: we failed to get a valid RTT
                 * sample from the 3WHS.  This is most likely due to
                 * retransmission, possibly a spurious one.  Reset the
                 * RTO back to 3 secs from the more aggressive 1 sec to
                 * avoid further spurious retransmissions.
                 */
                tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
                inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
        }
        /* Cut cwnd down to 1 per RFC 5681 if the SYN or SYN-ACK has been
         * retransmitted.  In light of RFC 6298's more aggressive 1 sec
         * initRTO, we only reset cwnd when more than one SYN/SYN-ACK
         * retransmission has occurred.
         */
        if (tp->total_retrans > 1)
                tp->snd_cwnd = 1;
        else
                tp->snd_cwnd = tcp_init_cwnd(tp, dst);
        tp->snd_cwnd_stamp = tcp_time_stamp;
}
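/* With paws_check, reject a passive open when the timestamp cached for
 * this peer within TCP_PAWS_MSL is ahead of the one in the SYN by more
 * than TCP_PAWS_WINDOW (a PAWS failure).  Without it, report whether we
 * already hold both an RTT estimate and a timestamp for the peer.
 */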
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check)
{
        struct tcp_metrics_block *tm;
        bool ret;

        if (!dst)
                return false;

        rcu_read_lock();
        tm = __tcp_get_metrics_req(req, dst);
        if (paws_check) {
                if (tm &&
                    (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
                    (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW)
                        ret = false;
                else
                        ret = true;
        } else {
                if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp)
                        ret = true;
                else
                        ret = false;
        }
        rcu_read_unlock();

        return ret;
}
EXPORT_SYMBOL_GPL(tcp_peer_is_proven);

void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst)
{
        struct tcp_metrics_block *tm;

        rcu_read_lock();
        tm = tcp_get_metrics(sk, dst, true);
        if (tm) {
                struct tcp_sock *tp = tcp_sk(sk);

                if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) {
                        tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp;
                        tp->rx_opt.ts_recent = tm->tcpm_ts;
                }
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp);
/* VJ's idea.  Save the last timestamp seen from this destination and hold
 * it at least for the normal timewait interval, to use for duplicate
 * segment detection in subsequent connections, before they enter the
 * synchronized state.
 */
bool tcp_remember_stamp(struct sock *sk)
{
        struct dst_entry *dst = __sk_dst_get(sk);
        bool ret = false;

        if (dst) {
                struct tcp_metrics_block *tm;

                rcu_read_lock();
                tm = tcp_get_metrics(sk, dst, true);
                if (tm) {
                        struct tcp_sock *tp = tcp_sk(sk);

                        if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 ||
                            ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
                             tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
                                tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
                                tm->tcpm_ts = tp->rx_opt.ts_recent;
                        }
                        ret = true;
                }
                rcu_read_unlock();
        }
        return ret;
}

bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
{
        struct tcp_metrics_block *tm;
        bool ret = false;

        rcu_read_lock();
        tm = __tcp_get_metrics_tw(tw);
        if (tm) {
                const struct tcp_timewait_sock *tcptw;
                struct sock *sk = (struct sock *) tw;

                tcptw = tcp_twsk(sk);
                if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 ||
                    ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL &&
                     tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
                        tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
                        tm->tcpm_ts        = tcptw->tw_ts_recent;
                }
                ret = true;
        }
        rcu_read_unlock();

        return ret;
}
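/* Fast Open state is updated without taking tcp_metrics_lock, so a
 * seqlock keeps readers of the mss, cookie and SYN-loss counters
 * consistent with concurrent writers.
 */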
static DEFINE_SEQLOCK(fastopen_seqlock);

void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
                            struct tcp_fastopen_cookie *cookie,
                            int *syn_loss, unsigned long *last_syn_loss)
{
        struct tcp_metrics_block *tm;

        rcu_read_lock();
        tm = tcp_get_metrics(sk, __sk_dst_get(sk), false);
        if (tm) {
                struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
                unsigned int seq;

                do {
                        seq = read_seqbegin(&fastopen_seqlock);
                        if (tfom->mss)
                                *mss = tfom->mss;
                        *cookie = tfom->cookie;
                        *syn_loss = tfom->syn_loss;
                        *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0;
                } while (read_seqretry(&fastopen_seqlock, seq));
        }
        rcu_read_unlock();
}

void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
                            struct tcp_fastopen_cookie *cookie, bool syn_lost)
{
        struct tcp_metrics_block *tm;

        rcu_read_lock();
        tm = tcp_get_metrics(sk, __sk_dst_get(sk), true);
        if (tm) {
                struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;

                write_seqlock_bh(&fastopen_seqlock);
                tfom->mss = mss;
                if (cookie->len > 0)
                        tfom->cookie = *cookie;
                if (syn_lost) {
                        ++tfom->syn_loss;
                        tfom->last_syn_loss = jiffies;
                } else
                        tfom->syn_loss = 0;
                write_sequnlock_bh(&fastopen_seqlock);
        }
        rcu_read_unlock();
}
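/* Hash table sizing, overridable at boot with tcpmhash_entries=N. */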
static unsigned int tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
        ssize_t ret;

        if (!str)
                return 0;

        ret = kstrtouint(str, 0, &tcpmhash_entries);
        if (ret)
                return 0;

        return 1;
}
__setup("tcpmhash_entries=", set_tcpmhash_entries);

static int __net_init tcp_net_metrics_init(struct net *net)
{
        size_t size;
        unsigned int slots;

        slots = tcpmhash_entries;
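        /* Default sizing: totalram_pages counts pages, so with 4 KB
         * pages the 128 * 1024 threshold is 512 MB of RAM; larger
         * machines get 16K buckets, smaller ones 8K.
         */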
        if (!slots) {
                if (totalram_pages >= 128 * 1024)
                        slots = 16 * 1024;
                else
                        slots = 8 * 1024;
        }

        net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
        size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;

        net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL);
        if (!net->ipv4.tcp_metrics_hash)
                return -ENOMEM;

        return 0;
}

static void __net_exit tcp_net_metrics_exit(struct net *net)
{
        unsigned int i;

        for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log); i++) {
                struct tcp_metrics_block *tm, *next;

                tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
                while (tm) {
                        next = rcu_dereference_protected(tm->tcpm_next, 1);
                        kfree(tm);
                        tm = next;
                }
        }
        kfree(net->ipv4.tcp_metrics_hash);
}

static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
        .init   =       tcp_net_metrics_init,
        .exit   =       tcp_net_metrics_exit,
};

void __init tcp_metrics_init(void)
{
        register_pernet_subsys(&tcp_net_metrics_ops);
}