linux/net/rxrpc/rtt.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* RTT/RTO calculation.
   3 *
   4 * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
   5 *
   6 * https://tools.ietf.org/html/rfc6298
   7 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
   8 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
   9 */
  10
  11#include <linux/net.h>
  12#include "ar-internal.h"
  13
  14#define RXRPC_RTO_MAX   ((unsigned)(120 * HZ))
  15#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ))   /* RFC6298 2.1 initial RTO value        */
  16#define rxrpc_jiffies32 ((u32)jiffies)          /* As rxrpc_jiffies32 */
  17
  18static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
  19{
  20        return 200;
  21}
  22
  23static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
  24{
  25        return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
  26}
  27
  28static u32 rxrpc_bound_rto(u32 rto)
  29{
  30        return min(rto, RXRPC_RTO_MAX);
  31}
  32
  33/*
  34 * Called to compute a smoothed rtt estimate. The data fed to this
  35 * routine either comes from timestamps, or from segments that were
  36 * known _not_ to have been retransmitted [see Karn/Partridge
  37 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
  38 * piece by Van Jacobson.
  39 * NOTE: the next three routines used to be one big routine.
  40 * To save cycles in the RFC 1323 implementation it was better to break
  41 * it up into three procedures. -- erics
  42 */
  43static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
  44{
  45        long m = sample_rtt_us; /* RTT */
  46        u32 srtt = peer->srtt_us;
  47
  48        /*      The following amusing code comes from Jacobson's
  49         *      article in SIGCOMM '88.  Note that rtt and mdev
  50         *      are scaled versions of rtt and mean deviation.
  51         *      This is designed to be as fast as possible
  52         *      m stands for "measurement".
  53         *
  54         *      On a 1990 paper the rto value is changed to:
  55         *      RTO = rtt + 4 * mdev
  56         *
  57         * Funny. This algorithm seems to be very broken.
  58         * These formulae increase RTO, when it should be decreased, increase
  59         * too slowly, when it should be increased quickly, decrease too quickly
  60         * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
  61         * does not matter how to _calculate_ it. Seems, it was trap
  62         * that VJ failed to avoid. 8)
  63         */
  64        if (srtt != 0) {
  65                m -= (srtt >> 3);       /* m is now error in rtt est */
  66                srtt += m;              /* rtt = 7/8 rtt + 1/8 new */
  67                if (m < 0) {
  68                        m = -m;         /* m is now abs(error) */
  69                        m -= (peer->mdev_us >> 2);   /* similar update on mdev */
  70                        /* This is similar to one of Eifel findings.
  71                         * Eifel blocks mdev updates when rtt decreases.
  72                         * This solution is a bit different: we use finer gain
  73                         * for mdev in this case (alpha*beta).
  74                         * Like Eifel it also prevents growth of rto,
  75                         * but also it limits too fast rto decreases,
  76                         * happening in pure Eifel.
  77                         */
  78                        if (m > 0)
  79                                m >>= 3;
  80                } else {
  81                        m -= (peer->mdev_us >> 2);   /* similar update on mdev */
  82                }
  83
  84                peer->mdev_us += m;             /* mdev = 3/4 mdev + 1/4 new */
  85                if (peer->mdev_us > peer->mdev_max_us) {
  86                        peer->mdev_max_us = peer->mdev_us;
  87                        if (peer->mdev_max_us > peer->rttvar_us)
  88                                peer->rttvar_us = peer->mdev_max_us;
  89                }
  90        } else {
  91                /* no previous measure. */
  92                srtt = m << 3;          /* take the measured time to be rtt */
  93                peer->mdev_us = m << 1; /* make sure rto = 3*rtt */
  94                peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
  95                peer->mdev_max_us = peer->rttvar_us;
  96        }
  97
  98        peer->srtt_us = max(1U, srtt);
  99}
 100
 101/*
 102 * Calculate rto without backoff.  This is the second half of Van Jacobson's
 103 * routine referred to above.
 104 */
 105static void rxrpc_set_rto(struct rxrpc_peer *peer)
 106{
 107        u32 rto;
 108
 109        /* 1. If rtt variance happened to be less 50msec, it is hallucination.
 110         *    It cannot be less due to utterly erratic ACK generation made
 111         *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
 112         *    to do with delayed acks, because at cwnd>2 true delack timeout
 113         *    is invisible. Actually, Linux-2.4 also generates erratic
 114         *    ACKs in some circumstances.
 115         */
 116        rto = __rxrpc_set_rto(peer);
 117
 118        /* 2. Fixups made earlier cannot be right.
 119         *    If we do not estimate RTO correctly without them,
 120         *    all the algo is pure shit and should be replaced
 121         *    with correct one. It is exactly, which we pretend to do.
 122         */
 123
 124        /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
 125         * guarantees that rto is higher.
 126         */
 127        peer->rto_j = rxrpc_bound_rto(rto);
 128}
 129
 130static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
 131{
 132        if (rtt_us < 0)
 133                return;
 134
 135        //rxrpc_update_rtt_min(peer, rtt_us);
 136        rxrpc_rtt_estimator(peer, rtt_us);
 137        rxrpc_set_rto(peer);
 138
 139        /* RFC6298: only reset backoff on valid RTT measurement. */
 140        peer->backoff = 0;
 141}
 142
 143/*
 144 * Add RTT information to cache.  This is called in softirq mode and has
 145 * exclusive access to the peer RTT data.
 146 */
 147void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
 148                        int rtt_slot,
 149                        rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
 150                        ktime_t send_time, ktime_t resp_time)
 151{
 152        struct rxrpc_peer *peer = call->peer;
 153        s64 rtt_us;
 154
 155        rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
 156        if (rtt_us < 0)
 157                return;
 158
 159        spin_lock(&peer->rtt_input_lock);
 160        rxrpc_ack_update_rtt(peer, rtt_us);
 161        if (peer->rtt_count < 3)
 162                peer->rtt_count++;
 163        spin_unlock(&peer->rtt_input_lock);
 164
 165        trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
 166                           peer->srtt_us >> 3, peer->rto_j);
 167}
 168
 169/*
 170 * Get the retransmission timeout to set in jiffies, backing it off each time
 171 * we retransmit.
 172 */
 173unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
 174{
 175        u64 timo_j;
 176        u8 backoff = READ_ONCE(peer->backoff);
 177
 178        timo_j = peer->rto_j;
 179        timo_j <<= backoff;
 180        if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
 181                WRITE_ONCE(peer->backoff, backoff + 1);
 182
 183        if (timo_j < 1)
 184                timo_j = 1;
 185
 186        return timo_j;
 187}
 188
 189void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
 190{
 191        peer->rto_j     = RXRPC_TIMEOUT_INIT;
 192        peer->mdev_us   = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
 193        peer->backoff   = 0;
 194        //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
 195}
 196