linux/net/ipv4/inet_lro.c
/*
 *  linux/net/ipv4/inet_lro.c
 *
 *  Large Receive Offload (ipv4 / tcp)
 *
 *  (C) Copyright IBM Corp. 2007
 *
 *  Authors:
 *       Jan-Bernd Themann <themann@de.ibm.com>
 *       Christoph Raisch <raisch@de.ibm.com>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");

#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
#define IP_HDR_LEN(iph) (iph->ihl << 2)
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
        (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

#define LRO_MAX_PG_HLEN 64

#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }

/*
 * Basic tcp checks whether packet is suitable for LRO
 */

static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
                            int len, const struct net_lro_desc *lro_desc)
{
        /* check ip header: don't aggregate padded frames */
        if (ntohs(iph->tot_len) != len)
                return -1;

        if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
                return -1;

        if (iph->ihl != IPH_LEN_WO_OPTIONS)
                return -1;

        if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
            tcph->rst || tcph->syn || tcph->fin)
                return -1;

        if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
                return -1;

        if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
            tcph->doff != TCPH_LEN_W_TIMESTAMP)
                return -1;

        /* check tcp options (only timestamp allowed) */
        if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
                __be32 *topt = (__be32 *)(tcph + 1);

                if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
                                   | (TCPOPT_TIMESTAMP << 8)
                                   | TCPOLEN_TIMESTAMP))
                        return -1;

                /* timestamp should be in right order */
                topt++;
                if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
                                      ntohl(*topt)))
                        return -1;

                /* timestamp reply should not be zero */
                topt++;
                if (*topt == 0)
                        return -1;
        }

        return 0;
}

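/*
 * Rewrite the headers of the aggregated packet before it is handed to the
 * stack: take over the latest ACK/window and timestamp echo, set the new
 * total length and recompute the IP and TCP checksums over the merged data.
 */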
static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
{
        struct iphdr *iph = lro_desc->iph;
        struct tcphdr *tcph = lro_desc->tcph;
        __be32 *p;
        __wsum tcp_hdr_csum;

        tcph->ack_seq = lro_desc->tcp_ack;
        tcph->window = lro_desc->tcp_window;

        if (lro_desc->tcp_saw_tstamp) {
                p = (__be32 *)(tcph + 1);
                *(p+2) = lro_desc->tcp_rcv_tsecr;
        }

        iph->tot_len = htons(lro_desc->ip_tot_len);

        iph->check = 0;
        iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);

        tcph->check = 0;
        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
        lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
        tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
                                        lro_desc->ip_tot_len -
                                        IP_HDR_LEN(iph), IPPROTO_TCP,
                                        lro_desc->data_csum);
}

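/*
 * Checksum over the TCP payload only: start from the checksum carried in
 * the packet and remove the TCP header and pseudo-header contributions.
 */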
static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
{
        __wsum tcp_csum;
        __wsum tcp_hdr_csum;
        __wsum tcp_ps_hdr_csum;

        tcp_csum = ~csum_unfold(tcph->check);
        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);

        tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
                                             len + TCP_HDR_LEN(tcph),
                                             IPPROTO_TCP, 0);

        return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
                        tcp_ps_hdr_csum);
}

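/*
 * Start a new aggregation session: remember the headers, the expected next
 * sequence number, the timestamps and the payload checksum of the first
 * packet.
 */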
static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
                          struct iphdr *iph, struct tcphdr *tcph)
{
        int nr_frags;
        __be32 *ptr;
        u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);

        nr_frags = skb_shinfo(skb)->nr_frags;
        lro_desc->parent = skb;
        lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
        lro_desc->iph = iph;
        lro_desc->tcph = tcph;
        lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
        lro_desc->tcp_ack = tcph->ack_seq;
        lro_desc->tcp_window = tcph->window;

        lro_desc->pkt_aggr_cnt = 1;
        lro_desc->ip_tot_len = ntohs(iph->tot_len);

        if (tcph->doff == 8) {
                ptr = (__be32 *)(tcph+1);
                lro_desc->tcp_saw_tstamp = 1;
                lro_desc->tcp_rcv_tsval = *(ptr+1);
                lro_desc->tcp_rcv_tsecr = *(ptr+2);
        }

        lro_desc->mss = tcp_data_len;
        lro_desc->active = 1;

        lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
                                                tcp_data_len);
}

static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
{
        memset(lro_desc, 0, sizeof(struct net_lro_desc));
}

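/*
 * Book-keeping shared by both aggregation paths: update the descriptor and
 * the parent skb's length fields to account for one more in-order segment.
 */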
static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
                           struct tcphdr *tcph, int tcp_data_len)
{
        struct sk_buff *parent = lro_desc->parent;
        __be32 *topt;

        lro_desc->pkt_aggr_cnt++;
        lro_desc->ip_tot_len += tcp_data_len;
        lro_desc->tcp_next_seq += tcp_data_len;
        lro_desc->tcp_window = tcph->window;
        lro_desc->tcp_ack = tcph->ack_seq;

        /* don't update tcp_rcv_tsval, would not work with PAWS */
        if (lro_desc->tcp_saw_tstamp) {
                topt = (__be32 *) (tcph + 1);
                lro_desc->tcp_rcv_tsecr = *(topt + 2);
        }

        lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
                                             lro_tcp_data_csum(iph, tcph,
                                                               tcp_data_len),
                                             parent->len);

        parent->len += tcp_data_len;
        parent->data_len += tcp_data_len;
        if (tcp_data_len > lro_desc->mss)
                lro_desc->mss = tcp_data_len;
}

static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
                           struct iphdr *iph, struct tcphdr *tcph)
{
        struct sk_buff *parent = lro_desc->parent;
        int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);

        lro_add_common(lro_desc, iph, tcph, tcp_data_len);

        skb_pull(skb, (skb->len - tcp_data_len));
        parent->truesize += skb->truesize;

        if (lro_desc->last_skb)
                lro_desc->last_skb->next = skb;
        else
                skb_shinfo(parent)->frag_list = skb;

        lro_desc->last_skb = skb;
}

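/*
 * Page-fragment receive path: strip the headers from the first fragment and
 * append the payload fragments to the parent skb's frag array.
 */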
static void lro_add_frags(struct net_lro_desc *lro_desc,
                          int len, int hlen, int truesize,
                          struct skb_frag_struct *skb_frags,
                          struct iphdr *iph, struct tcphdr *tcph)
{
        struct sk_buff *skb = lro_desc->parent;
        int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);

        lro_add_common(lro_desc, iph, tcph, tcp_data_len);

        skb->truesize += truesize;

        skb_frags[0].page_offset += hlen;
        skb_frag_size_sub(&skb_frags[0], hlen);

        while (tcp_data_len > 0) {
                *(lro_desc->next_frag) = *skb_frags;
                tcp_data_len -= skb_frag_size(skb_frags);
                lro_desc->next_frag++;
                skb_frags++;
                skb_shinfo(skb)->nr_frags++;
        }
}

static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
                              struct iphdr *iph,
                              struct tcphdr *tcph)
{
        if ((lro_desc->iph->saddr != iph->saddr) ||
            (lro_desc->iph->daddr != iph->daddr) ||
            (lro_desc->tcph->source != tcph->source) ||
            (lro_desc->tcph->dest != tcph->dest))
                return -1;
        return 0;
}

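/*
 * Return the active descriptor that already tracks this connection, or an
 * unused one; NULL (and a no_desc statistics hit) if all are busy.
 */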
static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
                                         struct net_lro_desc *lro_arr,
                                         struct iphdr *iph,
                                         struct tcphdr *tcph)
{
        struct net_lro_desc *lro_desc = NULL;
        struct net_lro_desc *tmp;
        int max_desc = lro_mgr->max_desc;
        int i;

        for (i = 0; i < max_desc; i++) {
                tmp = &lro_arr[i];
                if (tmp->active)
                        if (!lro_check_tcp_conn(tmp, iph, tcph)) {
                                lro_desc = tmp;
                                goto out;
                        }
        }

        for (i = 0; i < max_desc; i++) {
                if (!lro_arr[i].active) {
                        lro_desc = &lro_arr[i];
                        goto out;
                }
        }

        LRO_INC_STATS(lro_mgr, no_desc);
out:
        return lro_desc;
}

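/*
 * Finish a session: fix up the aggregated headers if more than one packet
 * was merged, record the observed MSS as gso_size, hand the packet to the
 * stack (NAPI or legacy path) and recycle the descriptor.
 */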
static void lro_flush(struct net_lro_mgr *lro_mgr,
                      struct net_lro_desc *lro_desc)
{
        if (lro_desc->pkt_aggr_cnt > 1)
                lro_update_tcp_ip_header(lro_desc);

        skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;

        if (lro_mgr->features & LRO_F_NAPI)
                netif_receive_skb(lro_desc->parent);
        else
                netif_rx(lro_desc->parent);

        LRO_INC_STATS(lro_mgr, flushed);
        lro_clear_desc(lro_desc);
}

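/*
 * skb receive path: try to merge the skb into an existing session or start
 * a new one. Returns 0 if the skb was consumed by LRO, 1 if the caller has
 * to pass it to the stack itself.
 */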
static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
                          void *priv)
{
        struct net_lro_desc *lro_desc;
        struct iphdr *iph;
        struct tcphdr *tcph;
        u64 flags;
        int vlan_hdr_len = 0;

        if (!lro_mgr->get_skb_header ||
            lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
                                    &flags, priv))
                goto out;

        if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
                goto out;

        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
        if (!lro_desc)
                goto out;

        if ((skb->protocol == htons(ETH_P_8021Q)) &&
            !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
                vlan_hdr_len = VLAN_HLEN;

        if (!lro_desc->active) { /* start new lro session */
                if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
                        goto out;

                skb->ip_summed = lro_mgr->ip_summed_aggr;
                lro_init_desc(lro_desc, skb, iph, tcph);
                LRO_INC_STATS(lro_mgr, aggregated);
                return 0;
        }

        if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
                goto out2;

        if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
                goto out2;

        lro_add_packet(lro_desc, skb, iph, tcph);
        LRO_INC_STATS(lro_mgr, aggregated);

        if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
            lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
                lro_flush(lro_mgr, lro_desc);

        return 0;

out2: /* send aggregated SKBs to stack */
        lro_flush(lro_mgr, lro_desc);

out:
        return 1;
}


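/*
 * Build an skb around a received page-fragment chain: copy the first
 * hdr_len bytes into the linear area and attach the rest as fragments.
 */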
static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
                                   struct skb_frag_struct *frags,
                                   int len, int true_size,
                                   void *mac_hdr,
                                   int hlen, __wsum sum,
                                   u32 ip_summed)
{
        struct sk_buff *skb;
        struct skb_frag_struct *skb_frags;
        int data_len = len;
        int hdr_len = min(len, hlen);

        skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
        if (!skb)
                return NULL;

        skb_reserve(skb, lro_mgr->frag_align_pad);
        skb->len = len;
        skb->data_len = len - hdr_len;
        skb->truesize += true_size;
        skb->tail += hdr_len;

        memcpy(skb->data, mac_hdr, hdr_len);

        skb_frags = skb_shinfo(skb)->frags;
        while (data_len > 0) {
                *skb_frags = *frags;
                data_len -= skb_frag_size(frags);
                skb_frags++;
                frags++;
                skb_shinfo(skb)->nr_frags++;
        }

        skb_shinfo(skb)->frags[0].page_offset += hdr_len;
        skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);

        skb->ip_summed = ip_summed;
        skb->csum = sum;
        skb->protocol = eth_type_trans(skb, lro_mgr->dev);
        return skb;
}

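/*
 * Page-fragment receive path, the counterpart of __lro_proc_skb. Returns
 * NULL if the data was aggregated, otherwise a freshly generated skb that
 * the caller must pass to the stack.
 */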
static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
                                          struct skb_frag_struct *frags,
                                          int len, int true_size,
                                          void *priv, __wsum sum)
{
        struct net_lro_desc *lro_desc;
        struct iphdr *iph;
        struct tcphdr *tcph;
        struct sk_buff *skb;
        u64 flags;
        void *mac_hdr;
        int mac_hdr_len;
        int hdr_len = LRO_MAX_PG_HLEN;
        int vlan_hdr_len = 0;

        if (!lro_mgr->get_frag_header ||
            lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
                                     (void *)&tcph, &flags, priv)) {
                mac_hdr = skb_frag_address(frags);
                goto out1;
        }

        if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
                goto out1;

        hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
        mac_hdr_len = (int)((void *)(iph) - mac_hdr);

        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
        if (!lro_desc)
                goto out1;

        if (!lro_desc->active) { /* start new lro session */
                if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
                        goto out1;

                skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
                                  hdr_len, 0, lro_mgr->ip_summed_aggr);
                if (!skb)
                        goto out;

                if ((skb->protocol == htons(ETH_P_8021Q)) &&
                    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
                        vlan_hdr_len = VLAN_HLEN;

                iph = (void *)(skb->data + vlan_hdr_len);
                tcph = (void *)((u8 *)skb->data + vlan_hdr_len
                                + IP_HDR_LEN(iph));

                lro_init_desc(lro_desc, skb, iph, tcph);
                LRO_INC_STATS(lro_mgr, aggregated);
                return NULL;
        }

        if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
                goto out2;

        if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
                goto out2;

        lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
        LRO_INC_STATS(lro_mgr, aggregated);

        if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
            lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
                lro_flush(lro_mgr, lro_desc);

        return NULL;

out2: /* send aggregated packets to the stack */
        lro_flush(lro_mgr, lro_desc);

out1:  /* Original packet has to be posted to the stack */
        skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
                          hdr_len, sum, lro_mgr->ip_summed);
out:
        return skb;
}

void lro_receive_skb(struct net_lro_mgr *lro_mgr,
                     struct sk_buff *skb,
                     void *priv)
{
        if (__lro_proc_skb(lro_mgr, skb, priv)) {
                if (lro_mgr->features & LRO_F_NAPI)
                        netif_receive_skb(skb);
                else
                        netif_rx(skb);
        }
}
EXPORT_SYMBOL(lro_receive_skb);

void lro_receive_frags(struct net_lro_mgr *lro_mgr,
                       struct skb_frag_struct *frags,
                       int len, int true_size, void *priv, __wsum sum)
{
        struct sk_buff *skb;

        skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
        if (!skb)
                return;

        if (lro_mgr->features & LRO_F_NAPI)
                netif_receive_skb(skb);
        else
                netif_rx(skb);
}
EXPORT_SYMBOL(lro_receive_frags);

void lro_flush_all(struct net_lro_mgr *lro_mgr)
{
        int i;
        struct net_lro_desc *lro_desc = lro_mgr->lro_arr;

        for (i = 0; i < lro_mgr->max_desc; i++) {
                if (lro_desc[i].active)
                        lro_flush(lro_mgr, &lro_desc[i]);
        }
}
EXPORT_SYMBOL(lro_flush_all);

void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
                  struct iphdr *iph, struct tcphdr *tcph)
{
        struct net_lro_desc *lro_desc;

        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
        if (lro_desc->active)
                lro_flush(lro_mgr, lro_desc);
}
EXPORT_SYMBOL(lro_flush_pkt);

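For reference, here is a minimal sketch (not part of this file) of how a driver might wire up this interface, assuming Ethernet framing and plain IPv4/TCP. The names my_get_skb_header, my_lro_setup, my_lro_mgr, my_lro_desc and MY_LRO_MAX_DESC are illustrative; the struct net_lro_mgr fields, the LRO_IPV4/LRO_TCP header flags and the exported entry points are the ones used above and declared in linux/inet_lro.h.

#include <linux/inet_lro.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <net/ip.h>

#define MY_LRO_MAX_DESC 8

static struct net_lro_desc my_lro_desc[MY_LRO_MAX_DESC];
static struct net_lro_mgr my_lro_mgr;

/*
 * Locate the IPv4/TCP headers for the LRO engine; the skb is assumed to
 * have been through eth_type_trans() already. A non-zero return makes LRO
 * pass the packet to the stack unmodified.
 */
static int my_get_skb_header(struct sk_buff *skb, void **iphdr, void **tcph,
                             u64 *hdr_flags, void *priv)
{
        struct iphdr *iph;

        if (skb->protocol != htons(ETH_P_IP))
                return -1;

        skb_reset_network_header(skb);
        iph = ip_hdr(skb);
        if (iph->protocol != IPPROTO_TCP)
                return -1;

        skb_set_transport_header(skb, ip_hdrlen(skb));
        *iphdr = iph;
        *tcph = tcp_hdr(skb);
        *hdr_flags = LRO_IPV4 | LRO_TCP;
        return 0;
}

static void my_lro_setup(struct net_device *dev)
{
        my_lro_mgr.dev            = dev;
        my_lro_mgr.features       = LRO_F_NAPI;
        my_lro_mgr.ip_summed      = CHECKSUM_UNNECESSARY;
        my_lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
        my_lro_mgr.max_desc       = MY_LRO_MAX_DESC;
        my_lro_mgr.max_aggr       = 32;
        my_lro_mgr.lro_arr        = my_lro_desc;
        my_lro_mgr.get_skb_header = my_get_skb_header;
}

/*
 * In the NAPI poll routine, feed received skbs through LRO and flush any
 * half-built sessions before completing the poll:
 *
 *      lro_receive_skb(&my_lro_mgr, skb, NULL);   for each received packet
 *      ...
 *      lro_flush_all(&my_lro_mgr);                before napi_complete()
 */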