linux/drivers/net/sfc/tx.c
<<
>>
Prefs
   1/****************************************************************************
   2 * Driver for Solarflare Solarstorm network controllers and boards
   3 * Copyright 2005-2006 Fen Systems Ltd.
   4 * Copyright 2005-2008 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
  11#include <linux/pci.h>
  12#include <linux/tcp.h>
  13#include <linux/ip.h>
  14#include <linux/in.h>
  15#include <linux/if_ether.h>
  16#include <linux/highmem.h>
  17#include "net_driver.h"
  18#include "tx.h"
  19#include "efx.h"
  20#include "falcon.h"
  21#include "workarounds.h"
  22
  23/*
  24 * TX descriptor ring full threshold
  25 *
  26 * The tx_queue descriptor ring fill-level must fall below this value
  27 * before we restart the netif queue
  28 */
  29#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)      \
  30        (_tx_queue->efx->type->txd_ring_mask / 2u)
  31
  32/* We want to be able to nest calls to netif_stop_queue(), since each
  33 * channel can have an individual stop on the queue.
  34 */
  35void efx_stop_queue(struct efx_nic *efx)
  36{
  37        spin_lock_bh(&efx->netif_stop_lock);
  38        EFX_TRACE(efx, "stop TX queue\n");
  39
  40        atomic_inc(&efx->netif_stop_count);
  41        netif_stop_queue(efx->net_dev);
  42
  43        spin_unlock_bh(&efx->netif_stop_lock);
  44}
  45
  46/* Wake netif's TX queue
  47 * We want to be able to nest calls to netif_stop_queue(), since each
  48 * channel can have an individual stop on the queue.
  49 */
  50void efx_wake_queue(struct efx_nic *efx)
  51{
  52        local_bh_disable();
  53        if (atomic_dec_and_lock(&efx->netif_stop_count,
  54                                &efx->netif_stop_lock)) {
  55                EFX_TRACE(efx, "waking TX queue\n");
  56                netif_wake_queue(efx->net_dev);
  57                spin_unlock(&efx->netif_stop_lock);
  58        }
  59        local_bh_enable();
  60}
  61
  62static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
  63                               struct efx_tx_buffer *buffer)
  64{
  65        if (buffer->unmap_len) {
  66                struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
  67                dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
  68                                         buffer->unmap_len);
  69                if (buffer->unmap_single)
  70                        pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
  71                                         PCI_DMA_TODEVICE);
  72                else
  73                        pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
  74                                       PCI_DMA_TODEVICE);
  75                buffer->unmap_len = 0;
  76                buffer->unmap_single = false;
  77        }
  78
  79        if (buffer->skb) {
  80                dev_kfree_skb_any((struct sk_buff *) buffer->skb);
  81                buffer->skb = NULL;
  82                EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
  83                          "complete\n", tx_queue->queue, read_ptr);
  84        }
  85}
  86
  87/**
  88 * struct efx_tso_header - a DMA mapped buffer for packet headers
  89 * @next: Linked list of free ones.
  90 *      The list is protected by the TX queue lock.
  91 * @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
  92 * @dma_addr: The DMA address of the header below.
  93 *
  94 * This controls the memory used for a TSO header.  Use TSOH_DATA()
  95 * to find the packet header data.  Use TSOH_SIZE() to calculate the
  96 * total size required for a given packet header length.  TSO headers
  97 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
  98 */
  99struct efx_tso_header {
 100        union {
 101                struct efx_tso_header *next;
 102                size_t unmap_len;
 103        };
 104        dma_addr_t dma_addr;
 105};
 106
 107static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 108                               struct sk_buff *skb);
 109static void efx_fini_tso(struct efx_tx_queue *tx_queue);
 110static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
 111                               struct efx_tso_header *tsoh);
 112
 113static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
 114                          struct efx_tx_buffer *buffer)
 115{
 116        if (buffer->tsoh) {
 117                if (likely(!buffer->tsoh->unmap_len)) {
 118                        buffer->tsoh->next = tx_queue->tso_headers_free;
 119                        tx_queue->tso_headers_free = buffer->tsoh;
 120                } else {
 121                        efx_tsoh_heap_free(tx_queue, buffer->tsoh);
 122                }
 123                buffer->tsoh = NULL;
 124        }
 125}
 126
 127
 128/*
 129 * Add a socket buffer to a TX queue
 130 *
 131 * This maps all fragments of a socket buffer for DMA and adds them to
 132 * the TX queue.  The queue's insert pointer will be incremented by
 133 * the number of fragments in the socket buffer.
 134 *
 135 * If any DMA mapping fails, any mapped fragments will be unmapped,
 136 * the queue's insert pointer will be restored to its original value.
 137 *
 138 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 139 * You must hold netif_tx_lock() to call this function.
 140 */
 141static netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue,
 142                                         struct sk_buff *skb)
 143{
 144        struct efx_nic *efx = tx_queue->efx;
 145        struct pci_dev *pci_dev = efx->pci_dev;
 146        struct efx_tx_buffer *buffer;
 147        skb_frag_t *fragment;
 148        struct page *page;
 149        int page_offset;
 150        unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
 151        dma_addr_t dma_addr, unmap_addr = 0;
 152        unsigned int dma_len;
 153        bool unmap_single;
 154        int q_space, i = 0;
 155        netdev_tx_t rc = NETDEV_TX_OK;
 156
 157        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
 158
 159        if (skb_shinfo((struct sk_buff *)skb)->gso_size)
 160                return efx_enqueue_skb_tso(tx_queue, skb);
 161
 162        /* Get size of the initial fragment */
 163        len = skb_headlen(skb);
 164
 165        /* Pad if necessary */
 166        if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
 167                EFX_BUG_ON_PARANOID(skb->data_len);
 168                len = 32 + 1;
 169                if (skb_pad(skb, len - skb->len))
 170                        return NETDEV_TX_OK;
 171        }
 172
 173        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
 174        q_space = efx->type->txd_ring_mask - 1 - fill_level;
 175
 176        /* Map for DMA.  Use pci_map_single rather than pci_map_page
 177         * since this is more efficient on machines with sparse
 178         * memory.
 179         */
 180        unmap_single = true;
 181        dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
 182
 183        /* Process all fragments */
 184        while (1) {
 185                if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
 186                        goto pci_err;
 187
 188                /* Store fields for marking in the per-fragment final
 189                 * descriptor */
 190                unmap_len = len;
 191                unmap_addr = dma_addr;
 192
 193                /* Add to TX queue, splitting across DMA boundaries */
 194                do {
 195                        if (unlikely(q_space-- <= 0)) {
 196                                /* It might be that completions have
 197                                 * happened since the xmit path last
 198                                 * checked.  Update the xmit path's
 199                                 * copy of read_count.
 200                                 */
 201                                ++tx_queue->stopped;
 202                                /* This memory barrier protects the
 203                                 * change of stopped from the access
 204                                 * of read_count. */
 205                                smp_mb();
 206                                tx_queue->old_read_count =
 207                                        *(volatile unsigned *)
 208                                        &tx_queue->read_count;
 209                                fill_level = (tx_queue->insert_count
 210                                              - tx_queue->old_read_count);
 211                                q_space = (efx->type->txd_ring_mask - 1 -
 212                                           fill_level);
 213                                if (unlikely(q_space-- <= 0))
 214                                        goto stop;
 215                                smp_mb();
 216                                --tx_queue->stopped;
 217                        }
 218
 219                        insert_ptr = (tx_queue->insert_count &
 220                                      efx->type->txd_ring_mask);
 221                        buffer = &tx_queue->buffer[insert_ptr];
 222                        efx_tsoh_free(tx_queue, buffer);
 223                        EFX_BUG_ON_PARANOID(buffer->tsoh);
 224                        EFX_BUG_ON_PARANOID(buffer->skb);
 225                        EFX_BUG_ON_PARANOID(buffer->len);
 226                        EFX_BUG_ON_PARANOID(!buffer->continuation);
 227                        EFX_BUG_ON_PARANOID(buffer->unmap_len);
 228
 229                        dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
 230                        if (likely(dma_len > len))
 231                                dma_len = len;
 232
 233                        misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
 234                        if (misalign && dma_len + misalign > 512)
 235                                dma_len = 512 - misalign;
 236
 237                        /* Fill out per descriptor fields */
 238                        buffer->len = dma_len;
 239                        buffer->dma_addr = dma_addr;
 240                        len -= dma_len;
 241                        dma_addr += dma_len;
 242                        ++tx_queue->insert_count;
 243                } while (len);
 244
 245                /* Transfer ownership of the unmapping to the final buffer */
 246                buffer->unmap_single = unmap_single;
 247                buffer->unmap_len = unmap_len;
 248                unmap_len = 0;
 249
 250                /* Get address and size of next fragment */
 251                if (i >= skb_shinfo(skb)->nr_frags)
 252                        break;
 253                fragment = &skb_shinfo(skb)->frags[i];
 254                len = fragment->size;
 255                page = fragment->page;
 256                page_offset = fragment->page_offset;
 257                i++;
 258                /* Map for DMA */
 259                unmap_single = false;
 260                dma_addr = pci_map_page(pci_dev, page, page_offset, len,
 261                                        PCI_DMA_TODEVICE);
 262        }
 263
 264        /* Transfer ownership of the skb to the final buffer */
 265        buffer->skb = skb;
 266        buffer->continuation = false;
 267
 268        /* Pass off to hardware */
 269        falcon_push_buffers(tx_queue);
 270
 271        return NETDEV_TX_OK;
 272
 273 pci_err:
 274        EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
 275                   "fragments for DMA\n", tx_queue->queue, skb->len,
 276                   skb_shinfo(skb)->nr_frags + 1);
 277
 278        /* Mark the packet as transmitted, and free the SKB ourselves */
 279        dev_kfree_skb_any((struct sk_buff *)skb);
 280        goto unwind;
 281
 282 stop:
 283        rc = NETDEV_TX_BUSY;
 284
 285        if (tx_queue->stopped == 1)
 286                efx_stop_queue(efx);
 287
 288 unwind:
 289        /* Work backwards until we hit the original insert pointer value */
 290        while (tx_queue->insert_count != tx_queue->write_count) {
 291                --tx_queue->insert_count;
 292                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
 293                buffer = &tx_queue->buffer[insert_ptr];
 294                efx_dequeue_buffer(tx_queue, buffer);
 295                buffer->len = 0;
 296        }
 297
 298        /* Free the fragment we were mid-way through pushing */
 299        if (unmap_len) {
 300                if (unmap_single)
 301                        pci_unmap_single(pci_dev, unmap_addr, unmap_len,
 302                                         PCI_DMA_TODEVICE);
 303                else
 304                        pci_unmap_page(pci_dev, unmap_addr, unmap_len,
 305                                       PCI_DMA_TODEVICE);
 306        }
 307
 308        return rc;
 309}
 310
 311/* Remove packets from the TX queue
 312 *
 313 * This removes packets from the TX queue, up to and including the
 314 * specified index.
 315 */
 316static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
 317                                unsigned int index)
 318{
 319        struct efx_nic *efx = tx_queue->efx;
 320        unsigned int stop_index, read_ptr;
 321        unsigned int mask = tx_queue->efx->type->txd_ring_mask;
 322
 323        stop_index = (index + 1) & mask;
 324        read_ptr = tx_queue->read_count & mask;
 325
 326        while (read_ptr != stop_index) {
 327                struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
 328                if (unlikely(buffer->len == 0)) {
 329                        EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
 330                                "completion id %x\n", tx_queue->queue,
 331                                read_ptr);
 332                        efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
 333                        return;
 334                }
 335
 336                efx_dequeue_buffer(tx_queue, buffer);
 337                buffer->continuation = true;
 338                buffer->len = 0;
 339
 340                ++tx_queue->read_count;
 341                read_ptr = tx_queue->read_count & mask;
 342        }
 343}
 344
 345/* Initiate a packet transmission on the specified TX queue.
 346 * Note that returning anything other than NETDEV_TX_OK will cause the
 347 * OS to free the skb.
 348 *
 349 * This function is split out from efx_hard_start_xmit to allow the
 350 * loopback test to direct packets via specific TX queues.  It is
 351 * therefore a non-static inline, so as not to penalise performance
 352 * for non-loopback transmissions.
 353 *
 354 * Context: netif_tx_lock held
 355 */
 356inline netdev_tx_t efx_xmit(struct efx_nic *efx,
 357                           struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 358{
 359        /* Map fragments for DMA and add to TX queue */
 360        return efx_enqueue_skb(tx_queue, skb);
 361}
 362
 363/* Initiate a packet transmission.  We use one channel per CPU
 364 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 365 * completion events will be directed back to the CPU that transmitted
 366 * the packet, which should be cache-efficient.
 367 *
 368 * Context: non-blocking.
 369 * Note that returning anything other than NETDEV_TX_OK will cause the
 370 * OS to free the skb.
 371 */
 372netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 373                                      struct net_device *net_dev)
 374{
 375        struct efx_nic *efx = netdev_priv(net_dev);
 376        struct efx_tx_queue *tx_queue;
 377
 378        if (unlikely(efx->port_inhibited))
 379                return NETDEV_TX_BUSY;
 380
 381        if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
 382                tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
 383        else
 384                tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];
 385
 386        return efx_xmit(efx, tx_queue, skb);
 387}
 388
 389void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 390{
 391        unsigned fill_level;
 392        struct efx_nic *efx = tx_queue->efx;
 393
 394        EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);
 395
 396        efx_dequeue_buffers(tx_queue, index);
 397
 398        /* See if we need to restart the netif queue.  This barrier
 399         * separates the update of read_count from the test of
 400         * stopped. */
 401        smp_mb();
 402        if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
 403                fill_level = tx_queue->insert_count - tx_queue->read_count;
 404                if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
 405                        EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
 406
 407                        /* Do this under netif_tx_lock(), to avoid racing
 408                         * with efx_xmit(). */
 409                        netif_tx_lock(efx->net_dev);
 410                        if (tx_queue->stopped) {
 411                                tx_queue->stopped = 0;
 412                                efx_wake_queue(efx);
 413                        }
 414                        netif_tx_unlock(efx->net_dev);
 415                }
 416        }
 417}
 418
 419int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 420{
 421        struct efx_nic *efx = tx_queue->efx;
 422        unsigned int txq_size;
 423        int i, rc;
 424
 425        EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
 426
 427        /* Allocate software ring */
 428        txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
 429        tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
 430        if (!tx_queue->buffer)
 431                return -ENOMEM;
 432        for (i = 0; i <= efx->type->txd_ring_mask; ++i)
 433                tx_queue->buffer[i].continuation = true;
 434
 435        /* Allocate hardware ring */
 436        rc = falcon_probe_tx(tx_queue);
 437        if (rc)
 438                goto fail;
 439
 440        return 0;
 441
 442 fail:
 443        kfree(tx_queue->buffer);
 444        tx_queue->buffer = NULL;
 445        return rc;
 446}
 447
 448void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 449{
 450        EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
 451
 452        tx_queue->insert_count = 0;
 453        tx_queue->write_count = 0;
 454        tx_queue->read_count = 0;
 455        tx_queue->old_read_count = 0;
 456        BUG_ON(tx_queue->stopped);
 457
 458        /* Set up TX descriptor ring */
 459        falcon_init_tx(tx_queue);
 460}
 461
 462void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
 463{
 464        struct efx_tx_buffer *buffer;
 465
 466        if (!tx_queue->buffer)
 467                return;
 468
 469        /* Free any buffers left in the ring */
 470        while (tx_queue->read_count != tx_queue->write_count) {
 471                buffer = &tx_queue->buffer[tx_queue->read_count &
 472                                           tx_queue->efx->type->txd_ring_mask];
 473                efx_dequeue_buffer(tx_queue, buffer);
 474                buffer->continuation = true;
 475                buffer->len = 0;
 476
 477                ++tx_queue->read_count;
 478        }
 479}
 480
 481void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 482{
 483        EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
 484
 485        /* Flush TX queue, remove descriptor ring */
 486        falcon_fini_tx(tx_queue);
 487
 488        efx_release_tx_buffers(tx_queue);
 489
 490        /* Free up TSO header cache */
 491        efx_fini_tso(tx_queue);
 492
 493        /* Release queue's stop on port, if any */
 494        if (tx_queue->stopped) {
 495                tx_queue->stopped = 0;
 496                efx_wake_queue(tx_queue->efx);
 497        }
 498}
 499
 500void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 501{
 502        EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
 503        falcon_remove_tx(tx_queue);
 504
 505        kfree(tx_queue->buffer);
 506        tx_queue->buffer = NULL;
 507}
 508
 509
 510/* Efx TCP segmentation acceleration.
 511 *
 512 * Why?  Because by doing it here in the driver we can go significantly
 513 * faster than the GSO.
 514 *
 515 * Requires TX checksum offload support.
 516 */
 517
 518/* Number of bytes inserted at the start of a TSO header buffer,
 519 * similar to NET_IP_ALIGN.
 520 */
 521#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 522#define TSOH_OFFSET     0
 523#else
 524#define TSOH_OFFSET     NET_IP_ALIGN
 525#endif
 526
 527#define TSOH_BUFFER(tsoh)       ((u8 *)(tsoh + 1) + TSOH_OFFSET)
 528
 529/* Total size of struct efx_tso_header, buffer and padding */
 530#define TSOH_SIZE(hdr_len)                                      \
 531        (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
 532
 533/* Size of blocks on free list.  Larger blocks must be allocated from
 534 * the heap.
 535 */
 536#define TSOH_STD_SIZE           128
 537
 538#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
 539#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
 540#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
 541#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
 542
 543/**
 544 * struct tso_state - TSO state for an SKB
 545 * @out_len: Remaining length in current segment
 546 * @seqnum: Current sequence number
 547 * @ipv4_id: Current IPv4 ID, host endian
 548 * @packet_space: Remaining space in current packet
 549 * @dma_addr: DMA address of current position
 550 * @in_len: Remaining length in current SKB fragment
 551 * @unmap_len: Length of SKB fragment
 552 * @unmap_addr: DMA address of SKB fragment
 553 * @unmap_single: DMA single vs page mapping flag
 554 * @header_len: Number of bytes of header
 555 * @full_packet_size: Number of bytes to put in each outgoing segment
 556 *
 557 * The state used during segmentation.  It is put into this data structure
 558 * just to make it easy to pass into inline functions.
 559 */
 560struct tso_state {
 561        /* Output position */
 562        unsigned out_len;
 563        unsigned seqnum;
 564        unsigned ipv4_id;
 565        unsigned packet_space;
 566
 567        /* Input position */
 568        dma_addr_t dma_addr;
 569        unsigned in_len;
 570        unsigned unmap_len;
 571        dma_addr_t unmap_addr;
 572        bool unmap_single;
 573
 574        unsigned header_len;
 575        int full_packet_size;
 576};
 577
 578
 579/*
 580 * Verify that our various assumptions about sk_buffs and the conditions
 581 * under which TSO will be attempted hold true.
 582 */
 583static void efx_tso_check_safe(struct sk_buff *skb)
 584{
 585        __be16 protocol = skb->protocol;
 586
 587        EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
 588                            protocol);
 589        if (protocol == htons(ETH_P_8021Q)) {
 590                /* Find the encapsulated protocol; reset network header
 591                 * and transport header based on that. */
 592                struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 593                protocol = veh->h_vlan_encapsulated_proto;
 594                skb_set_network_header(skb, sizeof(*veh));
 595                if (protocol == htons(ETH_P_IP))
 596                        skb_set_transport_header(skb, sizeof(*veh) +
 597                                                 4 * ip_hdr(skb)->ihl);
 598        }
 599
 600        EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
 601        EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
 602        EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
 603                             + (tcp_hdr(skb)->doff << 2u)) >
 604                            skb_headlen(skb));
 605}
 606
 607
 608/*
 609 * Allocate a page worth of efx_tso_header structures, and string them
 610 * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
 611 */
 612static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
 613{
 614
 615        struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
 616        struct efx_tso_header *tsoh;
 617        dma_addr_t dma_addr;
 618        u8 *base_kva, *kva;
 619
 620        base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
 621        if (base_kva == NULL) {
 622                EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
 623                        " headers\n");
 624                return -ENOMEM;
 625        }
 626
 627        /* pci_alloc_consistent() allocates pages. */
 628        EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
 629
 630        for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
 631                tsoh = (struct efx_tso_header *)kva;
 632                tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
 633                tsoh->next = tx_queue->tso_headers_free;
 634                tx_queue->tso_headers_free = tsoh;
 635        }
 636
 637        return 0;
 638}
 639
 640
 641/* Free up a TSO header, and all others in the same page. */
 642static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
 643                                struct efx_tso_header *tsoh,
 644                                struct pci_dev *pci_dev)
 645{
 646        struct efx_tso_header **p;
 647        unsigned long base_kva;
 648        dma_addr_t base_dma;
 649
 650        base_kva = (unsigned long)tsoh & PAGE_MASK;
 651        base_dma = tsoh->dma_addr & PAGE_MASK;
 652
 653        p = &tx_queue->tso_headers_free;
 654        while (*p != NULL) {
 655                if (((unsigned long)*p & PAGE_MASK) == base_kva)
 656                        *p = (*p)->next;
 657                else
 658                        p = &(*p)->next;
 659        }
 660
 661        pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
 662}
 663
 664static struct efx_tso_header *
 665efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
 666{
 667        struct efx_tso_header *tsoh;
 668
 669        tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
 670        if (unlikely(!tsoh))
 671                return NULL;
 672
 673        tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
 674                                        TSOH_BUFFER(tsoh), header_len,
 675                                        PCI_DMA_TODEVICE);
 676        if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
 677                                           tsoh->dma_addr))) {
 678                kfree(tsoh);
 679                return NULL;
 680        }
 681
 682        tsoh->unmap_len = header_len;
 683        return tsoh;
 684}
 685
 686static void
 687efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
 688{
 689        pci_unmap_single(tx_queue->efx->pci_dev,
 690                         tsoh->dma_addr, tsoh->unmap_len,
 691                         PCI_DMA_TODEVICE);
 692        kfree(tsoh);
 693}
 694
 695/**
 696 * efx_tx_queue_insert - push descriptors onto the TX queue
 697 * @tx_queue:           Efx TX queue
 698 * @dma_addr:           DMA address of fragment
 699 * @len:                Length of fragment
 700 * @final_buffer:       The final buffer inserted into the queue
 701 *
 702 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 703 * @tx_queue full.
 704 */
 705static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 706                               dma_addr_t dma_addr, unsigned len,
 707                               struct efx_tx_buffer **final_buffer)
 708{
 709        struct efx_tx_buffer *buffer;
 710        struct efx_nic *efx = tx_queue->efx;
 711        unsigned dma_len, fill_level, insert_ptr, misalign;
 712        int q_space;
 713
 714        EFX_BUG_ON_PARANOID(len <= 0);
 715
 716        fill_level = tx_queue->insert_count - tx_queue->old_read_count;
 717        /* -1 as there is no way to represent all descriptors used */
 718        q_space = efx->type->txd_ring_mask - 1 - fill_level;
 719
 720        while (1) {
 721                if (unlikely(q_space-- <= 0)) {
 722                        /* It might be that completions have happened
 723                         * since the xmit path last checked.  Update
 724                         * the xmit path's copy of read_count.
 725                         */
 726                        ++tx_queue->stopped;
 727                        /* This memory barrier protects the change of
 728                         * stopped from the access of read_count. */
 729                        smp_mb();
 730                        tx_queue->old_read_count =
 731                                *(volatile unsigned *)&tx_queue->read_count;
 732                        fill_level = (tx_queue->insert_count
 733                                      - tx_queue->old_read_count);
 734                        q_space = efx->type->txd_ring_mask - 1 - fill_level;
 735                        if (unlikely(q_space-- <= 0)) {
 736                                *final_buffer = NULL;
 737                                return 1;
 738                        }
 739                        smp_mb();
 740                        --tx_queue->stopped;
 741                }
 742
 743                insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
 744                buffer = &tx_queue->buffer[insert_ptr];
 745                ++tx_queue->insert_count;
 746
 747                EFX_BUG_ON_PARANOID(tx_queue->insert_count -
 748                                    tx_queue->read_count >
 749                                    efx->type->txd_ring_mask);
 750
 751                efx_tsoh_free(tx_queue, buffer);
 752                EFX_BUG_ON_PARANOID(buffer->len);
 753                EFX_BUG_ON_PARANOID(buffer->unmap_len);
 754                EFX_BUG_ON_PARANOID(buffer->skb);
 755                EFX_BUG_ON_PARANOID(!buffer->continuation);
 756                EFX_BUG_ON_PARANOID(buffer->tsoh);
 757
 758                buffer->dma_addr = dma_addr;
 759
 760                /* Ensure we do not cross a boundary unsupported by H/W */
 761                dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;
 762
 763                misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
 764                if (misalign && dma_len + misalign > 512)
 765                        dma_len = 512 - misalign;
 766
 767                /* If there is enough space to send then do so */
 768                if (dma_len >= len)
 769                        break;
 770
 771                buffer->len = dma_len; /* Don't set the other members */
 772                dma_addr += dma_len;
 773                len -= dma_len;
 774        }
 775
 776        EFX_BUG_ON_PARANOID(!len);
 777        buffer->len = len;
 778        *final_buffer = buffer;
 779        return 0;
 780}
 781
 782
 783/*
 784 * Put a TSO header into the TX queue.
 785 *
 786 * This is special-cased because we know that it is small enough to fit in
 787 * a single fragment, and we know it doesn't cross a page boundary.  It
 788 * also allows us to not worry about end-of-packet etc.
 789 */
 790static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
 791                               struct efx_tso_header *tsoh, unsigned len)
 792{
 793        struct efx_tx_buffer *buffer;
 794
 795        buffer = &tx_queue->buffer[tx_queue->insert_count &
 796                                   tx_queue->efx->type->txd_ring_mask];
 797        efx_tsoh_free(tx_queue, buffer);
 798        EFX_BUG_ON_PARANOID(buffer->len);
 799        EFX_BUG_ON_PARANOID(buffer->unmap_len);
 800        EFX_BUG_ON_PARANOID(buffer->skb);
 801        EFX_BUG_ON_PARANOID(!buffer->continuation);
 802        EFX_BUG_ON_PARANOID(buffer->tsoh);
 803        buffer->len = len;
 804        buffer->dma_addr = tsoh->dma_addr;
 805        buffer->tsoh = tsoh;
 806
 807        ++tx_queue->insert_count;
 808}
 809
 810
 811/* Remove descriptors put into a tx_queue. */
 812static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
 813{
 814        struct efx_tx_buffer *buffer;
 815        dma_addr_t unmap_addr;
 816
 817        /* Work backwards until we hit the original insert pointer value */
 818        while (tx_queue->insert_count != tx_queue->write_count) {
 819                --tx_queue->insert_count;
 820                buffer = &tx_queue->buffer[tx_queue->insert_count &
 821                                           tx_queue->efx->type->txd_ring_mask];
 822                efx_tsoh_free(tx_queue, buffer);
 823                EFX_BUG_ON_PARANOID(buffer->skb);
 824                buffer->len = 0;
 825                buffer->continuation = true;
 826                if (buffer->unmap_len) {
 827                        unmap_addr = (buffer->dma_addr + buffer->len -
 828                                      buffer->unmap_len);
 829                        if (buffer->unmap_single)
 830                                pci_unmap_single(tx_queue->efx->pci_dev,
 831                                                 unmap_addr, buffer->unmap_len,
 832                                                 PCI_DMA_TODEVICE);
 833                        else
 834                                pci_unmap_page(tx_queue->efx->pci_dev,
 835                                               unmap_addr, buffer->unmap_len,
 836                                               PCI_DMA_TODEVICE);
 837                        buffer->unmap_len = 0;
 838                }
 839        }
 840}
 841
 842
 843/* Parse the SKB header and initialise state. */
 844static void tso_start(struct tso_state *st, const struct sk_buff *skb)
 845{
 846        /* All ethernet/IP/TCP headers combined size is TCP header size
 847         * plus offset of TCP header relative to start of packet.
 848         */
 849        st->header_len = ((tcp_hdr(skb)->doff << 2u)
 850                          + PTR_DIFF(tcp_hdr(skb), skb->data));
 851        st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
 852
 853        st->ipv4_id = ntohs(ip_hdr(skb)->id);
 854        st->seqnum = ntohl(tcp_hdr(skb)->seq);
 855
 856        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
 857        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
 858        EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
 859
 860        st->packet_space = st->full_packet_size;
 861        st->out_len = skb->len - st->header_len;
 862        st->unmap_len = 0;
 863        st->unmap_single = false;
 864}
 865
 866static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
 867                            skb_frag_t *frag)
 868{
 869        st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
 870                                      frag->page_offset, frag->size,
 871                                      PCI_DMA_TODEVICE);
 872        if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
 873                st->unmap_single = false;
 874                st->unmap_len = frag->size;
 875                st->in_len = frag->size;
 876                st->dma_addr = st->unmap_addr;
 877                return 0;
 878        }
 879        return -ENOMEM;
 880}
 881
 882static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
 883                                 const struct sk_buff *skb)
 884{
 885        int hl = st->header_len;
 886        int len = skb_headlen(skb) - hl;
 887
 888        st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
 889                                        len, PCI_DMA_TODEVICE);
 890        if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
 891                st->unmap_single = true;
 892                st->unmap_len = len;
 893                st->in_len = len;
 894                st->dma_addr = st->unmap_addr;
 895                return 0;
 896        }
 897        return -ENOMEM;
 898}
 899
 900
 901/**
 902 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 903 * @tx_queue:           Efx TX queue
 904 * @skb:                Socket buffer
 905 * @st:                 TSO state
 906 *
 907 * Form descriptors for the current fragment, until we reach the end
 908 * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
 909 * space in @tx_queue.
 910 */
 911static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
 912                                         const struct sk_buff *skb,
 913                                         struct tso_state *st)
 914{
 915        struct efx_tx_buffer *buffer;
 916        int n, end_of_packet, rc;
 917
 918        if (st->in_len == 0)
 919                return 0;
 920        if (st->packet_space == 0)
 921                return 0;
 922
 923        EFX_BUG_ON_PARANOID(st->in_len <= 0);
 924        EFX_BUG_ON_PARANOID(st->packet_space <= 0);
 925
 926        n = min(st->in_len, st->packet_space);
 927
 928        st->packet_space -= n;
 929        st->out_len -= n;
 930        st->in_len -= n;
 931
 932        rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
 933        if (likely(rc == 0)) {
 934                if (st->out_len == 0)
 935                        /* Transfer ownership of the skb */
 936                        buffer->skb = skb;
 937
 938                end_of_packet = st->out_len == 0 || st->packet_space == 0;
 939                buffer->continuation = !end_of_packet;
 940
 941                if (st->in_len == 0) {
 942                        /* Transfer ownership of the pci mapping */
 943                        buffer->unmap_len = st->unmap_len;
 944                        buffer->unmap_single = st->unmap_single;
 945                        st->unmap_len = 0;
 946                }
 947        }
 948
 949        st->dma_addr += n;
 950        return rc;
 951}
 952
 953
 954/**
 955 * tso_start_new_packet - generate a new header and prepare for the new packet
 956 * @tx_queue:           Efx TX queue
 957 * @skb:                Socket buffer
 958 * @st:                 TSO state
 959 *
 960 * Generate a new header and prepare for the new packet.  Return 0 on
 961 * success, or -1 if failed to alloc header.
 962 */
 963static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
 964                                const struct sk_buff *skb,
 965                                struct tso_state *st)
 966{
 967        struct efx_tso_header *tsoh;
 968        struct iphdr *tsoh_iph;
 969        struct tcphdr *tsoh_th;
 970        unsigned ip_length;
 971        u8 *header;
 972
 973        /* Allocate a DMA-mapped header buffer. */
 974        if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
 975                if (tx_queue->tso_headers_free == NULL) {
 976                        if (efx_tsoh_block_alloc(tx_queue))
 977                                return -1;
 978                }
 979                EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
 980                tsoh = tx_queue->tso_headers_free;
 981                tx_queue->tso_headers_free = tsoh->next;
 982                tsoh->unmap_len = 0;
 983        } else {
 984                tx_queue->tso_long_headers++;
 985                tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
 986                if (unlikely(!tsoh))
 987                        return -1;
 988        }
 989
 990        header = TSOH_BUFFER(tsoh);
 991        tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
 992        tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
 993
 994        /* Copy and update the headers. */
 995        memcpy(header, skb->data, st->header_len);
 996
 997        tsoh_th->seq = htonl(st->seqnum);
 998        st->seqnum += skb_shinfo(skb)->gso_size;
 999        if (st->out_len > skb_shinfo(skb)->gso_size) {
1000                /* This packet will not finish the TSO burst. */
1001                ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
1002                tsoh_th->fin = 0;
1003                tsoh_th->psh = 0;
1004        } else {
1005                /* This packet will be the last in the TSO burst. */
1006                ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
1007                tsoh_th->fin = tcp_hdr(skb)->fin;
1008                tsoh_th->psh = tcp_hdr(skb)->psh;
1009        }
1010        tsoh_iph->tot_len = htons(ip_length);
1011
1012        /* Linux leaves suitable gaps in the IP ID space for us to fill. */
1013        tsoh_iph->id = htons(st->ipv4_id);
1014        st->ipv4_id++;
1015
1016        st->packet_space = skb_shinfo(skb)->gso_size;
1017        ++tx_queue->tso_packets;
1018
1019        /* Form a descriptor for this header. */
1020        efx_tso_put_header(tx_queue, tsoh, st->header_len);
1021
1022        return 0;
1023}
1024
1025
1026/**
1027 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
1028 * @tx_queue:           Efx TX queue
1029 * @skb:                Socket buffer
1030 *
1031 * Context: You must hold netif_tx_lock() to call this function.
1032 *
1033 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
1034 * @skb was not enqueued.  In all cases @skb is consumed.  Return
1035 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
1036 */
1037static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1038                               struct sk_buff *skb)
1039{
1040        struct efx_nic *efx = tx_queue->efx;
1041        int frag_i, rc, rc2 = NETDEV_TX_OK;
1042        struct tso_state state;
1043
1044        /* Verify TSO is safe - these checks should never fail. */
1045        efx_tso_check_safe(skb);
1046
1047        EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
1048
1049        tso_start(&state, skb);
1050
1051        /* Assume that skb header area contains exactly the headers, and
1052         * all payload is in the frag list.
1053         */
1054        if (skb_headlen(skb) == state.header_len) {
1055                /* Grab the first payload fragment. */
1056                EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
1057                frag_i = 0;
1058                rc = tso_get_fragment(&state, efx,
1059                                      skb_shinfo(skb)->frags + frag_i);
1060                if (rc)
1061                        goto mem_err;
1062        } else {
1063                rc = tso_get_head_fragment(&state, efx, skb);
1064                if (rc)
1065                        goto mem_err;
1066                frag_i = -1;
1067        }
1068
1069        if (tso_start_new_packet(tx_queue, skb, &state) < 0)
1070                goto mem_err;
1071
1072        while (1) {
1073                rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
1074                if (unlikely(rc))
1075                        goto stop;
1076
1077                /* Move onto the next fragment? */
1078                if (state.in_len == 0) {
1079                        if (++frag_i >= skb_shinfo(skb)->nr_frags)
1080                                /* End of payload reached. */
1081                                break;
1082                        rc = tso_get_fragment(&state, efx,
1083                                              skb_shinfo(skb)->frags + frag_i);
1084                        if (rc)
1085                                goto mem_err;
1086                }
1087
1088                /* Start at new packet? */
1089                if (state.packet_space == 0 &&
1090                    tso_start_new_packet(tx_queue, skb, &state) < 0)
1091                        goto mem_err;
1092        }
1093
1094        /* Pass off to hardware */
1095        falcon_push_buffers(tx_queue);
1096
1097        tx_queue->tso_bursts++;
1098        return NETDEV_TX_OK;
1099
1100 mem_err:
1101        EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
1102        dev_kfree_skb_any((struct sk_buff *)skb);
1103        goto unwind;
1104
1105 stop:
1106        rc2 = NETDEV_TX_BUSY;
1107
1108        /* Stop the queue if it wasn't stopped before. */
1109        if (tx_queue->stopped == 1)
1110                efx_stop_queue(efx);
1111
1112 unwind:
1113        /* Free the DMA mapping we were in the process of writing out */
1114        if (state.unmap_len) {
1115                if (state.unmap_single)
1116                        pci_unmap_single(efx->pci_dev, state.unmap_addr,
1117                                         state.unmap_len, PCI_DMA_TODEVICE);
1118                else
1119                        pci_unmap_page(efx->pci_dev, state.unmap_addr,
1120                                       state.unmap_len, PCI_DMA_TODEVICE);
1121        }
1122
1123        efx_enqueue_unwind(tx_queue);
1124        return rc2;
1125}
1126
1127
1128/*
1129 * Free up all TSO datastructures associated with tx_queue. This
1130 * routine should be called only once the tx_queue is both empty and
1131 * will no longer be used.
1132 */
1133static void efx_fini_tso(struct efx_tx_queue *tx_queue)
1134{
1135        unsigned i;
1136
1137        if (tx_queue->buffer) {
1138                for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
1139                        efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
1140        }
1141
1142        while (tx_queue->tso_headers_free != NULL)
1143                efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
1144                                    tx_queue->efx->pci_dev);
1145}
1146
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.