linux/drivers/net/cxgb3/sge.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/skbuff.h>
  33#include <linux/netdevice.h>
  34#include <linux/etherdevice.h>
  35#include <linux/if_vlan.h>
  36#include <linux/ip.h>
  37#include <linux/tcp.h>
  38#include <linux/dma-mapping.h>
  39#include <net/arp.h>
  40#include "common.h"
  41#include "regs.h"
  42#include "sge_defs.h"
  43#include "t3_cpl.h"
  44#include "firmware_exports.h"
  45
  46#define USE_GTS 0
  47
  48#define SGE_RX_SM_BUF_SIZE 1536
  49
  50#define SGE_RX_COPY_THRES  256
  51#define SGE_RX_PULL_LEN    128
  52
  53#define SGE_PG_RSVD SMP_CACHE_BYTES
  54/*
  55 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  56 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  57 * directly.
  58 */
  59#define FL0_PG_CHUNK_SIZE  2048
  60#define FL0_PG_ORDER 0
  61#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
  62#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
  63#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
  64#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
  65
  66#define SGE_RX_DROP_THRES 16
  67#define RX_RECLAIM_PERIOD (HZ/4)
  68
  69/*
  70 * Max number of Rx buffers we replenish at a time.
  71 */
  72#define MAX_RX_REFILL 16U
  73/*
  74 * Period of the Tx buffer reclaim timer.  This timer does not need to run
  75 * frequently as Tx buffers are usually reclaimed by new Tx packets.
  76 */
  77#define TX_RECLAIM_PERIOD (HZ / 4)
  78#define TX_RECLAIM_TIMER_CHUNK 64U
  79#define TX_RECLAIM_CHUNK 16U
  80
  81/* WR size in bytes */
  82#define WR_LEN (WR_FLITS * 8)
  83
  84/*
  85 * Types of Tx queues in each queue set.  Order here matters, do not change.
  86 */
  87enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  88
  89/* Values for sge_txq.flags */
  90enum {
  91        TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  92        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  93};
  94
  95struct tx_desc {
  96        __be64 flit[TX_DESC_FLITS];
  97};
  98
  99struct rx_desc {
 100        __be32 addr_lo;
 101        __be32 len_gen;
 102        __be32 gen2;
 103        __be32 addr_hi;
 104};
 105
 106struct tx_sw_desc {             /* SW state per Tx descriptor */
 107        struct sk_buff *skb;
 108        u8 eop;       /* set if last descriptor for packet */
 109        u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
 110        u8 fragidx;   /* first page fragment associated with descriptor */
 111        s8 sflit;     /* start flit of first SGL entry in descriptor */
 112};
 113
 114struct rx_sw_desc {                /* SW state per Rx descriptor */
 115        union {
 116                struct sk_buff *skb;
 117                struct fl_pg_chunk pg_chunk;
 118        };
 119        DECLARE_PCI_UNMAP_ADDR(dma_addr);
 120};
 121
 122struct rsp_desc {               /* response queue descriptor */
 123        struct rss_header rss_hdr;
 124        __be32 flags;
 125        __be32 len_cq;
 126        u8 imm_data[47];
 127        u8 intr_gen;
 128};
 129
 130/*
 131 * Holds unmapping information for Tx packets that need deferred unmapping.
 132 * This structure lives at skb->head and must be allocated by callers.
 133 */
 134struct deferred_unmap_info {
 135        struct pci_dev *pdev;
 136        dma_addr_t addr[MAX_SKB_FRAGS + 1];
 137};
 138
 139/*
 140 * Maps a number of flits to the number of Tx descriptors that can hold them.
 141 * The formula is
 142 *
 143 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 144 *
 145 * HW allows up to 4 descriptors to be combined into a WR.
 146 */
 147static u8 flit_desc_map[] = {
 148        0,
 149#if SGE_NUM_GENBITS == 1
 150        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 151        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 152        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 153        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 154#elif SGE_NUM_GENBITS == 2
 155        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 156        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 157        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 158        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 159#else
 160# error "SGE_NUM_GENBITS must be 1 or 2"
 161#endif
 162};
 163
 164static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 165{
 166        return container_of(q, struct sge_qset, fl[qidx]);
 167}
 168
 169static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 170{
 171        return container_of(q, struct sge_qset, rspq);
 172}
 173
 174static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 175{
 176        return container_of(q, struct sge_qset, txq[qidx]);
 177}
 178
 179/**
 180 *      refill_rspq - replenish an SGE response queue
 181 *      @adapter: the adapter
 182 *      @q: the response queue to replenish
 183 *      @credits: how many new responses to make available
 184 *
 185 *      Replenishes a response queue by making the supplied number of responses
 186 *      available to HW.
 187 */
 188static inline void refill_rspq(struct adapter *adapter,
 189                               const struct sge_rspq *q, unsigned int credits)
 190{
 191        rmb();
 192        t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 193                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 194}
 195
 196/**
 197 *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 198 *
 199 *      Returns true if the platfrom needs sk_buff unmapping.  The compiler
 200 *      optimizes away unecessary code if this returns true.
 201 */
 202static inline int need_skb_unmap(void)
 203{
 204        /*
 205         * This structure is used to tell if the platfrom needs buffer
 206         * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 207         */
 208        struct dummy {
 209                DECLARE_PCI_UNMAP_ADDR(addr);
 210        };
 211
 212        return sizeof(struct dummy) != 0;
 213}
 214
 215/**
 216 *      unmap_skb - unmap a packet main body and its page fragments
 217 *      @skb: the packet
 218 *      @q: the Tx queue containing Tx descriptors for the packet
 219 *      @cidx: index of Tx descriptor
 220 *      @pdev: the PCI device
 221 *
 222 *      Unmap the main body of an sk_buff and its page fragments, if any.
 223 *      Because of the fairly complicated structure of our SGLs and the desire
 224 *      to conserve space for metadata, the information necessary to unmap an
 225 *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 226 *      descriptors (the physical addresses of the various data buffers), and
 227 *      the SW descriptor state (assorted indices).  The send functions
 228 *      initialize the indices for the first packet descriptor so we can unmap
 229 *      the buffers held in the first Tx descriptor here, and we have enough
 230 *      information at this point to set the state for the next Tx descriptor.
 231 *
 232 *      Note that it is possible to clean up the first descriptor of a packet
 233 *      before the send routines have written the next descriptors, but this
 234 *      race does not cause any problem.  We just end up writing the unmapping
 235 *      info for the descriptor first.
 236 */
 237static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 238                             unsigned int cidx, struct pci_dev *pdev)
 239{
 240        const struct sg_ent *sgp;
 241        struct tx_sw_desc *d = &q->sdesc[cidx];
 242        int nfrags, frag_idx, curflit, j = d->addr_idx;
 243
 244        sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 245        frag_idx = d->fragidx;
 246
 247        if (frag_idx == 0 && skb_headlen(skb)) {
 248                pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 249                                 skb_headlen(skb), PCI_DMA_TODEVICE);
 250                j = 1;
 251        }
 252
 253        curflit = d->sflit + 1 + j;
 254        nfrags = skb_shinfo(skb)->nr_frags;
 255
 256        while (frag_idx < nfrags && curflit < WR_FLITS) {
 257                pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 258                               skb_shinfo(skb)->frags[frag_idx].size,
 259                               PCI_DMA_TODEVICE);
 260                j ^= 1;
 261                if (j == 0) {
 262                        sgp++;
 263                        curflit++;
 264                }
 265                curflit++;
 266                frag_idx++;
 267        }
 268
 269        if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 270                d = cidx + 1 == q->size ? q->sdesc : d + 1;
 271                d->fragidx = frag_idx;
 272                d->addr_idx = j;
 273                d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 274        }
 275}
 276
 277/**
 278 *      free_tx_desc - reclaims Tx descriptors and their buffers
 279 *      @adapter: the adapter
 280 *      @q: the Tx queue to reclaim descriptors from
 281 *      @n: the number of descriptors to reclaim
 282 *
 283 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 284 *      Tx buffers.  Called with the Tx queue lock held.
 285 */
 286static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 287                         unsigned int n)
 288{
 289        struct tx_sw_desc *d;
 290        struct pci_dev *pdev = adapter->pdev;
 291        unsigned int cidx = q->cidx;
 292
 293        const int need_unmap = need_skb_unmap() &&
 294                               q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 295
 296        d = &q->sdesc[cidx];
 297        while (n--) {
 298                if (d->skb) {   /* an SGL is present */
 299                        if (need_unmap)
 300                                unmap_skb(d->skb, q, cidx, pdev);
 301                        if (d->eop)
 302                                kfree_skb(d->skb);
 303                }
 304                ++d;
 305                if (++cidx == q->size) {
 306                        cidx = 0;
 307                        d = q->sdesc;
 308                }
 309        }
 310        q->cidx = cidx;
 311}
 312
 313/**
 314 *      reclaim_completed_tx - reclaims completed Tx descriptors
 315 *      @adapter: the adapter
 316 *      @q: the Tx queue to reclaim completed descriptors from
 317 *      @chunk: maximum number of descriptors to reclaim
 318 *
 319 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 320 *      and frees the associated buffers if possible.  Called with the Tx
 321 *      queue's lock held.
 322 */
 323static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
 324                                                struct sge_txq *q,
 325                                                unsigned int chunk)
 326{
 327        unsigned int reclaim = q->processed - q->cleaned;
 328
 329        reclaim = min(chunk, reclaim);
 330        if (reclaim) {
 331                free_tx_desc(adapter, q, reclaim);
 332                q->cleaned += reclaim;
 333                q->in_use -= reclaim;
 334        }
 335        return q->processed - q->cleaned;
 336}
 337
 338/**
 339 *      should_restart_tx - are there enough resources to restart a Tx queue?
 340 *      @q: the Tx queue
 341 *
 342 *      Checks if there are enough descriptors to restart a suspended Tx queue.
 343 */
 344static inline int should_restart_tx(const struct sge_txq *q)
 345{
 346        unsigned int r = q->processed - q->cleaned;
 347
 348        return q->in_use - r < (q->size >> 1);
 349}
 350
 351static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 352                          struct rx_sw_desc *d)
 353{
 354        if (q->use_pages && d->pg_chunk.page) {
 355                (*d->pg_chunk.p_cnt)--;
 356                if (!*d->pg_chunk.p_cnt)
 357                        pci_unmap_page(pdev,
 358                                       d->pg_chunk.mapping,
 359                                       q->alloc_size, PCI_DMA_FROMDEVICE);
 360
 361                put_page(d->pg_chunk.page);
 362                d->pg_chunk.page = NULL;
 363        } else {
 364                pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 365                                 q->buf_size, PCI_DMA_FROMDEVICE);
 366                kfree_skb(d->skb);
 367                d->skb = NULL;
 368        }
 369}
 370
 371/**
 372 *      free_rx_bufs - free the Rx buffers on an SGE free list
 373 *      @pdev: the PCI device associated with the adapter
 374 *      @rxq: the SGE free list to clean up
 375 *
 376 *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 377 *      this queue should be stopped before calling this function.
 378 */
 379static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 380{
 381        unsigned int cidx = q->cidx;
 382
 383        while (q->credits--) {
 384                struct rx_sw_desc *d = &q->sdesc[cidx];
 385
 386
 387                clear_rx_desc(pdev, q, d);
 388                if (++cidx == q->size)
 389                        cidx = 0;
 390        }
 391
 392        if (q->pg_chunk.page) {
 393                __free_pages(q->pg_chunk.page, q->order);
 394                q->pg_chunk.page = NULL;
 395        }
 396}
 397
 398/**
 399 *      add_one_rx_buf - add a packet buffer to a free-buffer list
 400 *      @va:  buffer start VA
 401 *      @len: the buffer length
 402 *      @d: the HW Rx descriptor to write
 403 *      @sd: the SW Rx descriptor to write
 404 *      @gen: the generation bit value
 405 *      @pdev: the PCI device associated with the adapter
 406 *
 407 *      Add a buffer of the given length to the supplied HW and SW Rx
 408 *      descriptors.
 409 */
 410static inline int add_one_rx_buf(void *va, unsigned int len,
 411                                 struct rx_desc *d, struct rx_sw_desc *sd,
 412                                 unsigned int gen, struct pci_dev *pdev)
 413{
 414        dma_addr_t mapping;
 415
 416        mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 417        if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 418                return -ENOMEM;
 419
 420        pci_unmap_addr_set(sd, dma_addr, mapping);
 421
 422        d->addr_lo = cpu_to_be32(mapping);
 423        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 424        wmb();
 425        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 426        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 427        return 0;
 428}
 429
 430static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
 431                                   unsigned int gen)
 432{
 433        d->addr_lo = cpu_to_be32(mapping);
 434        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 435        wmb();
 436        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 437        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 438        return 0;
 439}
 440
 441static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 442                          struct rx_sw_desc *sd, gfp_t gfp,
 443                          unsigned int order)
 444{
 445        if (!q->pg_chunk.page) {
 446                dma_addr_t mapping;
 447
 448                q->pg_chunk.page = alloc_pages(gfp, order);
 449                if (unlikely(!q->pg_chunk.page))
 450                        return -ENOMEM;
 451                q->pg_chunk.va = page_address(q->pg_chunk.page);
 452                q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
 453                                    SGE_PG_RSVD;
 454                q->pg_chunk.offset = 0;
 455                mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 456                                       0, q->alloc_size, PCI_DMA_FROMDEVICE);
 457                q->pg_chunk.mapping = mapping;
 458        }
 459        sd->pg_chunk = q->pg_chunk;
 460
 461        prefetch(sd->pg_chunk.p_cnt);
 462
 463        q->pg_chunk.offset += q->buf_size;
 464        if (q->pg_chunk.offset == (PAGE_SIZE << order))
 465                q->pg_chunk.page = NULL;
 466        else {
 467                q->pg_chunk.va += q->buf_size;
 468                get_page(q->pg_chunk.page);
 469        }
 470
 471        if (sd->pg_chunk.offset == 0)
 472                *sd->pg_chunk.p_cnt = 1;
 473        else
 474                *sd->pg_chunk.p_cnt += 1;
 475
 476        return 0;
 477}
 478
 479static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 480{
 481        if (q->pend_cred >= q->credits / 4) {
 482                q->pend_cred = 0;
 483                t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 484        }
 485}
 486
 487/**
 488 *      refill_fl - refill an SGE free-buffer list
 489 *      @adapter: the adapter
 490 *      @q: the free-list to refill
 491 *      @n: the number of new buffers to allocate
 492 *      @gfp: the gfp flags for allocating new buffers
 493 *
 494 *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 495 *      allocated with the supplied gfp flags.  The caller must assure that
 496 *      @n does not exceed the queue's capacity.
 497 */
 498static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 499{
 500        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 501        struct rx_desc *d = &q->desc[q->pidx];
 502        unsigned int count = 0;
 503
 504        while (n--) {
 505                dma_addr_t mapping;
 506                int err;
 507
 508                if (q->use_pages) {
 509                        if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
 510                                                    q->order))) {
 511nomem:                          q->alloc_failed++;
 512                                break;
 513                        }
 514                        mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 515                        pci_unmap_addr_set(sd, dma_addr, mapping);
 516
 517                        add_one_rx_chunk(mapping, d, q->gen);
 518                        pci_dma_sync_single_for_device(adap->pdev, mapping,
 519                                                q->buf_size - SGE_PG_RSVD,
 520                                                PCI_DMA_FROMDEVICE);
 521                } else {
 522                        void *buf_start;
 523
 524                        struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 525                        if (!skb)
 526                                goto nomem;
 527
 528                        sd->skb = skb;
 529                        buf_start = skb->data;
 530                        err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
 531                                             q->gen, adap->pdev);
 532                        if (unlikely(err)) {
 533                                clear_rx_desc(adap->pdev, q, sd);
 534                                break;
 535                        }
 536                }
 537
 538                d++;
 539                sd++;
 540                if (++q->pidx == q->size) {
 541                        q->pidx = 0;
 542                        q->gen ^= 1;
 543                        sd = q->sdesc;
 544                        d = q->desc;
 545                }
 546                count++;
 547        }
 548
 549        q->credits += count;
 550        q->pend_cred += count;
 551        ring_fl_db(adap, q);
 552
 553        return count;
 554}
 555
 556static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 557{
 558        refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 559                  GFP_ATOMIC | __GFP_COMP);
 560}
 561
 562/**
 563 *      recycle_rx_buf - recycle a receive buffer
 564 *      @adapter: the adapter
 565 *      @q: the SGE free list
 566 *      @idx: index of buffer to recycle
 567 *
 568 *      Recycles the specified buffer on the given free list by adding it at
 569 *      the next available slot on the list.
 570 */
 571static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 572                           unsigned int idx)
 573{
 574        struct rx_desc *from = &q->desc[idx];
 575        struct rx_desc *to = &q->desc[q->pidx];
 576
 577        q->sdesc[q->pidx] = q->sdesc[idx];
 578        to->addr_lo = from->addr_lo;    /* already big endian */
 579        to->addr_hi = from->addr_hi;    /* likewise */
 580        wmb();
 581        to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 582        to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 583
 584        if (++q->pidx == q->size) {
 585                q->pidx = 0;
 586                q->gen ^= 1;
 587        }
 588
 589        q->credits++;
 590        q->pend_cred++;
 591        ring_fl_db(adap, q);
 592}
 593
 594/**
 595 *      alloc_ring - allocate resources for an SGE descriptor ring
 596 *      @pdev: the PCI device
 597 *      @nelem: the number of descriptors
 598 *      @elem_size: the size of each descriptor
 599 *      @sw_size: the size of the SW state associated with each ring element
 600 *      @phys: the physical address of the allocated ring
 601 *      @metadata: address of the array holding the SW state for the ring
 602 *
 603 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 604 *      free buffer lists, or response queues.  Each SGE ring requires
 605 *      space for its HW descriptors plus, optionally, space for the SW state
 606 *      associated with each HW entry (the metadata).  The function returns
 607 *      three values: the virtual address for the HW ring (the return value
 608 *      of the function), the physical address of the HW ring, and the address
 609 *      of the SW ring.
 610 */
 611static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 612                        size_t sw_size, dma_addr_t * phys, void *metadata)
 613{
 614        size_t len = nelem * elem_size;
 615        void *s = NULL;
 616        void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 617
 618        if (!p)
 619                return NULL;
 620        if (sw_size && metadata) {
 621                s = kcalloc(nelem, sw_size, GFP_KERNEL);
 622
 623                if (!s) {
 624                        dma_free_coherent(&pdev->dev, len, p, *phys);
 625                        return NULL;
 626                }
 627                *(void **)metadata = s;
 628        }
 629        memset(p, 0, len);
 630        return p;
 631}
 632
 633/**
 634 *      t3_reset_qset - reset a sge qset
 635 *      @q: the queue set
 636 *
 637 *      Reset the qset structure.
 638 *      the NAPI structure is preserved in the event of
 639 *      the qset's reincarnation, for example during EEH recovery.
 640 */
 641static void t3_reset_qset(struct sge_qset *q)
 642{
 643        if (q->adap &&
 644            !(q->adap->flags & NAPI_INIT)) {
 645                memset(q, 0, sizeof(*q));
 646                return;
 647        }
 648
 649        q->adap = NULL;
 650        memset(&q->rspq, 0, sizeof(q->rspq));
 651        memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 652        memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 653        q->txq_stopped = 0;
 654        q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 655        q->rx_reclaim_timer.function = NULL;
 656        q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
 657}
 658
 659
 660/**
 661 *      free_qset - free the resources of an SGE queue set
 662 *      @adapter: the adapter owning the queue set
 663 *      @q: the queue set
 664 *
 665 *      Release the HW and SW resources associated with an SGE queue set, such
 666 *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 667 *      queue set must be quiesced prior to calling this.
 668 */
 669static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 670{
 671        int i;
 672        struct pci_dev *pdev = adapter->pdev;
 673
 674        for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 675                if (q->fl[i].desc) {
 676                        spin_lock_irq(&adapter->sge.reg_lock);
 677                        t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 678                        spin_unlock_irq(&adapter->sge.reg_lock);
 679                        free_rx_bufs(pdev, &q->fl[i]);
 680                        kfree(q->fl[i].sdesc);
 681                        dma_free_coherent(&pdev->dev,
 682                                          q->fl[i].size *
 683                                          sizeof(struct rx_desc), q->fl[i].desc,
 684                                          q->fl[i].phys_addr);
 685                }
 686
 687        for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 688                if (q->txq[i].desc) {
 689                        spin_lock_irq(&adapter->sge.reg_lock);
 690                        t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 691                        spin_unlock_irq(&adapter->sge.reg_lock);
 692                        if (q->txq[i].sdesc) {
 693                                free_tx_desc(adapter, &q->txq[i],
 694                                             q->txq[i].in_use);
 695                                kfree(q->txq[i].sdesc);
 696                        }
 697                        dma_free_coherent(&pdev->dev,
 698                                          q->txq[i].size *
 699                                          sizeof(struct tx_desc),
 700                                          q->txq[i].desc, q->txq[i].phys_addr);
 701                        __skb_queue_purge(&q->txq[i].sendq);
 702                }
 703
 704        if (q->rspq.desc) {
 705                spin_lock_irq(&adapter->sge.reg_lock);
 706                t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 707                spin_unlock_irq(&adapter->sge.reg_lock);
 708                dma_free_coherent(&pdev->dev,
 709                                  q->rspq.size * sizeof(struct rsp_desc),
 710                                  q->rspq.desc, q->rspq.phys_addr);
 711        }
 712
 713        t3_reset_qset(q);
 714}
 715
 716/**
 717 *      init_qset_cntxt - initialize an SGE queue set context info
 718 *      @qs: the queue set
 719 *      @id: the queue set id
 720 *
 721 *      Initializes the TIDs and context ids for the queues of a queue set.
 722 */
 723static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 724{
 725        qs->rspq.cntxt_id = id;
 726        qs->fl[0].cntxt_id = 2 * id;
 727        qs->fl[1].cntxt_id = 2 * id + 1;
 728        qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 729        qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 730        qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 731        qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 732        qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 733}
 734
 735/**
 736 *      sgl_len - calculates the size of an SGL of the given capacity
 737 *      @n: the number of SGL entries
 738 *
 739 *      Calculates the number of flits needed for a scatter/gather list that
 740 *      can hold the given number of entries.
 741 */
 742static inline unsigned int sgl_len(unsigned int n)
 743{
 744        /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 745        return (3 * n) / 2 + (n & 1);
 746}
 747
 748/**
 749 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 750 *      @n: the number of flits
 751 *
 752 *      Calculates the number of Tx descriptors needed for the supplied number
 753 *      of flits.
 754 */
 755static inline unsigned int flits_to_desc(unsigned int n)
 756{
 757        BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 758        return flit_desc_map[n];
 759}
 760
 761/**
 762 *      get_packet - return the next ingress packet buffer from a free list
 763 *      @adap: the adapter that received the packet
 764 *      @fl: the SGE free list holding the packet
 765 *      @len: the packet length including any SGE padding
 766 *      @drop_thres: # of remaining buffers before we start dropping packets
 767 *
 768 *      Get the next packet from a free list and complete setup of the
 769 *      sk_buff.  If the packet is small we make a copy and recycle the
 770 *      original buffer, otherwise we use the original buffer itself.  If a
 771 *      positive drop threshold is supplied packets are dropped and their
 772 *      buffers recycled if (a) the number of remaining buffers is under the
 773 *      threshold and the packet is too big to copy, or (b) the packet should
 774 *      be copied but there is no memory for the copy.
 775 */
 776static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 777                                  unsigned int len, unsigned int drop_thres)
 778{
 779        struct sk_buff *skb = NULL;
 780        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 781
 782        prefetch(sd->skb->data);
 783        fl->credits--;
 784
 785        if (len <= SGE_RX_COPY_THRES) {
 786                skb = alloc_skb(len, GFP_ATOMIC);
 787                if (likely(skb != NULL)) {
 788                        __skb_put(skb, len);
 789                        pci_dma_sync_single_for_cpu(adap->pdev,
 790                                            pci_unmap_addr(sd, dma_addr), len,
 791                                            PCI_DMA_FROMDEVICE);
 792                        memcpy(skb->data, sd->skb->data, len);
 793                        pci_dma_sync_single_for_device(adap->pdev,
 794                                            pci_unmap_addr(sd, dma_addr), len,
 795                                            PCI_DMA_FROMDEVICE);
 796                } else if (!drop_thres)
 797                        goto use_orig_buf;
 798recycle:
 799                recycle_rx_buf(adap, fl, fl->cidx);
 800                return skb;
 801        }
 802
 803        if (unlikely(fl->credits < drop_thres) &&
 804            refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 805                      GFP_ATOMIC | __GFP_COMP) == 0)
 806                goto recycle;
 807
 808use_orig_buf:
 809        pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 810                         fl->buf_size, PCI_DMA_FROMDEVICE);
 811        skb = sd->skb;
 812        skb_put(skb, len);
 813        __refill_fl(adap, fl);
 814        return skb;
 815}
 816
 817/**
 818 *      get_packet_pg - return the next ingress packet buffer from a free list
 819 *      @adap: the adapter that received the packet
 820 *      @fl: the SGE free list holding the packet
 821 *      @len: the packet length including any SGE padding
 822 *      @drop_thres: # of remaining buffers before we start dropping packets
 823 *
 824 *      Get the next packet from a free list populated with page chunks.
 825 *      If the packet is small we make a copy and recycle the original buffer,
 826 *      otherwise we attach the original buffer as a page fragment to a fresh
 827 *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 828 *      and their buffers recycled if (a) the number of remaining buffers is
 829 *      under the threshold and the packet is too big to copy, or (b) there's
 830 *      no system memory.
 831 *
 832 *      Note: this function is similar to @get_packet but deals with Rx buffers
 833 *      that are page chunks rather than sk_buffs.
 834 */
 835static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 836                                     struct sge_rspq *q, unsigned int len,
 837                                     unsigned int drop_thres)
 838{
 839        struct sk_buff *newskb, *skb;
 840        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 841
 842        dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
 843
 844        newskb = skb = q->pg_skb;
 845        if (!skb && (len <= SGE_RX_COPY_THRES)) {
 846                newskb = alloc_skb(len, GFP_ATOMIC);
 847                if (likely(newskb != NULL)) {
 848                        __skb_put(newskb, len);
 849                        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 850                                            PCI_DMA_FROMDEVICE);
 851                        memcpy(newskb->data, sd->pg_chunk.va, len);
 852                        pci_dma_sync_single_for_device(adap->pdev, dma_addr,
 853                                                       len,
 854                                                       PCI_DMA_FROMDEVICE);
 855                } else if (!drop_thres)
 856                        return NULL;
 857recycle:
 858                fl->credits--;
 859                recycle_rx_buf(adap, fl, fl->cidx);
 860                q->rx_recycle_buf++;
 861                return newskb;
 862        }
 863
 864        if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 865                goto recycle;
 866
 867        prefetch(sd->pg_chunk.p_cnt);
 868
 869        if (!skb)
 870                newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 871
 872        if (unlikely(!newskb)) {
 873                if (!drop_thres)
 874                        return NULL;
 875                goto recycle;
 876        }
 877
 878        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 879                                    PCI_DMA_FROMDEVICE);
 880        (*sd->pg_chunk.p_cnt)--;
 881        if (!*sd->pg_chunk.p_cnt)
 882                pci_unmap_page(adap->pdev,
 883                               sd->pg_chunk.mapping,
 884                               fl->alloc_size,
 885                               PCI_DMA_FROMDEVICE);
 886        if (!skb) {
 887                __skb_put(newskb, SGE_RX_PULL_LEN);
 888                memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 889                skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 890                                   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 891                                   len - SGE_RX_PULL_LEN);
 892                newskb->len = len;
 893                newskb->data_len = len - SGE_RX_PULL_LEN;
 894                newskb->truesize += newskb->data_len;
 895        } else {
 896                skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 897                                   sd->pg_chunk.page,
 898                                   sd->pg_chunk.offset, len);
 899                newskb->len += len;
 900                newskb->data_len += len;
 901                newskb->truesize += len;
 902        }
 903
 904        fl->credits--;
 905        /*
 906         * We do not refill FLs here, we let the caller do it to overlap a
 907         * prefetch.
 908         */
 909        return newskb;
 910}
 911
 912/**
 913 *      get_imm_packet - return the next ingress packet buffer from a response
 914 *      @resp: the response descriptor containing the packet data
 915 *
 916 *      Return a packet containing the immediate data of the given response.
 917 */
 918static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 919{
 920        struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 921
 922        if (skb) {
 923                __skb_put(skb, IMMED_PKT_SIZE);
 924                skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 925        }
 926        return skb;
 927}
 928
 929/**
 930 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 931 *      @skb: the packet
 932 *
 933 *      Returns the number of Tx descriptors needed for the given Ethernet
 934 *      packet.  Ethernet packets require addition of WR and CPL headers.
 935 */
 936static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 937{
 938        unsigned int flits;
 939
 940        if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 941                return 1;
 942
 943        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 944        if (skb_shinfo(skb)->gso_size)
 945                flits++;
 946        return flits_to_desc(flits);
 947}
 948
 949/**
 950 *      make_sgl - populate a scatter/gather list for a packet
 951 *      @skb: the packet
 952 *      @sgp: the SGL to populate
 953 *      @start: start address of skb main body data to include in the SGL
 954 *      @len: length of skb main body data to include in the SGL
 955 *      @pdev: the PCI device
 956 *
 957 *      Generates a scatter/gather list for the buffers that make up a packet
 958 *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 959 *      appropriately.
 960 */
 961static inline unsigned int make_sgl(const struct sk_buff *skb,
 962                                    struct sg_ent *sgp, unsigned char *start,
 963                                    unsigned int len, struct pci_dev *pdev)
 964{
 965        dma_addr_t mapping;
 966        unsigned int i, j = 0, nfrags;
 967
 968        if (len) {
 969                mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 970                sgp->len[0] = cpu_to_be32(len);
 971                sgp->addr[0] = cpu_to_be64(mapping);
 972                j = 1;
 973        }
 974
 975        nfrags = skb_shinfo(skb)->nr_frags;
 976        for (i = 0; i < nfrags; i++) {
 977                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 978
 979                mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 980                                       frag->size, PCI_DMA_TODEVICE);
 981                sgp->len[j] = cpu_to_be32(frag->size);
 982                sgp->addr[j] = cpu_to_be64(mapping);
 983                j ^= 1;
 984                if (j == 0)
 985                        ++sgp;
 986        }
 987        if (j)
 988                sgp->len[j] = 0;
 989        return ((nfrags + (len != 0)) * 3) / 2 + j;
 990}
 991
 992/**
 993 *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 994 *      @adap: the adapter
 995 *      @q: the Tx queue
 996 *
 997 *      Ring the doorbel if a Tx queue is asleep.  There is a natural race,
 998 *      where the HW is going to sleep just after we checked, however,
 999 *      then the interrupt handler will detect the outstanding TX packet
1000 *      and ring the doorbell for us.
1001 *
1002 *      When GTS is disabled we unconditionally ring the doorbell.
1003 */
1004static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1005{
1006#if USE_GTS
1007        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1008        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1009                set_bit(TXQ_LAST_PKT_DB, &q->flags);
1010                t3_write_reg(adap, A_SG_KDOORBELL,
1011                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1012        }
1013#else
1014        wmb();                  /* write descriptors before telling HW */
1015        t3_write_reg(adap, A_SG_KDOORBELL,
1016                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1017#endif
1018}
1019
/*
 * Store the generation value in the last flit of a Tx descriptor.  This is
 * only done when the SGE is built with two generation bits; otherwise the
 * function is a no-op.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
#endif
}
1026
1027/**
1028 *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
1029 *      @ndesc: number of Tx descriptors spanned by the SGL
1030 *      @skb: the packet corresponding to the WR
1031 *      @d: first Tx descriptor to be written
1032 *      @pidx: index of above descriptors
1033 *      @q: the SGE Tx queue
1034 *      @sgl: the SGL
1035 *      @flits: number of flits to the start of the SGL in the first descriptor
1036 *      @sgl_flits: the SGL size in flits
1037 *      @gen: the Tx descriptor generation
1038 *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
1039 *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
1040 *
1041 *      Write a work request header and an associated SGL.  If the SGL is
1042 *      small enough to fit into one Tx descriptor it has already been written
1043 *      and we just need to write the WR header.  Otherwise we distribute the
1044 *      SGL across the number of descriptors it spans.
1045 */
1046static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1047                             struct tx_desc *d, unsigned int pidx,
1048                             const struct sge_txq *q,
1049                             const struct sg_ent *sgl,
1050                             unsigned int flits, unsigned int sgl_flits,
1051                             unsigned int gen, __be32 wr_hi,
1052                             __be32 wr_lo)
1053{
1054        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1055        struct tx_sw_desc *sd = &q->sdesc[pidx];
1056
1057        sd->skb = skb;
1058        if (need_skb_unmap()) {
1059                sd->fragidx = 0;
1060                sd->addr_idx = 0;
1061                sd->sflit = flits;
1062        }
1063
1064        if (likely(ndesc == 1)) {
1065                sd->eop = 1;
1066                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1067                                   V_WR_SGLSFLT(flits)) | wr_hi;
1068                wmb();
1069                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1070                                   V_WR_GEN(gen)) | wr_lo;
1071                wr_gen2(d, gen);
1072        } else {
1073                unsigned int ogen = gen;
1074                const u64 *fp = (const u64 *)sgl;
1075                struct work_request_hdr *wp = wrp;
1076
1077                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1078                                   V_WR_SGLSFLT(flits)) | wr_hi;
1079
1080                while (sgl_flits) {
1081                        unsigned int avail = WR_FLITS - flits;
1082
1083                        if (avail > sgl_flits)
1084                                avail = sgl_flits;
1085                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1086                        sgl_flits -= avail;
1087                        ndesc--;
1088                        if (!sgl_flits)
1089                                break;
1090
1091                        fp += avail;
1092                        d++;
1093                        sd->eop = 0;
1094                        sd++;
1095                        if (++pidx == q->size) {
1096                                pidx = 0;
1097                                gen ^= 1;
1098                                d = q->desc;
1099                                sd = q->sdesc;
1100                        }
1101
1102                        sd->skb = skb;
1103                        wrp = (struct work_request_hdr *)d;
1104                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1105                                           V_WR_SGLSFLT(1)) | wr_hi;
1106                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1107                                                        sgl_flits + 1)) |
1108                                           V_WR_GEN(gen)) | wr_lo;
1109                        wr_gen2(d, gen);
1110                        flits = 1;
1111                }
1112                sd->eop = 1;
1113                wrp->wr_hi |= htonl(F_WR_EOP);
1114                wmb();
1115                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1116                wr_gen2((struct tx_desc *)wp, ogen);
1117                WARN_ON(ndesc != 0);
1118        }
1119}
1120
1121/**
1122 *      write_tx_pkt_wr - write a TX_PKT work request
1123 *      @adap: the adapter
1124 *      @skb: the packet to send
1125 *      @pi: the egress interface
1126 *      @pidx: index of the first Tx descriptor to write
1127 *      @gen: the generation value to use
1128 *      @q: the Tx queue
1129 *      @ndesc: number of descriptors the packet will occupy
1130 *      @compl: the value of the COMPL bit to use
1131 *
1132 *      Generate a TX_PKT work request to send the supplied packet.
1133 */
1134static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1135                            const struct port_info *pi,
1136                            unsigned int pidx, unsigned int gen,
1137                            struct sge_txq *q, unsigned int ndesc,
1138                            unsigned int compl)
1139{
1140        unsigned int flits, sgl_flits, cntrl, tso_info;
1141        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1142        struct tx_desc *d = &q->desc[pidx];
1143        struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1144
1145        cpl->len = htonl(skb->len);
1146        cntrl = V_TXPKT_INTF(pi->port_id);
1147
1148        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1149                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1150
1151        tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1152        if (tso_info) {
1153                int eth_type;
1154                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1155
1156                d->flit[2] = 0;
1157                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1158                hdr->cntrl = htonl(cntrl);
1159                eth_type = skb_network_offset(skb) == ETH_HLEN ?
1160                    CPL_ETH_II : CPL_ETH_II_VLAN;
1161                tso_info |= V_LSO_ETH_TYPE(eth_type) |
1162                    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1163                    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1164                hdr->lso_info = htonl(tso_info);
1165                flits = 3;
1166        } else {
1167                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1168                cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1169                cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1170                cpl->cntrl = htonl(cntrl);
1171
1172                if (skb->len <= WR_LEN - sizeof(*cpl)) {
1173                        q->sdesc[pidx].skb = NULL;
1174                        if (!skb->data_len)
1175                                skb_copy_from_linear_data(skb, &d->flit[2],
1176                                                          skb->len);
1177                        else
1178                                skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1179
1180                        flits = (skb->len + 7) / 8 + 2;
1181                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1182                                              V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1183                                              | F_WR_SOP | F_WR_EOP | compl);
1184                        wmb();
1185                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1186                                              V_WR_TID(q->token));
1187                        wr_gen2(d, gen);
1188                        kfree_skb(skb);
1189                        return;
1190                }
1191
1192                flits = 2;
1193        }
1194
1195        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1196        sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1197
1198        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1199                         htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1200                         htonl(V_WR_TID(q->token)));
1201}
1202
1203static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1204                                    struct sge_qset *qs, struct sge_txq *q)
1205{
1206        netif_tx_stop_queue(txq);
1207        set_bit(TXQ_ETH, &qs->txq_stopped);
1208        q->stops++;
1209}
1210
1211/**
1212 *      eth_xmit - add a packet to the Ethernet Tx queue
1213 *      @skb: the packet
1214 *      @dev: the egress net device
1215 *
1216 *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1217 */
1218int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1219{
1220        int qidx;
1221        unsigned int ndesc, pidx, credits, gen, compl;
1222        const struct port_info *pi = netdev_priv(dev);
1223        struct adapter *adap = pi->adapter;
1224        struct netdev_queue *txq;
1225        struct sge_qset *qs;
1226        struct sge_txq *q;
1227
1228        /*
1229         * The chip min packet length is 9 octets but play safe and reject
1230         * anything shorter than an Ethernet header.
1231         */
1232        if (unlikely(skb->len < ETH_HLEN)) {
1233                dev_kfree_skb(skb);
1234                return NETDEV_TX_OK;
1235        }
1236
1237        qidx = skb_get_queue_mapping(skb);
1238        qs = &pi->qs[qidx];
1239        q = &qs->txq[TXQ_ETH];
1240        txq = netdev_get_tx_queue(dev, qidx);
1241
1242        spin_lock(&q->lock);
1243        reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1244
1245        credits = q->size - q->in_use;
1246        ndesc = calc_tx_descs(skb);
1247
1248        if (unlikely(credits < ndesc)) {
1249                t3_stop_tx_queue(txq, qs, q);
1250                dev_err(&adap->pdev->dev,
1251                        "%s: Tx ring %u full while queue awake!\n",
1252                        dev->name, q->cntxt_id & 7);
1253                spin_unlock(&q->lock);
1254                return NETDEV_TX_BUSY;
1255        }
1256
1257        q->in_use += ndesc;
1258        if (unlikely(credits - ndesc < q->stop_thres)) {
1259                t3_stop_tx_queue(txq, qs, q);
1260
1261                if (should_restart_tx(q) &&
1262                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1263                        q->restarts++;
1264                        netif_tx_wake_queue(txq);
1265                }
1266        }
1267
1268        gen = q->gen;
1269        q->unacked += ndesc;
1270        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1271        q->unacked &= 7;
1272        pidx = q->pidx;
1273        q->pidx += ndesc;
1274        if (q->pidx >= q->size) {
1275                q->pidx -= q->size;
1276                q->gen ^= 1;
1277        }
1278
1279        /* update port statistics */
1280        if (skb->ip_summed == CHECKSUM_COMPLETE)
1281                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1282        if (skb_shinfo(skb)->gso_size)
1283                qs->port_stats[SGE_PSTAT_TSO]++;
1284        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1285                qs->port_stats[SGE_PSTAT_VLANINS]++;
1286
1287        dev->trans_start = jiffies;
1288        spin_unlock(&q->lock);
1289
1290        /*
1291         * We do not use Tx completion interrupts to free DMAd Tx packets.
1292         * This is good for performamce but means that we rely on new Tx
1293         * packets arriving to run the destructors of completed packets,
1294         * which open up space in their sockets' send queues.  Sometimes
1295         * we do not get such new packets causing Tx to stall.  A single
1296         * UDP transmitter is a good example of this situation.  We have
1297         * a clean up timer that periodically reclaims completed packets
1298         * but it doesn't run often enough (nor do we want it to) to prevent
1299         * lengthy stalls.  A solution to this problem is to run the
1300         * destructor early, after the packet is queued but before it's DMAd.
1301         * A cons is that we lie to socket memory accounting, but the amount
1302         * of extra memory is reasonable (limited by the number of Tx
1303         * descriptors), the packets do actually get freed quickly by new
1304         * packets almost always, and for protocols like TCP that wait for
1305         * acks to really free up the data the extra memory is even less.
1306         * On the positive side we run the destructors on the sending CPU
1307         * rather than on a potentially different completing CPU, usually a
1308         * good thing.  We also run them without holding our Tx queue lock,
1309         * unlike what reclaim_completed_tx() would otherwise do.
1310         *
1311         * Run the destructor before telling the DMA engine about the packet
1312         * to make sure it doesn't complete and get freed prematurely.
1313         */
1314        if (likely(!skb_shared(skb)))
1315                skb_orphan(skb);
1316
1317        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1318        check_ring_tx_db(adap, q);
1319        return NETDEV_TX_OK;
1320}
1321
1322/**
1323 *      write_imm - write a packet into a Tx descriptor as immediate data
1324 *      @d: the Tx descriptor to write
1325 *      @skb: the packet
1326 *      @len: the length of packet data to write as immediate data
1327 *      @gen: the generation bit value to write
1328 *
1329 *      Writes a packet as immediate data into a Tx descriptor.  The packet
1330 *      contains a work request at its beginning.  We must write the packet
1331 *      carefully so the SGE doesn't read it accidentally before it's written
1332 *      in its entirety.
1333 */
1334static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1335                             unsigned int len, unsigned int gen)
1336{
1337        struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1338        struct work_request_hdr *to = (struct work_request_hdr *)d;
1339
1340        if (likely(!skb->data_len))
1341                memcpy(&to[1], &from[1], len - sizeof(*from));
1342        else
1343                skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1344
1345        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1346                                        V_WR_BCNTLFLT(len & 7));
1347        wmb();
1348        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1349                                        V_WR_LEN((len + 7) / 8));
1350        wr_gen2(d, gen);
1351        kfree_skb(skb);
1352}
1353
1354/**
1355 *      check_desc_avail - check descriptor availability on a send queue
1356 *      @adap: the adapter
1357 *      @q: the send queue
1358 *      @skb: the packet needing the descriptors
1359 *      @ndesc: the number of Tx descriptors needed
1360 *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1361 *
1362 *      Checks if the requested number of Tx descriptors is available on an
1363 *      SGE send queue.  If the queue is already suspended or not enough
1364 *      descriptors are available the packet is queued for later transmission.
1365 *      Must be called with the Tx queue locked.
1366 *
1367 *      Returns 0 if enough descriptors are available, 1 if there aren't
1368 *      enough descriptors and the packet has been queued, and 2 if the caller
1369 *      needs to retry because there weren't enough descriptors at the
1370 *      beginning of the call but some freed up in the mean time.
1371 */
1372static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1373                                   struct sk_buff *skb, unsigned int ndesc,
1374                                   unsigned int qid)
1375{
1376        if (unlikely(!skb_queue_empty(&q->sendq))) {
1377              addq_exit:__skb_queue_tail(&q->sendq, skb);
1378                return 1;
1379        }
1380        if (unlikely(q->size - q->in_use < ndesc)) {
1381                struct sge_qset *qs = txq_to_qset(q, qid);
1382
1383                set_bit(qid, &qs->txq_stopped);
1384                smp_mb__after_clear_bit();
1385
1386                if (should_restart_tx(q) &&
1387                    test_and_clear_bit(qid, &qs->txq_stopped))
1388                        return 2;
1389
1390                q->stops++;
1391                goto addq_exit;
1392        }
1393        return 0;
1394}
1395
1396/**
1397 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1398 *      @q: the SGE control Tx queue
1399 *
1400 *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1401 *      that send only immediate data (presently just the control queues) and
1402 *      thus do not have any sk_buffs to release.
1403 */
1404static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1405{
1406        unsigned int reclaim = q->processed - q->cleaned;
1407
1408        q->in_use -= reclaim;
1409        q->cleaned += reclaim;
1410}
1411
1412static inline int immediate(const struct sk_buff *skb)
1413{
1414        return skb->len <= WR_LEN;
1415}
1416
1417/**
1418 *      ctrl_xmit - send a packet through an SGE control Tx queue
1419 *      @adap: the adapter
1420 *      @q: the control queue
1421 *      @skb: the packet
1422 *
1423 *      Send a packet through an SGE control Tx queue.  Packets sent through
1424 *      a control queue must fit entirely as immediate data in a single Tx
1425 *      descriptor and have no page fragments.
1426 */
1427static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1428                     struct sk_buff *skb)
1429{
1430        int ret;
1431        struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1432
1433        if (unlikely(!immediate(skb))) {
1434                WARN_ON(1);
1435                dev_kfree_skb(skb);
1436                return NET_XMIT_SUCCESS;
1437        }
1438
1439        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1440        wrp->wr_lo = htonl(V_WR_TID(q->token));
1441
1442        spin_lock(&q->lock);
1443      again:reclaim_completed_tx_imm(q);
1444
1445        ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1446        if (unlikely(ret)) {
1447                if (ret == 1) {
1448                        spin_unlock(&q->lock);
1449                        return NET_XMIT_CN;
1450                }
1451                goto again;
1452        }
1453
1454        write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1455
1456        q->in_use++;
1457        if (++q->pidx >= q->size) {
1458                q->pidx = 0;
1459                q->gen ^= 1;
1460        }
1461        spin_unlock(&q->lock);
1462        wmb();
1463        t3_write_reg(adap, A_SG_KDOORBELL,
1464                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1465        return NET_XMIT_SUCCESS;
1466}
1467
1468/**
1469 *      restart_ctrlq - restart a suspended control queue
1470 *      @qs: the queue set cotaining the control queue
1471 *
1472 *      Resumes transmission on a suspended Tx control queue.
1473 */
1474static void restart_ctrlq(unsigned long data)
1475{
1476        struct sk_buff *skb;
1477        struct sge_qset *qs = (struct sge_qset *)data;
1478        struct sge_txq *q = &qs->txq[TXQ_CTRL];
1479
1480        spin_lock(&q->lock);
1481      again:reclaim_completed_tx_imm(q);
1482
1483        while (q->in_use < q->size &&
1484               (skb = __skb_dequeue(&q->sendq)) != NULL) {
1485
1486                write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1487
1488                if (++q->pidx >= q->size) {
1489                        q->pidx = 0;
1490                        q->gen ^= 1;
1491                }
1492                q->in_use++;
1493        }
1494
1495        if (!skb_queue_empty(&q->sendq)) {
1496                set_bit(TXQ_CTRL, &qs->txq_stopped);
1497                smp_mb__after_clear_bit();
1498
1499                if (should_restart_tx(q) &&
1500                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1501                        goto again;
1502                q->stops++;
1503        }
1504
1505        spin_unlock(&q->lock);
1506        wmb();
1507        t3_write_reg(qs->adap, A_SG_KDOORBELL,
1508                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1509}
1510
1511/*
1512 * Send a management message through control queue 0
1513 */
1514int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1515{
1516        int ret;
1517        local_bh_disable();
1518        ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1519        local_bh_enable();
1520
1521        return ret;
1522}
1523
1524/**
1525 *      deferred_unmap_destructor - unmap a packet when it is freed
1526 *      @skb: the packet
1527 *
1528 *      This is the packet destructor used for Tx packets that need to remain
1529 *      mapped until they are freed rather than until their Tx descriptors are
1530 *      freed.
1531 */
1532static void deferred_unmap_destructor(struct sk_buff *skb)
1533{
1534        int i;
1535        const dma_addr_t *p;
1536        const struct skb_shared_info *si;
1537        const struct deferred_unmap_info *dui;
1538
1539        dui = (struct deferred_unmap_info *)skb->head;
1540        p = dui->addr;
1541
1542        if (skb->tail - skb->transport_header)
1543                pci_unmap_single(dui->pdev, *p++,
1544                                 skb->tail - skb->transport_header,
1545                                 PCI_DMA_TODEVICE);
1546
1547        si = skb_shinfo(skb);
1548        for (i = 0; i < si->nr_frags; i++)
1549                pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1550                               PCI_DMA_TODEVICE);
1551}
1552
1553static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1554                                     const struct sg_ent *sgl, int sgl_flits)
1555{
1556        dma_addr_t *p;
1557        struct deferred_unmap_info *dui;
1558
1559        dui = (struct deferred_unmap_info *)skb->head;
1560        dui->pdev = pdev;
1561        for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1562                *p++ = be64_to_cpu(sgl->addr[0]);
1563                *p++ = be64_to_cpu(sgl->addr[1]);
1564        }
1565        if (sgl_flits)
1566                *p = be64_to_cpu(sgl->addr[0]);
1567}
1568
1569/**
1570 *      write_ofld_wr - write an offload work request
1571 *      @adap: the adapter
1572 *      @skb: the packet to send
1573 *      @q: the Tx queue
1574 *      @pidx: index of the first Tx descriptor to write
1575 *      @gen: the generation value to use
1576 *      @ndesc: number of descriptors the packet will occupy
1577 *
1578 *      Write an offload work request to send the supplied packet.  The packet
1579 *      data already carry the work request with most fields populated.
1580 */
1581static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1582                          struct sge_txq *q, unsigned int pidx,
1583                          unsigned int gen, unsigned int ndesc)
1584{
1585        unsigned int sgl_flits, flits;
1586        struct work_request_hdr *from;
1587        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1588        struct tx_desc *d = &q->desc[pidx];
1589
1590        if (immediate(skb)) {
1591                q->sdesc[pidx].skb = NULL;
1592                write_imm(d, skb, skb->len, gen);
1593                return;
1594        }
1595
1596        /* Only TX_DATA builds SGLs */
1597
1598        from = (struct work_request_hdr *)skb->data;
1599        memcpy(&d->flit[1], &from[1],
1600               skb_transport_offset(skb) - sizeof(*from));
1601
1602        flits = skb_transport_offset(skb) / 8;
1603        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1604        sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1605                             skb->tail - skb->transport_header,
1606                             adap->pdev);
1607        if (need_skb_unmap()) {
1608                setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1609                skb->destructor = deferred_unmap_destructor;
1610        }
1611
1612        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1613                         gen, from->wr_hi, from->wr_lo);
1614}
1615
1616/**
1617 *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1618 *      @skb: the packet
1619 *
1620 *      Returns the number of Tx descriptors needed for the given offload
1621 *      packet.  These packets are already fully constructed.
1622 */
1623static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1624{
1625        unsigned int flits, cnt;
1626
1627        if (skb->len <= WR_LEN)
1628                return 1;       /* packet fits as immediate data */
1629
1630        flits = skb_transport_offset(skb) / 8;  /* headers */
1631        cnt = skb_shinfo(skb)->nr_frags;
1632        if (skb->tail != skb->transport_header)
1633                cnt++;
1634        return flits_to_desc(flits + sgl_len(cnt));
1635}
1636
1637/**
1638 *      ofld_xmit - send a packet through an offload queue
1639 *      @adap: the adapter
1640 *      @q: the Tx offload queue
1641 *      @skb: the packet
1642 *
1643 *      Send an offload packet through an SGE offload queue.
1644 */
1645static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1646                     struct sk_buff *skb)
1647{
1648        int ret;
1649        unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1650
1651        spin_lock(&q->lock);
1652again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1653
1654        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1655        if (unlikely(ret)) {
1656                if (ret == 1) {
1657                        skb->priority = ndesc;  /* save for restart */
1658                        spin_unlock(&q->lock);
1659                        return NET_XMIT_CN;
1660                }
1661                goto again;
1662        }
1663
1664        gen = q->gen;
1665        q->in_use += ndesc;
1666        pidx = q->pidx;
1667        q->pidx += ndesc;
1668        if (q->pidx >= q->size) {
1669                q->pidx -= q->size;
1670                q->gen ^= 1;
1671        }
1672        spin_unlock(&q->lock);
1673
1674        write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1675        check_ring_tx_db(adap, q);
1676        return NET_XMIT_SUCCESS;
1677}
1678
1679/**
1680 *      restart_offloadq - restart a suspended offload queue
1681 *      @qs: the queue set cotaining the offload queue
1682 *
1683 *      Resumes transmission on a suspended Tx offload queue.
1684 */
1685static void restart_offloadq(unsigned long data)
1686{
1687        struct sk_buff *skb;
1688        struct sge_qset *qs = (struct sge_qset *)data;
1689        struct sge_txq *q = &qs->txq[TXQ_OFLD];
1690        const struct port_info *pi = netdev_priv(qs->netdev);
1691        struct adapter *adap = pi->adapter;
1692
1693        spin_lock(&q->lock);
1694again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1695
1696        while ((skb = skb_peek(&q->sendq)) != NULL) {
1697                unsigned int gen, pidx;
1698                unsigned int ndesc = skb->priority;
1699
1700                if (unlikely(q->size - q->in_use < ndesc)) {
1701                        set_bit(TXQ_OFLD, &qs->txq_stopped);
1702                        smp_mb__after_clear_bit();
1703
1704                        if (should_restart_tx(q) &&
1705                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1706                                goto again;
1707                        q->stops++;
1708                        break;
1709                }
1710
1711                gen = q->gen;
1712                q->in_use += ndesc;
1713                pidx = q->pidx;
1714                q->pidx += ndesc;
1715                if (q->pidx >= q->size) {
1716                        q->pidx -= q->size;
1717                        q->gen ^= 1;
1718                }
1719                __skb_unlink(skb, &q->sendq);
1720                spin_unlock(&q->lock);
1721
1722                write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1723                spin_lock(&q->lock);
1724        }
1725        spin_unlock(&q->lock);
1726
1727#if USE_GTS
1728        set_bit(TXQ_RUNNING, &q->flags);
1729        set_bit(TXQ_LAST_PKT_DB, &q->flags);
1730#endif
1731        wmb();
1732        t3_write_reg(adap, A_SG_KDOORBELL,
1733                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1734}
1735
1736/**
1737 *      queue_set - return the queue set a packet should use
1738 *      @skb: the packet
1739 *
1740 *      Maps a packet to the SGE queue set it should use.  The desired queue
1741 *      set is carried in bits 1-3 in the packet's priority.
1742 */
1743static inline int queue_set(const struct sk_buff *skb)
1744{
1745        return skb->priority >> 1;
1746}
1747
1748/**
1749 *      is_ctrl_pkt - return whether an offload packet is a control packet
1750 *      @skb: the packet
1751 *
1752 *      Determines whether an offload packet should use an OFLD or a CTRL
1753 *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1754 */
1755static inline int is_ctrl_pkt(const struct sk_buff *skb)
1756{
1757        return skb->priority & 1;
1758}
1759
1760/**
1761 *      t3_offload_tx - send an offload packet
1762 *      @tdev: the offload device to send to
1763 *      @skb: the packet
1764 *
1765 *      Sends an offload packet.  We use the packet priority to select the
1766 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1767 *      should be sent as regular or control, bits 1-3 select the queue set.
1768 */
1769int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1770{
1771        struct adapter *adap = tdev2adap(tdev);
1772        struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1773
1774        if (unlikely(is_ctrl_pkt(skb)))
1775                return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1776
1777        return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1778}
1779
1780/**
1781 *      offload_enqueue - add an offload packet to an SGE offload receive queue
1782 *      @q: the SGE response queue
1783 *      @skb: the packet
1784 *
1785 *      Add a new offload packet to an SGE response queue's offload packet
1786 *      queue.  If the packet is the first on the queue it schedules the RX
1787 *      softirq to process the queue.
1788 */
1789static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1790{
1791        int was_empty = skb_queue_empty(&q->rx_queue);
1792
1793        __skb_queue_tail(&q->rx_queue, skb);
1794
1795        if (was_empty) {
1796                struct sge_qset *qs = rspq_to_qset(q);
1797
1798                napi_schedule(&qs->napi);
1799        }
1800}
1801
1802/**
1803 *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1804 *      @tdev: the offload device that will be receiving the packets
1805 *      @q: the SGE response queue that assembled the bundle
1806 *      @skbs: the partial bundle
1807 *      @n: the number of packets in the bundle
1808 *
1809 *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1810 */
1811static inline void deliver_partial_bundle(struct t3cdev *tdev,
1812                                          struct sge_rspq *q,
1813                                          struct sk_buff *skbs[], int n)
1814{
1815        if (n) {
1816                q->offload_bundles++;
1817                tdev->recv(tdev, skbs, n);
1818        }
1819}
1820
1821/**
1822 *      ofld_poll - NAPI handler for offload packets in interrupt mode
1823 *      @dev: the network device doing the polling
1824 *      @budget: polling budget
1825 *
1826 *      The NAPI handler for offload packets when a response queue is serviced
1827 *      by the hard interrupt handler, i.e., when it's operating in non-polling
1828 *      mode.  Creates small packet batches and sends them through the offload
1829 *      receive handler.  Batches need to be of modest size as we do prefetches
1830 *      on the packets in each.
1831 */
1832static int ofld_poll(struct napi_struct *napi, int budget)
1833{
1834        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1835        struct sge_rspq *q = &qs->rspq;
1836        struct adapter *adapter = qs->adap;
1837        int work_done = 0;
1838
1839        while (work_done < budget) {
1840                struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1841                struct sk_buff_head queue;
1842                int ngathered;
1843
1844                spin_lock_irq(&q->lock);
1845                __skb_queue_head_init(&queue);
1846                skb_queue_splice_init(&q->rx_queue, &queue);
1847                if (skb_queue_empty(&queue)) {
1848                        napi_complete(napi);
1849                        spin_unlock_irq(&q->lock);
1850                        return work_done;
1851                }
1852                spin_unlock_irq(&q->lock);
1853
1854                ngathered = 0;
1855                skb_queue_walk_safe(&queue, skb, tmp) {
1856                        if (work_done >= budget)
1857                                break;
1858                        work_done++;
1859
1860                        __skb_unlink(skb, &queue);
1861                        prefetch(skb->data);
1862                        skbs[ngathered] = skb;
1863                        if (++ngathered == RX_BUNDLE_SIZE) {
1864                                q->offload_bundles++;
1865                                adapter->tdev.recv(&adapter->tdev, skbs,
1866                                                   ngathered);
1867                                ngathered = 0;
1868                        }
1869                }
1870                if (!skb_queue_empty(&queue)) {
1871                        /* splice remaining packets back onto Rx queue */
1872                        spin_lock_irq(&q->lock);
1873                        skb_queue_splice(&queue, &q->rx_queue);
1874                        spin_unlock_irq(&q->lock);
1875                }
1876                deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1877        }
1878
1879        return work_done;
1880}
1881
1882/**
1883 *      rx_offload - process a received offload packet
1884 *      @tdev: the offload device receiving the packet
1885 *      @rq: the response queue that received the packet
1886 *      @skb: the packet
1887 *      @rx_gather: a gather list of packets if we are building a bundle
1888 *      @gather_idx: index of the next available slot in the bundle
1889 *
1890 *      Process an ingress offload pakcet and add it to the offload ingress
1891 *      queue.  Returns the index of the next available slot in the bundle.
1892 */
1893static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1894                             struct sk_buff *skb, struct sk_buff *rx_gather[],
1895                             unsigned int gather_idx)
1896{
1897        skb_reset_mac_header(skb);
1898        skb_reset_network_header(skb);
1899        skb_reset_transport_header(skb);
1900
1901        if (rq->polling) {
1902                rx_gather[gather_idx++] = skb;
1903                if (gather_idx == RX_BUNDLE_SIZE) {
1904                        tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1905                        gather_idx = 0;
1906                        rq->offload_bundles++;
1907                }
1908        } else
1909                offload_enqueue(rq, skb);
1910
1911        return gather_idx;
1912}
1913
1914/**
1915 *      restart_tx - check whether to restart suspended Tx queues
1916 *      @qs: the queue set to resume
1917 *
1918 *      Restarts suspended Tx queues of an SGE queue set if they have enough
1919 *      free resources to resume operation.
1920 */
1921static void restart_tx(struct sge_qset *qs)
1922{
1923        if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1924            should_restart_tx(&qs->txq[TXQ_ETH]) &&
1925            test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1926                qs->txq[TXQ_ETH].restarts++;
1927                if (netif_running(qs->netdev))
1928                        netif_tx_wake_queue(qs->tx_q);
1929        }
1930
1931        if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1932            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1933            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1934                qs->txq[TXQ_OFLD].restarts++;
1935                tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1936        }
1937        if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1938            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1939            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1940                qs->txq[TXQ_CTRL].restarts++;
1941                tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1942        }
1943}
1944
1945/**
1946 *      cxgb3_arp_process - process an ARP request probing a private IP address
1947 *      @adapter: the adapter
1948 *      @skb: the skbuff containing the ARP request
1949 *
1950 *      Check if the ARP request is probing the private IP address
1951 *      dedicated to iSCSI, generate an ARP reply if so.
1952 */
1953static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1954{
1955        struct net_device *dev = skb->dev;
1956        struct port_info *pi;
1957        struct arphdr *arp;
1958        unsigned char *arp_ptr;
1959        unsigned char *sha;
1960        __be32 sip, tip;
1961
1962        if (!dev)
1963                return;
1964
1965        skb_reset_network_header(skb);
1966        arp = arp_hdr(skb);
1967
1968        if (arp->ar_op != htons(ARPOP_REQUEST))
1969                return;
1970
1971        arp_ptr = (unsigned char *)(arp + 1);
1972        sha = arp_ptr;
1973        arp_ptr += dev->addr_len;
1974        memcpy(&sip, arp_ptr, sizeof(sip));
1975        arp_ptr += sizeof(sip);
1976        arp_ptr += dev->addr_len;
1977        memcpy(&tip, arp_ptr, sizeof(tip));
1978
1979        pi = netdev_priv(dev);
1980        if (tip != pi->iscsi_ipv4addr)
1981                return;
1982
1983        arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1984                 dev->dev_addr, sha);
1985
1986}
1987
1988static inline int is_arp(struct sk_buff *skb)
1989{
1990        return skb->protocol == htons(ETH_P_ARP);
1991}
1992
1993/**
1994 *      rx_eth - process an ingress ethernet packet
1995 *      @adap: the adapter
1996 *      @rq: the response queue that received the packet
1997 *      @skb: the packet
1998 *      @pad: amount of padding at the start of the buffer
1999 *
2000 *      Process an ingress ethernet pakcet and deliver it to the stack.
2001 *      The padding is 2 if the packet was delivered in an Rx buffer and 0
2002 *      if it was immediate data in a response.
2003 */
2004static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2005                   struct sk_buff *skb, int pad, int lro)
2006{
2007        struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2008        struct sge_qset *qs = rspq_to_qset(rq);
2009        struct port_info *pi;
2010
2011        skb_pull(skb, sizeof(*p) + pad);
2012        skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2013        pi = netdev_priv(skb->dev);
2014        if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2015            p->csum == htons(0xffff) && !p->fragment) {
2016                qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2017                skb->ip_summed = CHECKSUM_UNNECESSARY;
2018        } else
2019                skb->ip_summed = CHECKSUM_NONE;
2020        skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2021
2022        if (unlikely(p->vlan_valid)) {
2023                struct vlan_group *grp = pi->vlan_grp;
2024
2025                qs->port_stats[SGE_PSTAT_VLANEX]++;
2026                if (likely(grp))
2027                        if (lro)
2028                                vlan_gro_receive(&qs->napi, grp,
2029                                                 ntohs(p->vlan), skb);
2030                        else {
2031                                if (unlikely(pi->iscsi_ipv4addr &&
2032                                    is_arp(skb))) {
2033                                        unsigned short vtag = ntohs(p->vlan) &
2034                                                                VLAN_VID_MASK;
2035                                        skb->dev = vlan_group_get_device(grp,
2036                                                                         vtag);
2037                                        cxgb3_arp_process(adap, skb);
2038                                }
2039                                __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2040                                                  rq->polling);
2041                        }
2042                else
2043                        dev_kfree_skb_any(skb);
2044        } else if (rq->polling) {
2045                if (lro)
2046                        napi_gro_receive(&qs->napi, skb);
2047                else {
2048                        if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
2049                                cxgb3_arp_process(adap, skb);
2050                        netif_receive_skb(skb);
2051                }
2052        } else
2053                netif_rx(skb);
2054}
2055
2056static inline int is_eth_tcp(u32 rss)
2057{
2058        return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2059}
2060
2061/**
2062 *      lro_add_page - add a page chunk to an LRO session
2063 *      @adap: the adapter
2064 *      @qs: the associated queue set
2065 *      @fl: the free list containing the page chunk to add
2066 *      @len: packet length
2067 *      @complete: Indicates the last fragment of a frame
2068 *
2069 *      Add a received packet contained in a page chunk to an existing LRO
2070 *      session.
2071 */
2072static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2073                         struct sge_fl *fl, int len, int complete)
2074{
2075        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2076        struct cpl_rx_pkt *cpl;
2077        struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2078        int nr_frags = qs->lro_frag_tbl.nr_frags;
2079        int frag_len = qs->lro_frag_tbl.len;
2080        int offset = 0;
2081
2082        if (!nr_frags) {
2083                offset = 2 + sizeof(struct cpl_rx_pkt);
2084                qs->lro_va = cpl = sd->pg_chunk.va + 2;
2085        }
2086
2087        fl->credits--;
2088
2089        len -= offset;
2090        pci_dma_sync_single_for_cpu(adap->pdev,
2091                                    pci_unmap_addr(sd, dma_addr),
2092                                    fl->buf_size - SGE_PG_RSVD,
2093                                    PCI_DMA_FROMDEVICE);
2094
2095        (*sd->pg_chunk.p_cnt)--;
2096        if (!*sd->pg_chunk.p_cnt)
2097                pci_unmap_page(adap->pdev,
2098                               sd->pg_chunk.mapping,
2099                               fl->alloc_size,
2100                               PCI_DMA_FROMDEVICE);
2101
2102        prefetch(qs->lro_va);
2103
2104        rx_frag += nr_frags;
2105        rx_frag->page = sd->pg_chunk.page;
2106        rx_frag->page_offset = sd->pg_chunk.offset + offset;
2107        rx_frag->size = len;
2108        frag_len += len;
2109        qs->lro_frag_tbl.nr_frags++;
2110        qs->lro_frag_tbl.len = frag_len;
2111
2112
2113        if (!complete)
2114                return;
2115
2116        qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
2117        cpl = qs->lro_va;
2118
2119        if (unlikely(cpl->vlan_valid)) {
2120                struct net_device *dev = qs->netdev;
2121                struct port_info *pi = netdev_priv(dev);
2122                struct vlan_group *grp = pi->vlan_grp;
2123
2124                if (likely(grp != NULL)) {
2125                        vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2126                                       &qs->lro_frag_tbl);
2127                        goto out;
2128                }
2129        }
2130        napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
2131
2132out:
2133        qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
2134}
2135
2136/**
2137 *      handle_rsp_cntrl_info - handles control information in a response
2138 *      @qs: the queue set corresponding to the response
2139 *      @flags: the response control flags
2140 *
2141 *      Handles the control information of an SGE response, such as GTS
2142 *      indications and completion credits for the queue set's Tx queues.
2143 *      HW coalesces credits, we don't do any extra SW coalescing.
2144 */
2145static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2146{
2147        unsigned int credits;
2148
2149#if USE_GTS
2150        if (flags & F_RSPD_TXQ0_GTS)
2151                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2152#endif
2153
2154        credits = G_RSPD_TXQ0_CR(flags);
2155        if (credits)
2156                qs->txq[TXQ_ETH].processed += credits;
2157
2158        credits = G_RSPD_TXQ2_CR(flags);
2159        if (credits)
2160                qs->txq[TXQ_CTRL].processed += credits;
2161
2162# if USE_GTS
2163        if (flags & F_RSPD_TXQ1_GTS)
2164                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2165# endif
2166        credits = G_RSPD_TXQ1_CR(flags);
2167        if (credits)
2168                qs->txq[TXQ_OFLD].processed += credits;
2169}
2170
2171/**
2172 *      check_ring_db - check if we need to ring any doorbells
2173 *      @adapter: the adapter
2174 *      @qs: the queue set whose Tx queues are to be examined
2175 *      @sleeping: indicates which Tx queue sent GTS
2176 *
2177 *      Checks if some of a queue set's Tx queues need to ring their doorbells
2178 *      to resume transmission after idling while they still have unprocessed
2179 *      descriptors.
2180 */
2181static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2182                          unsigned int sleeping)
2183{
2184        if (sleeping & F_RSPD_TXQ0_GTS) {
2185                struct sge_txq *txq = &qs->txq[TXQ_ETH];
2186
2187                if (txq->cleaned + txq->in_use != txq->processed &&
2188                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2189                        set_bit(TXQ_RUNNING, &txq->flags);
2190                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2191                                     V_EGRCNTX(txq->cntxt_id));
2192                }
2193        }
2194
2195        if (sleeping & F_RSPD_TXQ1_GTS) {
2196                struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2197
2198                if (txq->cleaned + txq->in_use != txq->processed &&
2199                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2200                        set_bit(TXQ_RUNNING, &txq->flags);
2201                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2202                                     V_EGRCNTX(txq->cntxt_id));
2203                }
2204        }
2205}
2206
2207/**
2208 *      is_new_response - check if a response is newly written
2209 *      @r: the response descriptor
2210 *      @q: the response queue
2211 *
2212 *      Returns true if a response descriptor contains a yet unprocessed
2213 *      response.
2214 */
2215static inline int is_new_response(const struct rsp_desc *r,
2216                                  const struct sge_rspq *q)
2217{
2218        return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2219}
2220
2221static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2222{
2223        q->pg_skb = NULL;
2224        q->rx_recycle_buf = 0;
2225}
2226
/* GTS indications of Tx queues 0 and 1 within a response's flags */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)

/* every Tx control bit of a response: credit fields plus GTS indications */
#define RSPD_CTRL_MASK (V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR) | \
                        RSPD_GTS_MASK)

/* Interrupt holdoff to use after a memory allocation failure, in 0.1us. */
#define NOMEM_INTR_DELAY 2500
2235
2236/**
2237 *      process_responses - process responses from an SGE response queue
2238 *      @adap: the adapter
2239 *      @qs: the queue set to which the response queue belongs
2240 *      @budget: how many responses can be processed in this round
2241 *
2242 *      Process responses from an SGE response queue up to the supplied budget.
2243 *      Responses include received packets as well as credits and other events
2244 *      for the queues that belong to the response queue's queue set.
2245 *      A negative budget is effectively unlimited.
2246 *
2247 *      Additionally choose the interrupt holdoff time for the next interrupt
2248 *      on this queue.  If the system is under memory shortage use a fairly
2249 *      long delay to help recovery.
2250 */
2251static int process_responses(struct adapter *adap, struct sge_qset *qs,
2252                             int budget)
2253{
2254        struct sge_rspq *q = &qs->rspq;
2255        struct rsp_desc *r = &q->desc[q->cidx];
2256        int budget_left = budget;
2257        unsigned int sleeping = 0;
2258        struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2259        int ngathered = 0;
2260
2261        q->next_holdoff = q->holdoff_tmr;
2262
2263        while (likely(budget_left && is_new_response(r, q))) {
2264                int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2265                struct sk_buff *skb = NULL;
2266                u32 len, flags = ntohl(r->flags);
2267                __be32 rss_hi = *(const __be32 *)r,
2268                       rss_lo = r->rss_hdr.rss_hash_val;
2269
2270                eth = r->rss_hdr.opcode == CPL_RX_PKT;
2271
2272                if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2273                        skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2274                        if (!skb)
2275                                goto no_mem;
2276
2277                        memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2278                        skb->data[0] = CPL_ASYNC_NOTIF;
2279                        rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2280                        q->async_notif++;
2281                } else if (flags & F_RSPD_IMM_DATA_VALID) {
2282                        skb = get_imm_packet(r);
2283                        if (unlikely(!skb)) {
2284no_mem:
2285                                q->next_holdoff = NOMEM_INTR_DELAY;
2286                                q->nomem++;
2287                                /* consume one credit since we tried */
2288                                budget_left--;
2289                                break;
2290                        }
2291                        q->imm_data++;
2292                        ethpad = 0;
2293                } else if ((len = ntohl(r->len_cq)) != 0) {
2294                        struct sge_fl *fl;
2295
2296                        lro &= eth && is_eth_tcp(rss_hi);
2297
2298                        fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2299                        if (fl->use_pages) {
2300                                void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2301
2302                                prefetch(&qs->lro_frag_tbl);
2303
2304                                prefetch(addr);
2305#if L1_CACHE_BYTES < 128
2306                                prefetch(addr + L1_CACHE_BYTES);
2307#endif
2308                                __refill_fl(adap, fl);
2309                                if (lro > 0) {
2310                                        lro_add_page(adap, qs, fl,
2311                                                     G_RSPD_LEN(len),
2312                                                     flags & F_RSPD_EOP);
2313                                         goto next_fl;
2314                                }
2315
2316                                skb = get_packet_pg(adap, fl, q,
2317                                                    G_RSPD_LEN(len),
2318                                                    eth ?
2319                                                    SGE_RX_DROP_THRES : 0);
2320                                q->pg_skb = skb;
2321                        } else
2322                                skb = get_packet(adap, fl, G_RSPD_LEN(len),
2323                                                 eth ? SGE_RX_DROP_THRES : 0);
2324                        if (unlikely(!skb)) {
2325                                if (!eth)
2326                                        goto no_mem;
2327                                q->rx_drops++;
2328                        } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2329                                __skb_pull(skb, 2);
2330next_fl:
2331                        if (++fl->cidx == fl->size)
2332                                fl->cidx = 0;
2333                } else
2334                        q->pure_rsps++;
2335
2336                if (flags & RSPD_CTRL_MASK) {
2337                        sleeping |= flags & RSPD_GTS_MASK;
2338                        handle_rsp_cntrl_info(qs, flags);
2339                }
2340
2341                r++;
2342                if (unlikely(++q->cidx == q->size)) {
2343                        q->cidx = 0;
2344                        q->gen ^= 1;
2345                        r = q->desc;
2346                }
2347                prefetch(r);
2348
2349                if (++q->credits >= (q->size / 4)) {
2350                        refill_rspq(adap, q, q->credits);
2351                        q->credits = 0;
2352                }
2353
2354                packet_complete = flags &
2355                                  (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2356                                   F_RSPD_ASYNC_NOTIF);
2357
2358                if (skb != NULL && packet_complete) {
2359                        if (eth)
2360                                rx_eth(adap, q, skb, ethpad, lro);
2361                        else {
2362                                q->offload_pkts++;
2363                                /* Preserve the RSS info in csum & priority */
2364                                skb->csum = rss_hi;
2365                                skb->priority = rss_lo;
2366                                ngathered = rx_offload(&adap->tdev, q, skb,
2367                                                       offload_skbs,
2368                                                       ngathered);
2369                        }
2370
2371                        if (flags & F_RSPD_EOP)
2372                                clear_rspq_bufstate(q);
2373                }
2374                --budget_left;
2375        }
2376
2377        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2378
2379        if (sleeping)
2380                check_ring_db(adap, qs, sleeping);
2381
2382        smp_mb();               /* commit Tx queue .processed updates */
2383        if (unlikely(qs->txq_stopped != 0))
2384                restart_tx(qs);
2385
2386        budget -= budget_left;
2387        return budget;
2388}
2389
2390static inline int is_pure_response(const struct rsp_desc *r)
2391{
2392        __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2393
2394        return (n | r->len_cq) == 0;
2395}
2396
2397/**
2398 *      napi_rx_handler - the NAPI handler for Rx processing
2399 *      @napi: the napi instance
2400 *      @budget: how many packets we can process in this round
2401 *
2402 *      Handler for new data events when using NAPI.
2403 */
2404static int napi_rx_handler(struct napi_struct *napi, int budget)
2405{
2406        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2407        struct adapter *adap = qs->adap;
2408        int work_done = process_responses(adap, qs, budget);
2409
2410        if (likely(work_done < budget)) {
2411                napi_complete(napi);
2412
2413                /*
2414                 * Because we don't atomically flush the following
2415                 * write it is possible that in very rare cases it can
2416                 * reach the device in a way that races with a new
2417                 * response being written plus an error interrupt
2418                 * causing the NAPI interrupt handler below to return
2419                 * unhandled status to the OS.  To protect against
2420                 * this would require flushing the write and doing
2421                 * both the write and the flush with interrupts off.
2422                 * Way too expensive and unjustifiable given the
2423                 * rarity of the race.
2424                 *
2425                 * The race cannot happen at all with MSI-X.
2426                 */
2427                t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2428                             V_NEWTIMER(qs->rspq.next_holdoff) |
2429                             V_NEWINDEX(qs->rspq.cidx));
2430        }
2431        return work_done;
2432}
2433
2434/*
2435 * Returns true if the device is already scheduled for polling.
2436 */
2437static inline int napi_is_scheduled(struct napi_struct *napi)
2438{
2439        return test_bit(NAPI_STATE_SCHED, &napi->state);
2440}
2441
2442/**
2443 *      process_pure_responses - process pure responses from a response queue
2444 *      @adap: the adapter
2445 *      @qs: the queue set owning the response queue
2446 *      @r: the first pure response to process
2447 *
2448 *      A simpler version of process_responses() that handles only pure (i.e.,
2449 *      non data-carrying) responses.  Such respones are too light-weight to
2450 *      justify calling a softirq under NAPI, so we handle them specially in
2451 *      the interrupt handler.  The function is called with a pointer to a
2452 *      response, which the caller must ensure is a valid pure response.
2453 *
2454 *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2455 */
2456static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2457                                  struct rsp_desc *r)
2458{
2459        struct sge_rspq *q = &qs->rspq;
2460        unsigned int sleeping = 0;
2461
2462        do {
2463                u32 flags = ntohl(r->flags);
2464
2465                r++;
2466                if (unlikely(++q->cidx == q->size)) {
2467                        q->cidx = 0;
2468                        q->gen ^= 1;
2469                        r = q->desc;
2470                }
2471                prefetch(r);
2472
2473                if (flags & RSPD_CTRL_MASK) {
2474                        sleeping |= flags & RSPD_GTS_MASK;
2475                        handle_rsp_cntrl_info(qs, flags);
2476                }
2477
2478                q->pure_rsps++;
2479                if (++q->credits >= (q->size / 4)) {
2480                        refill_rspq(adap, q, q->credits);
2481                        q->credits = 0;
2482                }
2483        } while (is_new_response(r, q) && is_pure_response(r));
2484
2485        if (sleeping)
2486                check_ring_db(adap, qs, sleeping);
2487
2488        smp_mb();               /* commit Tx queue .processed updates */
2489        if (unlikely(qs->txq_stopped != 0))
2490                restart_tx(qs);
2491
2492        return is_new_response(r, q);
2493}
2494
2495/**
2496 *      handle_responses - decide what to do with new responses in NAPI mode
2497 *      @adap: the adapter
2498 *      @q: the response queue
2499 *
2500 *      This is used by the NAPI interrupt handlers to decide what to do with
2501 *      new SGE responses.  If there are no new responses it returns -1.  If
2502 *      there are new responses and they are pure (i.e., non-data carrying)
2503 *      it handles them straight in hard interrupt context as they are very
2504 *      cheap and don't deliver any packets.  Finally, if there are any data
2505 *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2506 *      schedules NAPI, 0 if all new responses were pure.
2507 *
2508 *      The caller must ascertain NAPI is not already running.
2509 */
2510static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2511{
2512        struct sge_qset *qs = rspq_to_qset(q);
2513        struct rsp_desc *r = &q->desc[q->cidx];
2514
2515        if (!is_new_response(r, q))
2516                return -1;
2517        if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2518                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2519                             V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2520                return 0;
2521        }
2522        napi_schedule(&qs->napi);
2523        return 1;
2524}
2525
2526/*
2527 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2528 * (i.e., response queue serviced in hard interrupt).
2529 */
2530irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2531{
2532        struct sge_qset *qs = cookie;
2533        struct adapter *adap = qs->adap;
2534        struct sge_rspq *q = &qs->rspq;
2535
2536        spin_lock(&q->lock);
2537        if (process_responses(adap, qs, -1) == 0)
2538                q->unhandled_irqs++;
2539        t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2540                     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2541        spin_unlock(&q->lock);
2542        return IRQ_HANDLED;
2543}
2544
2545/*
2546 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2547 * (i.e., response queue serviced by NAPI polling).
2548 */
2549static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2550{
2551        struct sge_qset *qs = cookie;
2552        struct sge_rspq *q = &qs->rspq;
2553
2554        spin_lock(&q->lock);
2555
2556        if (handle_responses(qs->adap, q) < 0)
2557                q->unhandled_irqs++;
2558        spin_unlock(&q->lock);
2559        return IRQ_HANDLED;
2560}
2561
2562/*
2563 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2564 * SGE response queues as well as error and other async events as they all use
2565 * the same MSI vector.  We use one SGE response queue per port in this mode
2566 * and protect all response queues with queue 0's lock.
2567 */
2568static irqreturn_t t3_intr_msi(int irq, void *cookie)
2569{
2570        int new_packets = 0;
2571        struct adapter *adap = cookie;
2572        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2573
2574        spin_lock(&q->lock);
2575
2576        if (process_responses(adap, &adap->sge.qs[0], -1)) {
2577                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2578                             V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2579                new_packets = 1;
2580        }
2581
2582        if (adap->params.nports == 2 &&
2583            process_responses(adap, &adap->sge.qs[1], -1)) {
2584                struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2585
2586                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2587                             V_NEWTIMER(q1->next_holdoff) |
2588                             V_NEWINDEX(q1->cidx));
2589                new_packets = 1;
2590        }
2591
2592        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2593                q->unhandled_irqs++;
2594
2595        spin_unlock(&q->lock);
2596        return IRQ_HANDLED;
2597}
2598
2599static int rspq_check_napi(struct sge_qset *qs)
2600{
2601        struct sge_rspq *q = &qs->rspq;
2602
2603        if (!napi_is_scheduled(&qs->napi) &&
2604            is_new_response(&q->desc[q->cidx], q)) {
2605                napi_schedule(&qs->napi);
2606                return 1;
2607        }
2608        return 0;
2609}
2610
2611/*
2612 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2613 * by NAPI polling).  Handles data events from SGE response queues as well as
2614 * error and other async events as they all use the same MSI vector.  We use
2615 * one SGE response queue per port in this mode and protect all response
2616 * queues with queue 0's lock.
2617 */
2618static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2619{
2620        int new_packets;
2621        struct adapter *adap = cookie;
2622        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2623
2624        spin_lock(&q->lock);
2625
2626        new_packets = rspq_check_napi(&adap->sge.qs[0]);
2627        if (adap->params.nports == 2)
2628                new_packets += rspq_check_napi(&adap->sge.qs[1]);
2629        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2630                q->unhandled_irqs++;
2631
2632        spin_unlock(&q->lock);
2633        return IRQ_HANDLED;
2634}
2635
2636/*
2637 * A helper function that processes responses and issues GTS.
2638 */
2639static inline int process_responses_gts(struct adapter *adap,
2640                                        struct sge_rspq *rq)
2641{
2642        int work;
2643
2644        work = process_responses(adap, rspq_to_qset(rq), -1);
2645        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2646                     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2647        return work;
2648}
2649
2650/*
2651 * The legacy INTx interrupt handler.  This needs to handle data events from
2652 * SGE response queues as well as error and other async events as they all use
2653 * the same interrupt pin.  We use one SGE response queue per port in this mode
2654 * and protect all response queues with queue 0's lock.
2655 */
2656static irqreturn_t t3_intr(int irq, void *cookie)
2657{
2658        int work_done, w0, w1;
2659        struct adapter *adap = cookie;
2660        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2661        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2662
2663        spin_lock(&q0->lock);
2664
2665        w0 = is_new_response(&q0->desc[q0->cidx], q0);
2666        w1 = adap->params.nports == 2 &&
2667            is_new_response(&q1->desc[q1->cidx], q1);
2668
2669        if (likely(w0 | w1)) {
2670                t3_write_reg(adap, A_PL_CLI, 0);
2671                t3_read_reg(adap, A_PL_CLI);    /* flush */
2672
2673                if (likely(w0))
2674                        process_responses_gts(adap, q0);
2675
2676                if (w1)
2677                        process_responses_gts(adap, q1);
2678
2679                work_done = w0 | w1;
2680        } else
2681                work_done = t3_slow_intr_handler(adap);
2682
2683        spin_unlock(&q0->lock);
2684        return IRQ_RETVAL(work_done != 0);
2685}
2686
2687/*
2688 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2689 * Handles data events from SGE response queues as well as error and other
2690 * async events as they all use the same interrupt pin.  We use one SGE
2691 * response queue per port in this mode and protect all response queues with
2692 * queue 0's lock.
2693 */
2694static irqreturn_t t3b_intr(int irq, void *cookie)
2695{
2696        u32 map;
2697        struct adapter *adap = cookie;
2698        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2699
2700        t3_write_reg(adap, A_PL_CLI, 0);
2701        map = t3_read_reg(adap, A_SG_DATA_INTR);
2702
2703        if (unlikely(!map))     /* shared interrupt, most likely */
2704                return IRQ_NONE;
2705
2706        spin_lock(&q0->lock);
2707
2708        if (unlikely(map & F_ERRINTR))
2709                t3_slow_intr_handler(adap);
2710
2711        if (likely(map & 1))
2712                process_responses_gts(adap, q0);
2713
2714        if (map & 2)
2715                process_responses_gts(adap, &adap->sge.qs[1].rspq);
2716
2717        spin_unlock(&q0->lock);
2718        return IRQ_HANDLED;
2719}
2720
2721/*
2722 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2723 * Handles data events from SGE response queues as well as error and other
2724 * async events as they all use the same interrupt pin.  We use one SGE
2725 * response queue per port in this mode and protect all response queues with
2726 * queue 0's lock.
2727 */
2728static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2729{
2730        u32 map;
2731        struct adapter *adap = cookie;
2732        struct sge_qset *qs0 = &adap->sge.qs[0];
2733        struct sge_rspq *q0 = &qs0->rspq;
2734
2735        t3_write_reg(adap, A_PL_CLI, 0);
2736        map = t3_read_reg(adap, A_SG_DATA_INTR);
2737
2738        if (unlikely(!map))     /* shared interrupt, most likely */
2739                return IRQ_NONE;
2740
2741        spin_lock(&q0->lock);
2742
2743        if (unlikely(map & F_ERRINTR))
2744                t3_slow_intr_handler(adap);
2745
2746        if (likely(map & 1))
2747                napi_schedule(&qs0->napi);
2748
2749        if (map & 2)
2750                napi_schedule(&adap->sge.qs[1].napi);
2751
2752        spin_unlock(&q0->lock);
2753        return IRQ_HANDLED;
2754}
2755
2756/**
2757 *      t3_intr_handler - select the top-level interrupt handler
2758 *      @adap: the adapter
2759 *      @polling: whether using NAPI to service response queues
2760 *
2761 *      Selects the top-level interrupt handler based on the type of interrupts
2762 *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2763 *      response queues.
2764 */
2765irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2766{
2767        if (adap->flags & USING_MSIX)
2768                return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2769        if (adap->flags & USING_MSI)
2770                return polling ? t3_intr_msi_napi : t3_intr_msi;
2771        if (adap->params.rev > 0)
2772                return polling ? t3b_intr_napi : t3b_intr;
2773        return t3_intr;
2774}
2775
/*
 * Groups of SGE interrupt cause bits: all parity errors, all framing
 * errors, and the union of causes that t3_sge_err_intr_handler() treats as
 * fatal.
 */
#define SGE_PARERR \
        (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
         F_IRPARITYERROR | \
         V_ITPARITYERROR(M_ITPARITYERROR) | \
         V_FLPARITYERROR(M_FLPARITYERROR) | \
         F_LODRBPARITYERROR | F_HIDRBPARITYERROR | \
         F_LORCQPARITYERROR | F_HIRCQPARITYERROR)

#define SGE_FRAMINGERR \
        (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)

#define SGE_FATALERR \
        (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
         F_RSPQDISABLED)
2784
2785/**
2786 *      t3_sge_err_intr_handler - SGE async event interrupt handler
2787 *      @adapter: the adapter
2788 *
2789 *      Interrupt handler for SGE asynchronous (non-data) events.
2790 */
2791void t3_sge_err_intr_handler(struct adapter *adapter)
2792{
2793        unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2794                                 ~F_FLEMPTY;
2795
2796        if (status & SGE_PARERR)
2797                CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2798                         status & SGE_PARERR);
2799        if (status & SGE_FRAMINGERR)
2800                CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2801                         status & SGE_FRAMINGERR);
2802
2803        if (status & F_RSPQCREDITOVERFOW)
2804                CH_ALERT(adapter, "SGE response queue credit overflow\n");
2805
2806        if (status & F_RSPQDISABLED) {
2807                v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2808
2809                CH_ALERT(adapter,
2810                         "packet delivered to disabled response queue "
2811                         "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2812        }
2813
2814        if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2815                CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2816                         status & F_HIPIODRBDROPERR ? "high" : "lo");
2817
2818        t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2819        if (status &  SGE_FATALERR)
2820                t3_fatal_err(adapter);
2821}
2822
2823/**
2824 *      sge_timer_tx - perform periodic maintenance of an SGE qset
2825 *      @data: the SGE queue set to maintain
2826 *
2827 *      Runs periodically from a timer to perform maintenance of an SGE queue
2828 *      set.  It performs two tasks:
2829 *
2830 *      Cleans up any completed Tx descriptors that may still be pending.
2831 *      Normal descriptor cleanup happens when new packets are added to a Tx
2832 *      queue so this timer is relatively infrequent and does any cleanup only
2833 *      if the Tx queue has not seen any new packets in a while.  We make a
2834 *      best effort attempt to reclaim descriptors, in that we don't wait
2835 *      around if we cannot get a queue's lock (which most likely is because
2836 *      someone else is queueing new packets and so will also handle the clean
2837 *      up).  Since control queues use immediate data exclusively we don't
2838 *      bother cleaning them up here.
2839 *
2840 */
2841static void sge_timer_tx(unsigned long data)
2842{
2843        struct sge_qset *qs = (struct sge_qset *)data;
2844        struct port_info *pi = netdev_priv(qs->netdev);
2845        struct adapter *adap = pi->adapter;
2846        unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2847        unsigned long next_period;
2848
2849        if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2850                tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2851                                                    TX_RECLAIM_TIMER_CHUNK);
2852                spin_unlock(&qs->txq[TXQ_ETH].lock);
2853        }
2854        if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2855                tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2856                                                     TX_RECLAIM_TIMER_CHUNK);
2857                spin_unlock(&qs->txq[TXQ_OFLD].lock);
2858        }
2859
2860        next_period = TX_RECLAIM_PERIOD >>
2861                      (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2862                       TX_RECLAIM_TIMER_CHUNK);
2863        mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2864}
2865
2866/*
2867 *      sge_timer_rx - perform periodic maintenance of an SGE qset
2868 *      @data: the SGE queue set to maintain
2869 *
2870 *      a) Replenishes Rx queues that have run out due to memory shortage.
2871 *      Normally new Rx buffers are added when existing ones are consumed but
2872 *      when out of memory a queue can become empty.  We try to add only a few
2873 *      buffers here, the queue will be replenished fully as these new buffers
2874 *      are used up if memory shortage has subsided.
2875 *
2876 *      b) Return coalesced response queue credits in case a response queue is
2877 *      starved.
2878 *
2879 */
2880static void sge_timer_rx(unsigned long data)
2881{
2882        spinlock_t *lock;
2883        struct sge_qset *qs = (struct sge_qset *)data;
2884        struct port_info *pi = netdev_priv(qs->netdev);
2885        struct adapter *adap = pi->adapter;
2886        u32 status;
2887
2888        lock = adap->params.rev > 0 ?
2889               &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2890
2891        if (!spin_trylock_irq(lock))
2892                goto out;
2893
2894        if (napi_is_scheduled(&qs->napi))
2895                goto unlock;
2896
2897        if (adap->params.rev < 4) {
2898                status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2899
2900                if (status & (1 << qs->rspq.cntxt_id)) {
2901                        qs->rspq.starved++;
2902                        if (qs->rspq.credits) {
2903                                qs->rspq.credits--;
2904                                refill_rspq(adap, &qs->rspq, 1);
2905                                qs->rspq.restarted++;
2906                                t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2907                                             1 << qs->rspq.cntxt_id);
2908                        }
2909                }
2910        }
2911
2912        if (qs->fl[0].credits < qs->fl[0].size)
2913                __refill_fl(adap, &qs->fl[0]);
2914        if (qs->fl[1].credits < qs->fl[1].size)
2915                __refill_fl(adap, &qs->fl[1]);
2916
2917unlock:
2918        spin_unlock_irq(lock);
2919out:
2920        mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2921}
2922
2923/**
2924 *      t3_update_qset_coalesce - update coalescing settings for a queue set
2925 *      @qs: the SGE queue set
2926 *      @p: new queue set parameters
2927 *
2928 *      Update the coalescing settings for an SGE queue set.  Nothing is done
2929 *      if the queue set is not initialized yet.
2930 */
2931void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2932{
2933        qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2934        qs->rspq.polling = p->polling;
2935        qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2936}
2937
2938/**
2939 *      t3_sge_alloc_qset - initialize an SGE queue set
2940 *      @adapter: the adapter
2941 *      @id: the queue set id
2942 *      @nports: how many Ethernet ports will be using this queue set
2943 *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2944 *      @p: configuration parameters for this queue set
2945 *      @ntxq: number of Tx queues for the queue set
2946 *      @netdev: net device associated with this queue set
2947 *      @netdevq: net device TX queue associated with this queue set
2948 *
2949 *      Allocate resources and initialize an SGE queue set.  A queue set
2950 *      comprises a response queue, two Rx free-buffer queues, and up to 3
2951 *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2952 *      queue, offload queue, and control queue.
2953 */
2954int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2955                      int irq_vec_idx, const struct qset_params *p,
2956                      int ntxq, struct net_device *dev,
2957                      struct netdev_queue *netdevq)
2958{
2959        int i, avail, ret = -ENOMEM;
2960        struct sge_qset *q = &adapter->sge.qs[id];
2961
2962        init_qset_cntxt(q, id);
2963        setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2964        setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
2965
2966        q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2967                                   sizeof(struct rx_desc),
2968                                   sizeof(struct rx_sw_desc),
2969                                   &q->fl[0].phys_addr, &q->fl[0].sdesc);
2970        if (!q->fl[0].desc)
2971                goto err;
2972
2973        q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2974                                   sizeof(struct rx_desc),
2975                                   sizeof(struct rx_sw_desc),
2976                                   &q->fl[1].phys_addr, &q->fl[1].sdesc);
2977        if (!q->fl[1].desc)
2978                goto err;
2979
2980        q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2981                                  sizeof(struct rsp_desc), 0,
2982                                  &q->rspq.phys_addr, NULL);
2983        if (!q->rspq.desc)
2984                goto err;
2985
2986        for (i = 0; i < ntxq; ++i) {
2987                /*
2988                 * The control queue always uses immediate data so does not
2989                 * need to keep track of any sk_buffs.
2990                 */
2991                size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2992
2993                q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2994                                            sizeof(struct tx_desc), sz,
2995                                            &q->txq[i].phys_addr,
2996                                            &q->txq[i].sdesc);
2997                if (!q->txq[i].desc)
2998                        goto err;
2999
3000                q->txq[i].gen = 1;
3001                q->txq[i].size = p->txq_size[i];
3002                spin_lock_init(&q->txq[i].lock);
3003                skb_queue_head_init(&q->txq[i].sendq);
3004        }
3005
3006        tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3007                     (unsigned long)q);
3008        tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3009                     (unsigned long)q);
3010
3011        q->fl[0].gen = q->fl[1].gen = 1;
3012        q->fl[0].size = p->fl_size;
3013        q->fl[1].size = p->jumbo_size;
3014
3015        q->rspq.gen = 1;
3016        q->rspq.size = p->rspq_size;
3017        spin_lock_init(&q->rspq.lock);
3018        skb_queue_head_init(&q->rspq.rx_queue);
3019
3020        q->txq[TXQ_ETH].stop_thres = nports *
3021            flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3022
3023#if FL0_PG_CHUNK_SIZE > 0
3024        q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3025#else
3026        q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3027#endif
3028#if FL1_PG_CHUNK_SIZE > 0
3029        q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3030#else
3031        q->fl[1].buf_size = is_offload(adapter) ?
3032                (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3033                MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3034#endif
3035
3036        q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3037        q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3038        q->fl[0].order = FL0_PG_ORDER;
3039        q->fl[1].order = FL1_PG_ORDER;
3040        q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3041        q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3042
3043        spin_lock_irq(&adapter->sge.reg_lock);
3044
3045        /* FL threshold comparison uses < */
3046        ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3047                                   q->rspq.phys_addr, q->rspq.size,
3048                                   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3049        if (ret)
3050                goto err_unlock;
3051
3052        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3053                ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3054                                          q->fl[i].phys_addr, q->fl[i].size,
3055                                          q->fl[i].buf_size - SGE_PG_RSVD,
3056                                          p->cong_thres, 1, 0);
3057                if (ret)
3058                        goto err_unlock;
3059        }
3060
3061        ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3062                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3063                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3064                                 1, 0);
3065        if (ret)
3066                goto err_unlock;
3067
3068        if (ntxq > 1) {
3069                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3070                                         USE_GTS, SGE_CNTXT_OFLD, id,
3071                                         q->txq[TXQ_OFLD].phys_addr,
3072                                         q->txq[TXQ_OFLD].size, 0, 1, 0);
3073                if (ret)
3074                        goto err_unlock;
3075        }
3076
3077        if (ntxq > 2) {
3078                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3079                                         SGE_CNTXT_CTRL, id,
3080                                         q->txq[TXQ_CTRL].phys_addr,
3081                                         q->txq[TXQ_CTRL].size,
3082                                         q->txq[TXQ_CTRL].token, 1, 0);
3083                if (ret)
3084                        goto err_unlock;
3085        }
3086
3087        spin_unlock_irq(&adapter->sge.reg_lock);
3088
3089        q->adap = adapter;
3090        q->netdev = dev;
3091        q->tx_q = netdevq;
3092        t3_update_qset_coalesce(q, p);
3093
3094        avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3095                          GFP_KERNEL | __GFP_COMP);
3096        if (!avail) {
3097                CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3098                goto err;
3099        }
3100        if (avail < q->fl[0].size)
3101                CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3102                        avail);
3103
3104        avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3105                          GFP_KERNEL | __GFP_COMP);
3106        if (avail < q->fl[1].size)
3107                CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3108                        avail);
3109        refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3110
3111        t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3112                     V_NEWTIMER(q->rspq.holdoff_tmr));
3113
3114        return 0;
3115
3116err_unlock:
3117        spin_unlock_irq(&adapter->sge.reg_lock);
3118err:
3119        t3_free_qset(adapter, q);
3120        return ret;
3121}
3122
3123/**
3124 *      t3_start_sge_timers - start SGE timer call backs
3125 *      @adap: the adapter
3126 *
3127 *      Starts each SGE queue set's timer call back
3128 */
3129void t3_start_sge_timers(struct adapter *adap)
3130{
3131        int i;
3132
3133        for (i = 0; i < SGE_QSETS; ++i) {
3134                struct sge_qset *q = &adap->sge.qs[i];
3135
3136        if (q->tx_reclaim_timer.function)
3137                mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3138
3139        if (q->rx_reclaim_timer.function)
3140                mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3141        }
3142}
3143
3144/**
3145 *      t3_stop_sge_timers - stop SGE timer call backs
3146 *      @adap: the adapter
3147 *
3148 *      Stops each SGE queue set's timer call back
3149 */
3150void t3_stop_sge_timers(struct adapter *adap)
3151{
3152        int i;
3153
3154        for (i = 0; i < SGE_QSETS; ++i) {
3155                struct sge_qset *q = &adap->sge.qs[i];
3156
3157                if (q->tx_reclaim_timer.function)
3158                        del_timer_sync(&q->tx_reclaim_timer);
3159                if (q->rx_reclaim_timer.function)
3160                        del_timer_sync(&q->rx_reclaim_timer);
3161        }
3162}
3163
3164/**
3165 *      t3_free_sge_resources - free SGE resources
3166 *      @adap: the adapter
3167 *
3168 *      Frees resources used by the SGE queue sets.
3169 */
3170void t3_free_sge_resources(struct adapter *adap)
3171{
3172        int i;
3173
3174        for (i = 0; i < SGE_QSETS; ++i)
3175                t3_free_qset(adap, &adap->sge.qs[i]);
3176}
3177
3178/**
3179 *      t3_sge_start - enable SGE
3180 *      @adap: the adapter
3181 *
3182 *      Enables the SGE for DMAs.  This is the last step in starting packet
3183 *      transfers.
3184 */
3185void t3_sge_start(struct adapter *adap)
3186{
3187        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3188}
3189
3190/**
3191 *      t3_sge_stop - disable SGE operation
3192 *      @adap: the adapter
3193 *
3194 *      Disables the DMA engine.  This can be called in emeregencies (e.g.,
3195 *      from error interrupts) or from normal process context.  In the latter
3196 *      case it also disables any pending queue restart tasklets.  Note that
3197 *      if it is called in interrupt context it cannot disable the restart
3198 *      tasklets as it cannot wait, however the tasklets will have no effect
3199 *      since the doorbells are disabled and the driver will call this again
3200 *      later from process context, at which time the tasklets will be stopped
3201 *      if they are still running.
3202 */
3203void t3_sge_stop(struct adapter *adap)
3204{
3205        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3206        if (!in_interrupt()) {
3207                int i;
3208
3209                for (i = 0; i < SGE_QSETS; ++i) {
3210                        struct sge_qset *qs = &adap->sge.qs[i];
3211
3212                        tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3213                        tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3214                }
3215        }
3216}
3217
3218/**
3219 *      t3_sge_init - initialize SGE
3220 *      @adap: the adapter
3221 *      @p: the SGE parameters
3222 *
3223 *      Performs SGE initialization needed every time after a chip reset.
3224 *      We do not initialize any of the queue sets here, instead the driver
3225 *      top-level must request those individually.  We also do not enable DMA
3226 *      here, that should be done after the queues have been set up.
3227 */
3228void t3_sge_init(struct adapter *adap, struct sge_params *p)
3229{
3230        unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3231
3232        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3233            F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3234            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3235            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3236#if SGE_NUM_GENBITS == 1
3237        ctrl |= F_EGRGENCTRL;
3238#endif
3239        if (adap->params.rev > 0) {
3240                if (!(adap->flags & (USING_MSIX | USING_MSI)))
3241                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3242        }
3243        t3_write_reg(adap, A_SG_CONTROL, ctrl);
3244        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3245                     V_LORCQDRBTHRSH(512));
3246        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3247        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3248                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3249        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3250                     adap->params.rev < T3_REV_C ? 1000 : 500);
3251        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3252        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3253        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3254        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3255        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3256}
3257
3258/**
3259 *      t3_sge_prep - one-time SGE initialization
3260 *      @adap: the associated adapter
3261 *      @p: SGE parameters
3262 *
3263 *      Performs one-time initialization of SGE SW state.  Includes determining
3264 *      defaults for the assorted SGE parameters, which admins can change until
3265 *      they are used to initialize the SGE.
3266 */
3267void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3268{
3269        int i;
3270
3271        p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3272            SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3273
3274        for (i = 0; i < SGE_QSETS; ++i) {
3275                struct qset_params *q = p->qset + i;
3276
3277                q->polling = adap->params.rev > 0;
3278                q->coalesce_usecs = 5;
3279                q->rspq_size = 1024;
3280                q->fl_size = 1024;
3281                q->jumbo_size = 512;
3282                q->txq_size[TXQ_ETH] = 1024;
3283                q->txq_size[TXQ_OFLD] = 1024;
3284                q->txq_size[TXQ_CTRL] = 256;
3285                q->cong_thres = 0;
3286        }
3287
3288        spin_lock_init(&adap->sge.reg_lock);
3289}
3290
3291/**
3292 *      t3_get_desc - dump an SGE descriptor for debugging purposes
3293 *      @qs: the queue set
3294 *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3295 *      @idx: the descriptor index in the queue
3296 *      @data: where to dump the descriptor contents
3297 *
3298 *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3299 *      size of the descriptor.
3300 */
3301int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3302                unsigned char *data)
3303{
3304        if (qnum >= 6)
3305                return -EINVAL;
3306
3307        if (qnum < 3) {
3308                if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3309                        return -EINVAL;
3310                memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3311                return sizeof(struct tx_desc);
3312        }
3313
3314        if (qnum == 3) {
3315                if (!qs->rspq.desc || idx >= qs->rspq.size)
3316                        return -EINVAL;
3317                memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3318                return sizeof(struct rsp_desc);
3319        }
3320
3321        qnum -= 4;
3322        if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3323                return -EINVAL;
3324        memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3325        return sizeof(struct rx_desc);
3326}
3327
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.