linux/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Huawei HiNIC PCI Express Linux driver
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/atomic.h>
#include <linux/skbuff.h>
#include <linux/io.h>
#include <asm/barrier.h>
#include <asm/byteorder.h>

#include "hinic_common.h"
#include "hinic_hw_if.h"
#include "hinic_hw_wqe.h"
#include "hinic_hw_wq.h"
#include "hinic_hw_qp_ctxt.h"
#include "hinic_hw_qp.h"
#include "hinic_hw_io.h"

#define SQ_DB_OFF               SZ_2K

/* The number of cache lines to prefetch until the threshold state */
#define WQ_PREFETCH_MAX         2
/* The number of cache lines to prefetch after the threshold state */
#define WQ_PREFETCH_MIN         1
/* Threshold state */
#define WQ_PREFETCH_THRESHOLD   256

/* sizes of the SQ/RQ ctxt */
#define Q_CTXT_SIZE             48
#define CTXT_RSVD               240

#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
                (((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)

#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id)  \
                (((max_rqs) + (max_sqs)) * CTXT_RSVD + \
                 (max_sqs + (q_id)) * Q_CTXT_SIZE)

#define SIZE_16BYTES(size)              (ALIGN(size, 16) >> 4)
#define SIZE_8BYTES(size)               (ALIGN(size, 8) >> 3)
#define SECT_SIZE_FROM_8BYTES(size)     ((size) << 3)

#define SQ_DB_PI_HI_SHIFT       8
#define SQ_DB_PI_HI(prod_idx)   ((prod_idx) >> SQ_DB_PI_HI_SHIFT)

#define SQ_DB_PI_LOW_MASK       0xFF
#define SQ_DB_PI_LOW(prod_idx)  ((prod_idx) & SQ_DB_PI_LOW_MASK)

#define SQ_DB_ADDR(sq, pi)      ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))

#define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
#define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)

enum sq_wqe_type {
        SQ_NORMAL_WQE = 0,
};

enum rq_completion_fmt {
        RQ_COMPLETE_SGE = 1
};

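/**
 * hinic_qp_prepare_header - prepare the header of the QP contexts command
 * @qp_ctxt_hdr: the header to prepare
 * @ctxt_type: the type of the queue contexts (SQ or RQ)
 * @num_queues: number of queues
 * @max_queues: max number of queues, used to compute the context offset
 **/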
void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
                             enum hinic_qp_ctxt_type ctxt_type,
                             u16 num_queues, u16 max_queues)
{
        u16 max_sqs = max_queues;
        u16 max_rqs = max_queues;

        qp_ctxt_hdr->num_queues = num_queues;
        qp_ctxt_hdr->queue_type = ctxt_type;

        if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
                qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
        else
                qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);

        qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);

        hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
}

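/**
 * hinic_sq_prepare_ctxt - prepare the SQ HW context
 * @sq_ctxt: the context to prepare
 * @sq: HW Send Queue whose WQ is used to fill the context
 * @global_qid: the global queue id of the SQ
 **/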
void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
                           struct hinic_sq *sq, u16 global_qid)
{
        u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
        u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
        u16 pi_start, ci_start;
        struct hinic_wq *wq;

        wq = sq->wq;
        ci_start = atomic_read(&wq->cons_idx);
        pi_start = atomic_read(&wq->prod_idx);

        /* Read the first page paddr from the WQ page paddr ptrs */
        wq_page_addr = be64_to_cpu(*wq->block_vaddr);

        wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
        wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
        wq_page_pfn_lo = lower_32_bits(wq_page_pfn);

        /* If only one page, use 0-level CLA */
        if (wq->num_q_pages == 1)
                wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq_page_addr);
        else
                wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);

        wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
        wq_block_pfn_lo = lower_32_bits(wq_block_pfn);

        sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
                                                       GLOBAL_SQ_ID) |
                            HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);

        sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
                              HINIC_SQ_CTXT_CI_SET(1, WRAPPED);

        sq_ctxt->wq_hi_pfn_pi =
                        HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
                        HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);

        sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;

        sq_ctxt->pref_cache =
                HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
                HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
                HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);

        sq_ctxt->pref_wrapped = 1;

        sq_ctxt->pref_wq_hi_pfn_ci =
                HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
                HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);

        sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;

        sq_ctxt->wq_block_hi_pfn =
                HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);

        sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;

        hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
}

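/**
 * hinic_rq_prepare_ctxt - prepare the RQ HW context
 * @rq_ctxt: the context to prepare
 * @rq: HW Receive Queue whose WQ is used to fill the context
 * @global_qid: the global queue id of the RQ
 **/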
void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
                           struct hinic_rq *rq, u16 global_qid)
{
        u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
        u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
        u16 pi_start, ci_start;
        struct hinic_wq *wq;

        wq = rq->wq;
        ci_start = atomic_read(&wq->cons_idx);
        pi_start = atomic_read(&wq->prod_idx);

        /* Read the first page paddr from the WQ page paddr ptrs */
        wq_page_addr = be64_to_cpu(*wq->block_vaddr);

        wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
        wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
        wq_page_pfn_lo = lower_32_bits(wq_page_pfn);

        wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
        wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
        wq_block_pfn_lo = lower_32_bits(wq_block_pfn);

        rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
                            HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);

        rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
                                HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);

        rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
                                                          HI_PFN) |
                                HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);

        rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;

        rq_ctxt->pref_cache =
                HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
                HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
                HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);

        rq_ctxt->pref_wrapped = 1;

        rq_ctxt->pref_wq_hi_pfn_ci =
                HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
                HINIC_RQ_CTXT_PREF_SET(ci_start, CI);

        rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;

        rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
        rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);

        rq_ctxt->wq_block_hi_pfn =
                HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);

        rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;

        hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
}

/**
 * alloc_sq_skb_arr - allocate sq array for saved skb
 * @sq: HW Send Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_sq_skb_arr(struct hinic_sq *sq)
{
        struct hinic_wq *wq = sq->wq;
        size_t skb_arr_size;

        skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
        sq->saved_skb = vzalloc(skb_arr_size);
        if (!sq->saved_skb)
                return -ENOMEM;

        return 0;
}

/**
 * free_sq_skb_arr - free sq array for saved skb
 * @sq: HW Send Queue
 **/
static void free_sq_skb_arr(struct hinic_sq *sq)
{
        vfree(sq->saved_skb);
}

/**
 * alloc_rq_skb_arr - allocate rq array for saved skb
 * @rq: HW Receive Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_rq_skb_arr(struct hinic_rq *rq)
{
        struct hinic_wq *wq = rq->wq;
        size_t skb_arr_size;

        skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
        rq->saved_skb = vzalloc(skb_arr_size);
        if (!rq->saved_skb)
                return -ENOMEM;

        return 0;
}

/**
 * free_rq_skb_arr - free rq array for saved skb
 * @rq: HW Receive Queue
 **/
static void free_rq_skb_arr(struct hinic_rq *rq)
{
        vfree(rq->saved_skb);
}

/**
 * hinic_init_sq - Initialize HW Send Queue
 * @sq: HW Send Queue
 * @hwif: HW Interface for accessing HW
 * @wq: Work Queue for the data of the SQ
 * @entry: msix entry for sq
 * @ci_addr: address for reading the current HW consumer index
 * @ci_dma_addr: dma address for reading the current HW consumer index
 * @db_base: doorbell base address
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
                  struct hinic_wq *wq, struct msix_entry *entry,
                  void *ci_addr, dma_addr_t ci_dma_addr,
                  void __iomem *db_base)
{
        sq->hwif = hwif;

        sq->wq = wq;

        sq->irq = entry->vector;
        sq->msix_entry = entry->entry;

        sq->hw_ci_addr = ci_addr;
        sq->hw_ci_dma_addr = ci_dma_addr;

        sq->db_base = db_base + SQ_DB_OFF;

        return alloc_sq_skb_arr(sq);
}

/**
 * hinic_clean_sq - Clean HW Send Queue's Resources
 * @sq: Send Queue
 **/
void hinic_clean_sq(struct hinic_sq *sq)
{
        free_sq_skb_arr(sq);
}

/**
 * alloc_rq_cqe - allocate rq completion queue elements
 * @rq: HW Receive Queue
 *
 * Return 0 - Success, negative - Failure
 **/
static int alloc_rq_cqe(struct hinic_rq *rq)
{
        struct hinic_hwif *hwif = rq->hwif;
        struct pci_dev *pdev = hwif->pdev;
        size_t cqe_dma_size, cqe_size;
        struct hinic_wq *wq = rq->wq;
        int j, i;

        cqe_size = wq->q_depth * sizeof(*rq->cqe);
        rq->cqe = vzalloc(cqe_size);
        if (!rq->cqe)
                return -ENOMEM;

        cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
        rq->cqe_dma = vzalloc(cqe_dma_size);
        if (!rq->cqe_dma)
                goto err_cqe_dma_arr_alloc;

        for (i = 0; i < wq->q_depth; i++) {
                rq->cqe[i] = dma_alloc_coherent(&pdev->dev,
                                                sizeof(*rq->cqe[i]),
                                                &rq->cqe_dma[i], GFP_KERNEL);
                if (!rq->cqe[i])
                        goto err_cqe_alloc;
        }

        return 0;

err_cqe_alloc:
        for (j = 0; j < i; j++)
                dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
                                  rq->cqe_dma[j]);

        vfree(rq->cqe_dma);

err_cqe_dma_arr_alloc:
        vfree(rq->cqe);
        return -ENOMEM;
}

/**
 * free_rq_cqe - free rq completion queue elements
 * @rq: HW Receive Queue
 **/
static void free_rq_cqe(struct hinic_rq *rq)
{
        struct hinic_hwif *hwif = rq->hwif;
        struct pci_dev *pdev = hwif->pdev;
        struct hinic_wq *wq = rq->wq;
        int i;

        for (i = 0; i < wq->q_depth; i++)
                dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
                                  rq->cqe_dma[i]);

        vfree(rq->cqe_dma);
        vfree(rq->cqe);
}

/**
 * hinic_init_rq - Initialize HW Receive Queue
 * @rq: HW Receive Queue
 * @hwif: HW Interface for accessing HW
 * @wq: Work Queue for the data of the RQ
 * @entry: msix entry for rq
 *
 * Return 0 - Success, negative - Failure
 **/
int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
                  struct hinic_wq *wq, struct msix_entry *entry)
{
        struct pci_dev *pdev = hwif->pdev;
        size_t pi_size;
        int err;

        rq->hwif = hwif;

        rq->wq = wq;

        rq->irq = entry->vector;
        rq->msix_entry = entry->entry;

        rq->buf_sz = HINIC_RX_BUF_SZ;

        err = alloc_rq_skb_arr(rq);
        if (err) {
                dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
                return err;
        }

        err = alloc_rq_cqe(rq);
        if (err) {
                dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
                goto err_alloc_rq_cqe;
        }

        /* HW requirements: must be at least 32 bits */
        pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
        rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
                                              &rq->pi_dma_addr, GFP_KERNEL);
        if (!rq->pi_virt_addr) {
                err = -ENOMEM;
                goto err_pi_virt;
        }

        return 0;

err_pi_virt:
        free_rq_cqe(rq);

err_alloc_rq_cqe:
        free_rq_skb_arr(rq);
        return err;
}

/**
 * hinic_clean_rq - Clean HW Receive Queue's Resources
 * @rq: HW Receive Queue
 **/
void hinic_clean_rq(struct hinic_rq *rq)
{
        struct hinic_hwif *hwif = rq->hwif;
        struct pci_dev *pdev = hwif->pdev;
        size_t pi_size;

        pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
        dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
                          rq->pi_dma_addr);

        free_rq_cqe(rq);
        free_rq_skb_arr(rq);
}

/**
 * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
 * @sq: send queue
 *
 * Return number of free wqebbs
 **/
int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
{
        struct hinic_wq *wq = sq->wq;

        return atomic_read(&wq->delta) - 1;
}

/**
 * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
 * @rq: recv queue
 *
 * Return number of free wqebbs
 **/
int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
{
        struct hinic_wq *wq = rq->wq;

        return atomic_read(&wq->delta) - 1;
}

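/**
 * sq_prepare_ctrl - prepare the ctrl section of an SQ WQE
 * @ctrl: the ctrl section to prepare
 * @prod_idx: pi of the wqe
 * @nr_descs: number of buffer descriptors in the wqe
 **/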
static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
                            int nr_descs)
{
        u32 ctrl_size, task_size, bufdesc_size;

        ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
        task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
        bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
        bufdesc_size = SIZE_8BYTES(bufdesc_size);

        ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
                          HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN)        |
                          HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
                          HINIC_SQ_CTRL_SET(ctrl_size, LEN);

        ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
                                             QUEUE_INFO_MSS) |
                           HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
}

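/**
 * sq_prepare_task - prepare the task section of an SQ WQE
 * @task: the task section to initialize with default (no offload) values
 **/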
static void sq_prepare_task(struct hinic_sq_task *task)
{
        task->pkt_info0 = 0;
        task->pkt_info1 = 0;
        task->pkt_info2 = 0;

        task->ufo_v6_identify = 0;

        task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);

        task->zero_pad = 0;
}

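/**
 * hinic_task_set_l2hdr - set the l2 header length in the task section
 * @task: the task section to set
 * @len: l2 header length
 **/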
void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
{
        task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
}

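/**
 * hinic_task_set_outter_l3 - set the outer l3 type and length in the task
 * @task: the task section to set
 * @l3_type: outer l3 offload type
 * @network_len: outer network header length
 **/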
void hinic_task_set_outter_l3(struct hinic_sq_task *task,
                              enum hinic_l3_offload_type l3_type,
                              u32 network_len)
{
        task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
                           HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
}

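/**
 * hinic_task_set_inner_l3 - set the inner l3 type and length in the task
 * @task: the task section to set
 * @l3_type: inner l3 offload type
 * @network_len: inner network header length
 **/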
void hinic_task_set_inner_l3(struct hinic_sq_task *task,
                             enum hinic_l3_offload_type l3_type,
                             u32 network_len)
{
        task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
        task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
}

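/**
 * hinic_task_set_tunnel_l4 - set the tunnel l4 type and length in the task
 * @task: the task section to set
 * @l4_type: l4 tunnel type
 * @tunnel_len: tunnel header length
 **/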
void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
                              enum hinic_l4_tunnel_type l4_type,
                              u32 tunnel_len)
{
        task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
                           HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
}

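/**
 * hinic_set_cs_inner_l4 - set the inner l4 checksum offload in task/queue info
 * @task: the task section to set
 * @queue_info: queue info of the wqe to update
 * @l4_offload: l4 offload type
 * @l4_len: inner l4 header length
 * @offset: payload offset
 **/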
void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
                           enum hinic_l4_offload_type l4_offload,
                           u32 l4_len, u32 offset)
{
        u32 tcp_udp_cs = 0, sctp = 0;
        u32 mss = HINIC_MSS_DEFAULT;

        if (l4_offload == TCP_OFFLOAD_ENABLE ||
            l4_offload == UDP_OFFLOAD_ENABLE)
                tcp_udp_cs = 1;
        else if (l4_offload == SCTP_OFFLOAD_ENABLE)
                sctp = 1;

        task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
        task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

        *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
                       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
                       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);

        *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
        *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
}

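/**
 * hinic_set_tso_inner_l4 - set the inner l4 tso/ufo offload in task/queue info
 * @task: the task section to set
 * @queue_info: queue info of the wqe to update
 * @l4_offload: l4 offload type
 * @l4_len: inner l4 header length
 * @offset: payload offset
 * @ip_ident: ip identification
 * @mss: mss value for the wqe
 **/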
void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
                            enum hinic_l4_offload_type l4_offload,
                            u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
{
        u32 tso = 0, ufo = 0;

        if (l4_offload == TCP_OFFLOAD_ENABLE)
                tso = 1;
        else if (l4_offload == UDP_OFFLOAD_ENABLE)
                ufo = 1;

        task->ufo_v6_identify = ip_ident;

        task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
        task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
        task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);

        *queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
                       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
                       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
                       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);

        /* set MSS value */
        *queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
        *queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
}

/**
 * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
 * @sq: send queue
 * @prod_idx: pi value
 * @sq_wqe: wqe to prepare
 * @sges: sges with the buffer addresses for the wqe
 * @nr_sges: number of sges
 **/
void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
                          struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
                          int nr_sges)
{
        int i;

        sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);

        sq_prepare_task(&sq_wqe->task);

        for (i = 0; i < nr_sges; i++)
                sq_wqe->buf_descs[i].sge = sges[i];
}

/**
 * sq_prepare_db - prepare doorbell to write
 * @sq: send queue
 * @prod_idx: pi value for the doorbell
 * @cos: cos of the doorbell
 *
 * Return db value
 **/
static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
{
        struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
        u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));

        /* Data should be written to HW in Big Endian Format */
        return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI)     |
                           HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
                           HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH)  |
                           HINIC_SQ_DB_INFO_SET(cos, COS)               |
                           HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
}

/**
 * hinic_sq_write_db - write doorbell
 * @sq: send queue
 * @prod_idx: pi value for the doorbell
 * @wqe_size: wqe size
 * @cos: cos of the wqe
 **/
void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
                       unsigned int cos)
{
        struct hinic_wq *wq = sq->wq;

        /* increment prod_idx to the next */
        prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
        prod_idx = SQ_MASKED_IDX(sq, prod_idx);

        wmb();  /* Write all before the doorbell */

        writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
}

/**
 * hinic_sq_get_wqe - get wqe ptr in the current pi and update the pi
 * @sq: sq to get wqe from
 * @wqe_size: wqe size
 * @prod_idx: returned pi
 *
 * Return wqe pointer
 **/
struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
                                      unsigned int wqe_size, u16 *prod_idx)
{
        struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
                                                    prod_idx);

        if (IS_ERR(hw_wqe))
                return NULL;

        return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_return_wqe - return the wqe to the sq
 * @sq: send queue
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
{
        hinic_return_wqe(sq->wq, wqe_size);
}

/**
 * hinic_sq_write_wqe - write the wqe to the sq
 * @sq: send queue
 * @prod_idx: pi of the wqe
 * @sq_wqe: the wqe to write
 * @skb: skb to save
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
                        struct hinic_sq_wqe *sq_wqe,
                        struct sk_buff *skb, unsigned int wqe_size)
{
        struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;

        sq->saved_skb[prod_idx] = skb;

        /* The data in the HW should be in Big Endian Format */
        hinic_cpu_to_be32(sq_wqe, wqe_size);

        hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
}

/**
 * hinic_sq_read_wqebb - read wqe ptr in the current ci and update the ci; the
 * wqe only has one wqebb
 * @sq: send queue
 * @skb: return skb that was saved
 * @wqe_size: the wqe size ptr
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_sq_wqe *hinic_sq_read_wqebb(struct hinic_sq *sq,
                                         struct sk_buff **skb,
                                         unsigned int *wqe_size, u16 *cons_idx)
{
        struct hinic_hw_wqe *hw_wqe;
        struct hinic_sq_wqe *sq_wqe;
        struct hinic_sq_ctrl *ctrl;
        unsigned int buf_sect_len;
        u32 ctrl_info;

        /* read the ctrl section for getting wqe size */
        hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
        if (IS_ERR(hw_wqe))
                return NULL;

        *skb = sq->saved_skb[*cons_idx];

        sq_wqe = &hw_wqe->sq_wqe;
        ctrl = &sq_wqe->ctrl;
        ctrl_info = be32_to_cpu(ctrl->ctrl_info);
        buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);

        *wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
        *wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
        *wqe_size = ALIGN(*wqe_size, sq->wq->wqebb_size);

        return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_read_wqe - read wqe ptr in the current ci and update the ci
 * @sq: send queue
 * @skb: return skb that was saved
 * @wqe_size: the size of the wqe
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
                                       struct sk_buff **skb,
                                       unsigned int wqe_size, u16 *cons_idx)
{
        struct hinic_hw_wqe *hw_wqe;

        hw_wqe = hinic_read_wqe(sq->wq, wqe_size, cons_idx);
        *skb = sq->saved_skb[*cons_idx];

        return &hw_wqe->sq_wqe;
}

/**
 * hinic_sq_put_wqe - release the ci for new wqes
 * @sq: send queue
 * @wqe_size: the size of the wqe
 **/
void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
{
        hinic_put_wqe(sq->wq, wqe_size);
}

/**
 * hinic_sq_get_sges - get sges from the wqe
 * @sq_wqe: wqe to get the sges from its buffer addresses
 * @sges: returned sges
 * @nr_sges: number of sges to return
 **/
void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
                       int nr_sges)
{
        int i;

        for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
                sges[i] = sq_wqe->buf_descs[i].sge;
                hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
        }
}

/**
 * hinic_rq_get_wqe - get wqe ptr in the current pi and update the pi
 * @rq: rq to get wqe from
 * @wqe_size: wqe size
 * @prod_idx: returned pi
 *
 * Return wqe pointer
 **/
struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
                                      unsigned int wqe_size, u16 *prod_idx)
{
        struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
                                                    prod_idx);

        if (IS_ERR(hw_wqe))
                return NULL;

        return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_write_wqe - write the wqe to the rq
 * @rq: recv queue
 * @prod_idx: pi of the wqe
 * @rq_wqe: the wqe to write
 * @skb: skb to save
 **/
void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
                        struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
{
        struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;

        rq->saved_skb[prod_idx] = skb;

        /* The data in the HW should be in Big Endian Format */
        hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));

        hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
}

/**
 * hinic_rq_read_wqe - read wqe ptr in the current ci and update the ci
 * @rq: recv queue
 * @wqe_size: the size of the wqe
 * @skb: return saved skb
 * @cons_idx: consumer index of the wqe
 *
 * Return wqe in ci position
 **/
struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
                                       unsigned int wqe_size,
                                       struct sk_buff **skb, u16 *cons_idx)
{
        struct hinic_hw_wqe *hw_wqe;
        struct hinic_rq_cqe *cqe;
        int rx_done;
        u32 status;

        hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
        if (IS_ERR(hw_wqe))
                return NULL;

        cqe = rq->cqe[*cons_idx];

        status = be32_to_cpu(cqe->status);

        rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
        if (!rx_done)
                return NULL;

        *skb = rq->saved_skb[*cons_idx];

        return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_read_next_wqe - increment ci and read the wqe in ci position
 * @rq: recv queue
 * @wqe_size: the size of the wqe
 * @skb: return saved skb
 * @cons_idx: consumer index in the wq
 *
 * Return wqe in incremented ci position
 **/
struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
                                            unsigned int wqe_size,
                                            struct sk_buff **skb,
                                            u16 *cons_idx)
{
        struct hinic_wq *wq = rq->wq;
        struct hinic_hw_wqe *hw_wqe;
        unsigned int num_wqebbs;

        wqe_size = ALIGN(wqe_size, wq->wqebb_size);
        num_wqebbs = wqe_size / wq->wqebb_size;

        *cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);

        *skb = rq->saved_skb[*cons_idx];

        hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);

        return &hw_wqe->rq_wqe;
}

/**
 * hinic_rq_put_wqe - release the ci for new wqes
 * @rq: recv queue
 * @cons_idx: consumer index of the wqe
 * @wqe_size: the size of the wqe
 **/
void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
                      unsigned int wqe_size)
{
        struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
        u32 status = be32_to_cpu(cqe->status);

        status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);

        /* Rx WQE size is 1 WQEBB, no wq shadow */
        cqe->status = cpu_to_be32(status);

        wmb();          /* clear done flag */

        hinic_put_wqe(rq->wq, wqe_size);
}

/**
 * hinic_rq_get_sge - get sge from the wqe
 * @rq: recv queue
 * @rq_wqe: wqe to get the sge from its buf address
 * @cons_idx: consumer index
 * @sge: returned sge
 **/
void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
                      u16 cons_idx, struct hinic_sge *sge)
{
        struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
        u32 len = be32_to_cpu(cqe->len);

        sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
        sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
        sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
}

/**
 * hinic_rq_prepare_wqe - prepare wqe before insert to the queue
 * @rq: recv queue
 * @prod_idx: pi value
 * @rq_wqe: the wqe
 * @sge: sge for use by the wqe for recv buf address
 **/
void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
                          struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
{
        struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
        struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
        struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
        struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
        dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];

        ctrl->ctrl_info =
                HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
                HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
                                  COMPLETE_LEN)                    |
                HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
                                  BUFDESC_SECT_LEN)                |
                HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);

        hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));

        buf_desc->hi_addr = sge->hi_addr;
        buf_desc->lo_addr = sge->lo_addr;
}

/**
 * hinic_rq_update - update pi of the rq
 * @rq: recv queue
 * @prod_idx: pi value
 **/
void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
{
        *rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
}