linux/drivers/net/ethernet/sfc/efx_channels.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/****************************************************************************
   3 * Driver for Solarflare network controllers and boards
   4 * Copyright 2018 Solarflare Communications Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation, incorporated herein by reference.
   9 */
  10
  11#include "net_driver.h"
  12#include <linux/module.h>
  13#include "efx_channels.h"
  14#include "efx.h"
  15#include "efx_common.h"
  16#include "tx_common.h"
  17#include "rx_common.h"
  18#include "nic.h"
  19#include "sriov.h"
  20#include "workarounds.h"
  21
  22/* This is the first interrupt mode to try out of:
  23 * 0 => MSI-X
  24 * 1 => MSI
  25 * 2 => legacy
  26 */
  27unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;
  28
  29/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
  30 * i.e. the number of CPUs among which we may distribute simultaneous
  31 * interrupt handling.
  32 *
  33 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
  34 * The default (0) means to assign an interrupt to each core.
  35 */
  36unsigned int rss_cpus;
  37
  38static unsigned int irq_adapt_low_thresh = 8000;
  39module_param(irq_adapt_low_thresh, uint, 0644);
  40MODULE_PARM_DESC(irq_adapt_low_thresh,
  41                 "Threshold score for reducing IRQ moderation");
  42
  43static unsigned int irq_adapt_high_thresh = 16000;
  44module_param(irq_adapt_high_thresh, uint, 0644);
  45MODULE_PARM_DESC(irq_adapt_high_thresh,
  46                 "Threshold score for increasing IRQ moderation");
  47
  48/* This is the weight assigned to each of the (per-channel) virtual
  49 * NAPI devices.
  50 */
  51static int napi_weight = 64;
  52
  53/***************
  54 * Housekeeping
  55 ***************/
  56
  57int efx_channel_dummy_op_int(struct efx_channel *channel)
  58{
  59        return 0;
  60}
  61
  62void efx_channel_dummy_op_void(struct efx_channel *channel)
  63{
  64}
  65
  66static const struct efx_channel_type efx_default_channel_type = {
  67        .pre_probe              = efx_channel_dummy_op_int,
  68        .post_remove            = efx_channel_dummy_op_void,
  69        .get_name               = efx_get_channel_name,
  70        .copy                   = efx_copy_channel,
  71        .want_txqs              = efx_default_channel_want_txqs,
  72        .keep_eventq            = false,
  73        .want_pio               = true,
  74};
  75
  76/*************
  77 * INTERRUPTS
  78 *************/
  79
  80static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
  81{
  82        cpumask_var_t thread_mask;
  83        unsigned int count;
  84        int cpu;
  85
  86        if (rss_cpus) {
  87                count = rss_cpus;
  88        } else {
  89                if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
  90                        netif_warn(efx, probe, efx->net_dev,
  91                                   "RSS disabled due to allocation failure\n");
  92                        return 1;
  93                }
  94
  95                count = 0;
  96                for_each_online_cpu(cpu) {
  97                        if (!cpumask_test_cpu(cpu, thread_mask)) {
  98                                ++count;
  99                                cpumask_or(thread_mask, thread_mask,
 100                                           topology_sibling_cpumask(cpu));
 101                        }
 102                }
 103
 104                free_cpumask_var(thread_mask);
 105        }
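        /* Illustrative example (CPU counts are hypothetical, not taken from
         * this driver): on a 16-CPU host with two-way SMT, both siblings of
         * a core share the same topology_sibling_cpumask(), so the loop
         * above counts 8 and RSS is spread across one queue per physical
         * core rather than one per hyperthread.
         */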
 106
 107        if (count > EFX_MAX_RX_QUEUES) {
 108                netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
 109                               "Reducing number of rx queues from %u to %u.\n",
 110                               count, EFX_MAX_RX_QUEUES);
 111                count = EFX_MAX_RX_QUEUES;
 112        }
 113
 114        /* If RSS is requested for the PF *and* VFs then we can't write RSS
 115         * table entries that are inaccessible to VFs
 116         */
 117#ifdef CONFIG_SFC_SRIOV
 118        if (efx->type->sriov_wanted) {
 119                if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
 120                    count > efx_vf_size(efx)) {
 121                        netif_warn(efx, probe, efx->net_dev,
 122                                   "Reducing number of RSS channels from %u to %u for "
 123                                   "VF support. Increase vf-msix-limit to use more "
 124                                   "channels on the PF.\n",
 125                                   count, efx_vf_size(efx));
 126                        count = efx_vf_size(efx);
 127                }
 128        }
 129#endif
 130
 131        return count;
 132}
 133
 134static int efx_allocate_msix_channels(struct efx_nic *efx,
 135                                      unsigned int max_channels,
 136                                      unsigned int extra_channels,
 137                                      unsigned int parallelism)
 138{
 139        unsigned int n_channels = parallelism;
 140        int vec_count;
 141        int tx_per_ev;
 142        int n_xdp_tx;
 143        int n_xdp_ev;
 144
 145        if (efx_separate_tx_channels)
 146                n_channels *= 2;
 147        n_channels += extra_channels;
 148
 149        /* To allow XDP transmit to happen from arbitrary NAPI contexts
 150         * we allocate a TX queue per CPU. We share event queues across
 151         * multiple tx queues, assuming tx and ev queues are both
 152         * maximum size.
 153         */
 154        tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
 155        tx_per_ev = min(tx_per_ev, EFX_MAX_TXQ_PER_CHANNEL);
 156        n_xdp_tx = num_possible_cpus();
 157        n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
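        /* Worked example with assumed sizes (the real constants are defined
         * elsewhere in the driver and may differ): if EFX_MAX_EVQ_SIZE were
         * 32768 and EFX_TXQ_MAX_ENT(efx) returned 4096, tx_per_ev would
         * start at 8 and then be clamped to EFX_MAX_TXQ_PER_CHANNEL; with
         * 64 possible CPUs, n_xdp_tx = 64 and
         * n_xdp_ev = DIV_ROUND_UP(64, tx_per_ev).
         */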
 158
 159        vec_count = pci_msix_vec_count(efx->pci_dev);
 160        if (vec_count < 0)
 161                return vec_count;
 162
 163        max_channels = min_t(unsigned int, vec_count, max_channels);
 164
 165        /* Check resources.
 166         * We need a channel per event queue, plus a VI per tx queue.
 167         * This may be more pessimistic than it needs to be.
 168         */
 169        if (n_channels + n_xdp_ev > max_channels) {
 170                netif_err(efx, drv, efx->net_dev,
 171                          "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
 172                          n_xdp_ev, n_channels, max_channels);
 173                efx->n_xdp_channels = 0;
 174                efx->xdp_tx_per_channel = 0;
 175                efx->xdp_tx_queue_count = 0;
 176        } else if (n_channels + n_xdp_tx > efx->max_vis) {
 177                netif_err(efx, drv, efx->net_dev,
 178                          "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
 179                          n_xdp_tx, n_channels, efx->max_vis);
 180                efx->n_xdp_channels = 0;
 181                efx->xdp_tx_per_channel = 0;
 182                efx->xdp_tx_queue_count = 0;
 183        } else {
 184                efx->n_xdp_channels = n_xdp_ev;
 185                efx->xdp_tx_per_channel = tx_per_ev;
 186                efx->xdp_tx_queue_count = n_xdp_tx;
 187                n_channels += n_xdp_ev;
 188                netif_dbg(efx, drv, efx->net_dev,
 189                          "Allocating %d TX and %d event queues for XDP\n",
 190                          n_xdp_tx, n_xdp_ev);
 191        }
 192
 193        if (vec_count < n_channels) {
 194                netif_err(efx, drv, efx->net_dev,
 195                          "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
 196                          vec_count, n_channels);
 197                netif_err(efx, drv, efx->net_dev,
 198                          "WARNING: Performance may be reduced.\n");
 199                n_channels = vec_count;
 200        }
 201
 202        n_channels = min(n_channels, max_channels);
 203
 204        efx->n_channels = n_channels;
 205
 206        /* Ignore XDP tx channels when creating rx channels. */
 207        n_channels -= efx->n_xdp_channels;
 208
 209        if (efx_separate_tx_channels) {
 210                efx->n_tx_channels =
 211                        min(max(n_channels / 2, 1U),
 212                            efx->max_tx_channels);
 213                efx->tx_channel_offset =
 214                        n_channels - efx->n_tx_channels;
 215                efx->n_rx_channels =
 216                        max(n_channels -
 217                            efx->n_tx_channels, 1U);
 218        } else {
 219                efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
 220                efx->tx_channel_offset = 0;
 221                efx->n_rx_channels = n_channels;
 222        }
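        /* Sketch of the split above, using made-up numbers: with 8 channels
         * remaining after the XDP channels are excluded and
         * efx_separate_tx_channels set, TX gets min(4, efx->max_tx_channels)
         * channels starting at offset n_channels - n_tx_channels and RX gets
         * the rest; without separate TX channels every channel carries both
         * RX and TX.
         */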
 223
 224        efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
 225        efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
 226
 227        efx->xdp_channel_offset = n_channels;
 228
 229        netif_dbg(efx, drv, efx->net_dev,
 230                  "Allocating %u RX channels\n",
 231                  efx->n_rx_channels);
 232
 233        return efx->n_channels;
 234}
 235
 236/* Probe the number and type of interrupts we are able to obtain, and
 237 * the resulting numbers of channels and RX queues.
 238 */
 239int efx_probe_interrupts(struct efx_nic *efx)
 240{
 241        unsigned int extra_channels = 0;
 242        unsigned int rss_spread;
 243        unsigned int i, j;
 244        int rc;
 245
 246        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
 247                if (efx->extra_channel_type[i])
 248                        ++extra_channels;
 249
 250        if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
 251                unsigned int parallelism = efx_wanted_parallelism(efx);
 252                struct msix_entry xentries[EFX_MAX_CHANNELS];
 253                unsigned int n_channels;
 254
 255                rc = efx_allocate_msix_channels(efx, efx->max_channels,
 256                                                extra_channels, parallelism);
 257                if (rc >= 0) {
 258                        n_channels = rc;
 259                        for (i = 0; i < n_channels; i++)
 260                                xentries[i].entry = i;
 261                        rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
 262                                                   n_channels);
 263                }
 264                if (rc < 0) {
 265                        /* Fall back to single channel MSI */
 266                        netif_err(efx, drv, efx->net_dev,
 267                                  "could not enable MSI-X\n");
 268                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
 269                                efx->interrupt_mode = EFX_INT_MODE_MSI;
 270                        else
 271                                return rc;
 272                } else if (rc < n_channels) {
 273                        netif_err(efx, drv, efx->net_dev,
 274                                  "WARNING: Insufficient MSI-X vectors"
 275                                  " available (%d < %u).\n", rc, n_channels);
 276                        netif_err(efx, drv, efx->net_dev,
 277                                  "WARNING: Performance may be reduced.\n");
 278                        n_channels = rc;
 279                }
 280
 281                if (rc > 0) {
 282                        for (i = 0; i < efx->n_channels; i++)
 283                                efx_get_channel(efx, i)->irq =
 284                                        xentries[i].vector;
 285                }
 286        }
 287
 288        /* Try single interrupt MSI */
 289        if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
 290                efx->n_channels = 1;
 291                efx->n_rx_channels = 1;
 292                efx->n_tx_channels = 1;
 293                efx->n_xdp_channels = 0;
 294                efx->xdp_channel_offset = efx->n_channels;
 295                rc = pci_enable_msi(efx->pci_dev);
 296                if (rc == 0) {
 297                        efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
 298                } else {
 299                        netif_err(efx, drv, efx->net_dev,
 300                                  "could not enable MSI\n");
 301                        if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
 302                                efx->interrupt_mode = EFX_INT_MODE_LEGACY;
 303                        else
 304                                return rc;
 305                }
 306        }
 307
 308        /* Assume legacy interrupts */
 309        if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
 310                efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
 311                efx->n_rx_channels = 1;
 312                efx->n_tx_channels = 1;
 313                efx->n_xdp_channels = 0;
 314                efx->xdp_channel_offset = efx->n_channels;
 315                efx->legacy_irq = efx->pci_dev->irq;
 316        }
 317
 318        /* Assign extra channels if possible, before XDP channels */
 319        efx->n_extra_tx_channels = 0;
 320        j = efx->xdp_channel_offset;
 321        for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
 322                if (!efx->extra_channel_type[i])
 323                        continue;
 324                if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
 325                        efx->extra_channel_type[i]->handle_no_channel(efx);
 326                } else {
 327                        --j;
 328                        efx_get_channel(efx, j)->type =
 329                                efx->extra_channel_type[i];
 330                        if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
 331                                efx->n_extra_tx_channels++;
 332                }
 333        }
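        /* The walk above hands out the highest-numbered channels below the
         * XDP block, working downwards, and only assigns a channel whose
         * number lies above the last ordinary TX channel; if none is
         * available the type's handle_no_channel() hook is called instead.
         */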
 334
 335        rss_spread = efx->n_rx_channels;
 336        /* RSS might be usable on VFs even if it is disabled on the PF */
 337#ifdef CONFIG_SFC_SRIOV
 338        if (efx->type->sriov_wanted) {
 339                efx->rss_spread = ((rss_spread > 1 ||
 340                                    !efx->type->sriov_wanted(efx)) ?
 341                                   rss_spread : efx_vf_size(efx));
 342                return 0;
 343        }
 344#endif
 345        efx->rss_spread = rss_spread;
 346
 347        return 0;
 348}
 349
 350#if defined(CONFIG_SMP)
 351void efx_set_interrupt_affinity(struct efx_nic *efx)
 352{
 353        struct efx_channel *channel;
 354        unsigned int cpu;
 355
 356        efx_for_each_channel(channel, efx) {
 357                cpu = cpumask_local_spread(channel->channel,
 358                                           pcibus_to_node(efx->pci_dev->bus));
 359                irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
 360        }
 361}
 362
 363void efx_clear_interrupt_affinity(struct efx_nic *efx)
 364{
 365        struct efx_channel *channel;
 366
 367        efx_for_each_channel(channel, efx)
 368                irq_set_affinity_hint(channel->irq, NULL);
 369}
 370#else
 371void
 372efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
 373{
 374}
 375
 376void
 377efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
 378{
 379}
 380#endif /* CONFIG_SMP */
 381
 382void efx_remove_interrupts(struct efx_nic *efx)
 383{
 384        struct efx_channel *channel;
 385
 386        /* Remove MSI/MSI-X interrupts */
 387        efx_for_each_channel(channel, efx)
 388                channel->irq = 0;
 389        pci_disable_msi(efx->pci_dev);
 390        pci_disable_msix(efx->pci_dev);
 391
 392        /* Remove legacy interrupt */
 393        efx->legacy_irq = 0;
 394}
 395
 396/***************
 397 * EVENT QUEUES
 398 ***************/
 399
 400/* Create event queue
 401 * Event queue memory allocations are done only once.  If the channel
 402 * is reset, the memory buffer will be reused; this guards against
 403 * errors during channel reset and also simplifies interrupt handling.
 404 */
 405int efx_probe_eventq(struct efx_channel *channel)
 406{
 407        struct efx_nic *efx = channel->efx;
 408        unsigned long entries;
 409
 410        netif_dbg(efx, probe, efx->net_dev,
 411                  "chan %d create event queue\n", channel->channel);
 412
 413        /* Build an event queue with room for one event per tx and rx buffer,
 414         * plus some extra for link state events and MCDI completions.
 415         */
 416        entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
 417        EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
 418        channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
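        /* Example with assumed ring sizes (not necessarily the driver
         * defaults): rxq_entries = 512 and txq_entries = 512 give
         * 512 + 512 + 128 = 1152, which rounds up to 2048 entries and, as
         * long as EFX_MIN_EVQ_SIZE is no larger than that, an eventq_mask
         * of 2047.
         */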
 419
 420        return efx_nic_probe_eventq(channel);
 421}
 422
 423/* Prepare channel's event queue */
 424int efx_init_eventq(struct efx_channel *channel)
 425{
 426        struct efx_nic *efx = channel->efx;
 427        int rc;
 428
 429        EFX_WARN_ON_PARANOID(channel->eventq_init);
 430
 431        netif_dbg(efx, drv, efx->net_dev,
 432                  "chan %d init event queue\n", channel->channel);
 433
 434        rc = efx_nic_init_eventq(channel);
 435        if (rc == 0) {
 436                efx->type->push_irq_moderation(channel);
 437                channel->eventq_read_ptr = 0;
 438                channel->eventq_init = true;
 439        }
 440        return rc;
 441}
 442
 443/* Enable event queue processing and NAPI */
 444void efx_start_eventq(struct efx_channel *channel)
 445{
 446        netif_dbg(channel->efx, ifup, channel->efx->net_dev,
 447                  "chan %d start event queue\n", channel->channel);
 448
 449        /* Make sure the NAPI handler sees the enabled flag set */
 450        channel->enabled = true;
 451        smp_wmb();
 452
 453        napi_enable(&channel->napi_str);
 454        efx_nic_eventq_read_ack(channel);
 455}
 456
 457/* Disable event queue processing and NAPI */
 458void efx_stop_eventq(struct efx_channel *channel)
 459{
 460        if (!channel->enabled)
 461                return;
 462
 463        napi_disable(&channel->napi_str);
 464        channel->enabled = false;
 465}
 466
 467void efx_fini_eventq(struct efx_channel *channel)
 468{
 469        if (!channel->eventq_init)
 470                return;
 471
 472        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 473                  "chan %d fini event queue\n", channel->channel);
 474
 475        efx_nic_fini_eventq(channel);
 476        channel->eventq_init = false;
 477}
 478
 479void efx_remove_eventq(struct efx_channel *channel)
 480{
 481        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 482                  "chan %d remove event queue\n", channel->channel);
 483
 484        efx_nic_remove_eventq(channel);
 485}
 486
 487/**************************************************************************
 488 *
 489 * Channel handling
 490 *
 491 *************************************************************************/
 492
 493#ifdef CONFIG_RFS_ACCEL
 494static void efx_filter_rfs_expire(struct work_struct *data)
 495{
 496        struct delayed_work *dwork = to_delayed_work(data);
 497        struct efx_channel *channel;
 498        unsigned int time, quota;
 499
 500        channel = container_of(dwork, struct efx_channel, filter_work);
 501        time = jiffies - channel->rfs_last_expiry;
 502        quota = channel->rfs_filter_count * time / (30 * HZ);
 503        if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
 504                channel->rfs_last_expiry += time;
 505        /* Ensure we do more work eventually even if NAPI poll is not happening */
 506        schedule_delayed_work(dwork, 30 * HZ);
 507}
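/* Example of the quota above, with invented numbers: if the channel holds
 * rfs_filter_count = 100 filters and a full 30 * HZ has elapsed since the
 * last expiry pass, quota = 100 * (30 * HZ) / (30 * HZ) = 100 and up to 100
 * filters are considered for expiry; with fewer filters or less elapsed
 * time the quota shrinks, and below 20 the pass is skipped entirely.
 */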
 508#endif
 509
 510/* Allocate and initialise a channel structure. */
 511static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
 512{
 513        struct efx_rx_queue *rx_queue;
 514        struct efx_tx_queue *tx_queue;
 515        struct efx_channel *channel;
 516        int j;
 517
 518        channel = kzalloc(sizeof(*channel), GFP_KERNEL);
 519        if (!channel)
 520                return NULL;
 521
 522        channel->efx = efx;
 523        channel->channel = i;
 524        channel->type = &efx_default_channel_type;
 525
 526        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 527                tx_queue = &channel->tx_queue[j];
 528                tx_queue->efx = efx;
 529                tx_queue->queue = -1;
 530                tx_queue->label = j;
 531                tx_queue->channel = channel;
 532        }
 533
 534#ifdef CONFIG_RFS_ACCEL
 535        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 536#endif
 537
 538        rx_queue = &channel->rx_queue;
 539        rx_queue->efx = efx;
 540        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 541
 542        return channel;
 543}
 544
 545int efx_init_channels(struct efx_nic *efx)
 546{
 547        unsigned int i;
 548
 549        for (i = 0; i < EFX_MAX_CHANNELS; i++) {
 550                efx->channel[i] = efx_alloc_channel(efx, i);
 551                if (!efx->channel[i])
 552                        return -ENOMEM;
 553                efx->msi_context[i].efx = efx;
 554                efx->msi_context[i].index = i;
 555        }
 556
 557        /* Higher numbered interrupt modes are less capable! */
 558        efx->interrupt_mode = min(efx->type->min_interrupt_mode,
 559                                  efx_interrupt_mode);
 560
 561        efx->max_channels = EFX_MAX_CHANNELS;
 562        efx->max_tx_channels = EFX_MAX_CHANNELS;
 563
 564        return 0;
 565}
 566
 567void efx_fini_channels(struct efx_nic *efx)
 568{
 569        unsigned int i;
 570
 571        for (i = 0; i < EFX_MAX_CHANNELS; i++)
 572                if (efx->channel[i]) {
 573                        kfree(efx->channel[i]);
 574                        efx->channel[i] = NULL;
 575                }
 576}
 577
 578/* Allocate and initialise a channel structure, copying parameters
 579 * (but not resources) from an old channel structure.
 580 */
 581struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
 582{
 583        struct efx_rx_queue *rx_queue;
 584        struct efx_tx_queue *tx_queue;
 585        struct efx_channel *channel;
 586        int j;
 587
 588        channel = kmalloc(sizeof(*channel), GFP_KERNEL);
 589        if (!channel)
 590                return NULL;
 591
 592        *channel = *old_channel;
 593
 594        channel->napi_dev = NULL;
 595        INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
 596        channel->napi_str.napi_id = 0;
 597        channel->napi_str.state = 0;
 598        memset(&channel->eventq, 0, sizeof(channel->eventq));
 599
 600        for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
 601                tx_queue = &channel->tx_queue[j];
 602                if (tx_queue->channel)
 603                        tx_queue->channel = channel;
 604                tx_queue->buffer = NULL;
 605                tx_queue->cb_page = NULL;
 606                memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
 607        }
 608
 609        rx_queue = &channel->rx_queue;
 610        rx_queue->buffer = NULL;
 611        memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
 612        timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
 613#ifdef CONFIG_RFS_ACCEL
 614        INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
 615#endif
 616
 617        return channel;
 618}
 619
 620static int efx_probe_channel(struct efx_channel *channel)
 621{
 622        struct efx_tx_queue *tx_queue;
 623        struct efx_rx_queue *rx_queue;
 624        int rc;
 625
 626        netif_dbg(channel->efx, probe, channel->efx->net_dev,
 627                  "creating channel %d\n", channel->channel);
 628
 629        rc = channel->type->pre_probe(channel);
 630        if (rc)
 631                goto fail;
 632
 633        rc = efx_probe_eventq(channel);
 634        if (rc)
 635                goto fail;
 636
 637        efx_for_each_channel_tx_queue(tx_queue, channel) {
 638                rc = efx_probe_tx_queue(tx_queue);
 639                if (rc)
 640                        goto fail;
 641        }
 642
 643        efx_for_each_channel_rx_queue(rx_queue, channel) {
 644                rc = efx_probe_rx_queue(rx_queue);
 645                if (rc)
 646                        goto fail;
 647        }
 648
 649        channel->rx_list = NULL;
 650
 651        return 0;
 652
 653fail:
 654        efx_remove_channel(channel);
 655        return rc;
 656}
 657
 658void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
 659{
 660        struct efx_nic *efx = channel->efx;
 661        const char *type;
 662        int number;
 663
 664        number = channel->channel;
 665
 666        if (number >= efx->xdp_channel_offset &&
 667            !WARN_ON_ONCE(!efx->n_xdp_channels)) {
 668                type = "-xdp";
 669                number -= efx->xdp_channel_offset;
 670        } else if (efx->tx_channel_offset == 0) {
 671                type = "";
 672        } else if (number < efx->tx_channel_offset) {
 673                type = "-rx";
 674        } else {
 675                type = "-tx";
 676                number -= efx->tx_channel_offset;
 677        }
 678        snprintf(buf, len, "%s%s-%d", efx->name, type, number);
 679}
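/* For instance, assuming an interface named "eth0" (a name chosen purely
 * for illustration): a combined channel is reported as "eth0-0", RX-only
 * and TX-only channels as "eth0-rx-0" and "eth0-tx-0" when TX channels are
 * separated, and XDP channels as "eth0-xdp-0".
 */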
 680
 681void efx_set_channel_names(struct efx_nic *efx)
 682{
 683        struct efx_channel *channel;
 684
 685        efx_for_each_channel(channel, efx)
 686                channel->type->get_name(channel,
 687                                        efx->msi_context[channel->channel].name,
 688                                        sizeof(efx->msi_context[0].name));
 689}
 690
 691int efx_probe_channels(struct efx_nic *efx)
 692{
 693        struct efx_channel *channel;
 694        int rc;
 695
 696        /* Restart special buffer allocation */
 697        efx->next_buffer_table = 0;
 698
 699        /* Probe channels in reverse, so that any 'extra' channels
 700         * use the start of the buffer table. This allows the traffic
 701         * channels to be resized without moving them or wasting the
 702         * entries before them.
 703         */
 704        efx_for_each_channel_rev(channel, efx) {
 705                rc = efx_probe_channel(channel);
 706                if (rc) {
 707                        netif_err(efx, probe, efx->net_dev,
 708                                  "failed to create channel %d\n",
 709                                  channel->channel);
 710                        goto fail;
 711                }
 712        }
 713        efx_set_channel_names(efx);
 714
 715        return 0;
 716
 717fail:
 718        efx_remove_channels(efx);
 719        return rc;
 720}
 721
 722void efx_remove_channel(struct efx_channel *channel)
 723{
 724        struct efx_tx_queue *tx_queue;
 725        struct efx_rx_queue *rx_queue;
 726
 727        netif_dbg(channel->efx, drv, channel->efx->net_dev,
 728                  "destroy chan %d\n", channel->channel);
 729
 730        efx_for_each_channel_rx_queue(rx_queue, channel)
 731                efx_remove_rx_queue(rx_queue);
 732        efx_for_each_channel_tx_queue(tx_queue, channel)
 733                efx_remove_tx_queue(tx_queue);
 734        efx_remove_eventq(channel);
 735        channel->type->post_remove(channel);
 736}
 737
 738void efx_remove_channels(struct efx_nic *efx)
 739{
 740        struct efx_channel *channel;
 741
 742        efx_for_each_channel(channel, efx)
 743                efx_remove_channel(channel);
 744
 745        kfree(efx->xdp_tx_queues);
 746}
 747
 748int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
 749{
 750        struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
 751        unsigned int i, next_buffer_table = 0;
 752        u32 old_rxq_entries, old_txq_entries;
 753        int rc, rc2;
 754
 755        rc = efx_check_disabled(efx);
 756        if (rc)
 757                return rc;
 758
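        /* Summary of the approach below: clone each resizable channel with
         * the new ring sizes, swap the clones in, re-probe them, and on
         * failure swap the originals back; channels whose type has no
         * ->copy method are left alone and keep their buffer table entries.
         */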
 759        /* Not all channels should be reallocated. We must avoid
 760         * reallocating their buffer table entries.
 761         */
 762        efx_for_each_channel(channel, efx) {
 763                struct efx_rx_queue *rx_queue;
 764                struct efx_tx_queue *tx_queue;
 765
 766                if (channel->type->copy)
 767                        continue;
 768                next_buffer_table = max(next_buffer_table,
 769                                        channel->eventq.index +
 770                                        channel->eventq.entries);
 771                efx_for_each_channel_rx_queue(rx_queue, channel)
 772                        next_buffer_table = max(next_buffer_table,
 773                                                rx_queue->rxd.index +
 774                                                rx_queue->rxd.entries);
 775                efx_for_each_channel_tx_queue(tx_queue, channel)
 776                        next_buffer_table = max(next_buffer_table,
 777                                                tx_queue->txd.index +
 778                                                tx_queue->txd.entries);
 779        }
 780
 781        efx_device_detach_sync(efx);
 782        efx_stop_all(efx);
 783        efx_soft_disable_interrupts(efx);
 784
 785        /* Clone channels (where possible) */
 786        memset(other_channel, 0, sizeof(other_channel));
 787        for (i = 0; i < efx->n_channels; i++) {
 788                channel = efx->channel[i];
 789                if (channel->type->copy)
 790                        channel = channel->type->copy(channel);
 791                if (!channel) {
 792                        rc = -ENOMEM;
 793                        goto out;
 794                }
 795                other_channel[i] = channel;
 796        }
 797
 798        /* Swap entry counts and channel pointers */
 799        old_rxq_entries = efx->rxq_entries;
 800        old_txq_entries = efx->txq_entries;
 801        efx->rxq_entries = rxq_entries;
 802        efx->txq_entries = txq_entries;
 803        for (i = 0; i < efx->n_channels; i++) {
 804                channel = efx->channel[i];
 805                efx->channel[i] = other_channel[i];
 806                other_channel[i] = channel;
 807        }
 808
 809        /* Restart buffer table allocation */
 810        efx->next_buffer_table = next_buffer_table;
 811
 812        for (i = 0; i < efx->n_channels; i++) {
 813                channel = efx->channel[i];
 814                if (!channel->type->copy)
 815                        continue;
 816                rc = efx_probe_channel(channel);
 817                if (rc)
 818                        goto rollback;
 819                efx_init_napi_channel(efx->channel[i]);
 820        }
 821
 822out:
 823        /* Destroy unused channel structures */
 824        for (i = 0; i < efx->n_channels; i++) {
 825                channel = other_channel[i];
 826                if (channel && channel->type->copy) {
 827                        efx_fini_napi_channel(channel);
 828                        efx_remove_channel(channel);
 829                        kfree(channel);
 830                }
 831        }
 832
 833        rc2 = efx_soft_enable_interrupts(efx);
 834        if (rc2) {
 835                rc = rc ? rc : rc2;
 836                netif_err(efx, drv, efx->net_dev,
 837                          "unable to restart interrupts on channel reallocation\n");
 838                efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 839        } else {
 840                efx_start_all(efx);
 841                efx_device_attach_if_not_resetting(efx);
 842        }
 843        return rc;
 844
 845rollback:
 846        /* Swap back */
 847        efx->rxq_entries = old_rxq_entries;
 848        efx->txq_entries = old_txq_entries;
 849        for (i = 0; i < efx->n_channels; i++) {
 850                channel = efx->channel[i];
 851                efx->channel[i] = other_channel[i];
 852                other_channel[i] = channel;
 853        }
 854        goto out;
 855}
 856
 857int efx_set_channels(struct efx_nic *efx)
 858{
 859        struct efx_tx_queue *tx_queue;
 860        struct efx_channel *channel;
 861        unsigned int next_queue = 0;
 862        int xdp_queue_number;
 863        int rc;
 864
 865        efx->tx_channel_offset =
 866                efx_separate_tx_channels ?
 867                efx->n_channels - efx->n_tx_channels : 0;
 868
 869        if (efx->xdp_tx_queue_count) {
 870                EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
 871
 872                /* Allocate array for XDP TX queue lookup. */
 873                efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
 874                                             sizeof(*efx->xdp_tx_queues),
 875                                             GFP_KERNEL);
 876                if (!efx->xdp_tx_queues)
 877                        return -ENOMEM;
 878        }
 879
 880        /* We need to mark which channels really have RX and TX
 881         * queues, and adjust the TX queue numbers if we have separate
 882         * RX-only and TX-only channels.
 883         */
 884        xdp_queue_number = 0;
 885        efx_for_each_channel(channel, efx) {
 886                if (channel->channel < efx->n_rx_channels)
 887                        channel->rx_queue.core_index = channel->channel;
 888                else
 889                        channel->rx_queue.core_index = -1;
 890
 891                if (channel->channel >= efx->tx_channel_offset) {
 892                        if (efx_channel_is_xdp_tx(channel)) {
 893                                efx_for_each_channel_tx_queue(tx_queue, channel) {
 894                                        tx_queue->queue = next_queue++;
 895
 896                                        /* We may have a few left-over XDP TX
 897                                         * queues owing to xdp_tx_queue_count
 898                                         * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
 899                                         * We still allocate and probe those
 900                                         * TXQs, but never use them.
 901                                         */
 902                                        if (xdp_queue_number < efx->xdp_tx_queue_count) {
 903                                                netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
 904                                                          channel->channel, tx_queue->label,
 905                                                          xdp_queue_number, tx_queue->queue);
 906                                                efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
 907                                                xdp_queue_number++;
 908                                        }
 909                                }
 910                        } else {
 911                                efx_for_each_channel_tx_queue(tx_queue, channel) {
 912                                        tx_queue->queue = next_queue++;
 913                                        netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
 914                                                  channel->channel, tx_queue->label,
 915                                                  tx_queue->queue);
 916                                }
 917                        }
 918                }
 919        }
 920        WARN_ON(xdp_queue_number != efx->xdp_tx_queue_count);
 921
 922        rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
 923        if (rc)
 924                return rc;
 925        return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
 926}
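/* A hypothetical layout, purely to illustrate the numbering above: with two
 * ordinary TX channels of four queues each followed by one XDP channel, the
 * ordinary TX queues take HW numbers 0-7 and the XDP queues continue from
 * 8, while efx->xdp_tx_queues[] indexes only the XDP entries starting at 0.
 */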
 927
 928bool efx_default_channel_want_txqs(struct efx_channel *channel)
 929{
 930        return channel->channel - channel->efx->tx_channel_offset <
 931                channel->efx->n_tx_channels;
 932}
 933
 934/*************
 935 * START/STOP
 936 *************/
 937
 938int efx_soft_enable_interrupts(struct efx_nic *efx)
 939{
 940        struct efx_channel *channel, *end_channel;
 941        int rc;
 942
 943        BUG_ON(efx->state == STATE_DISABLED);
 944
 945        efx->irq_soft_enabled = true;
 946        smp_wmb();
 947
 948        efx_for_each_channel(channel, efx) {
 949                if (!channel->type->keep_eventq) {
 950                        rc = efx_init_eventq(channel);
 951                        if (rc)
 952                                goto fail;
 953                }
 954                efx_start_eventq(channel);
 955        }
 956
 957        efx_mcdi_mode_event(efx);
 958
 959        return 0;
 960fail:
 961        end_channel = channel;
 962        efx_for_each_channel(channel, efx) {
 963                if (channel == end_channel)
 964                        break;
 965                efx_stop_eventq(channel);
 966                if (!channel->type->keep_eventq)
 967                        efx_fini_eventq(channel);
 968        }
 969
 970        return rc;
 971}
 972
 973void efx_soft_disable_interrupts(struct efx_nic *efx)
 974{
 975        struct efx_channel *channel;
 976
 977        if (efx->state == STATE_DISABLED)
 978                return;
 979
 980        efx_mcdi_mode_poll(efx);
 981
 982        efx->irq_soft_enabled = false;
 983        smp_wmb();
 984
 985        if (efx->legacy_irq)
 986                synchronize_irq(efx->legacy_irq);
 987
 988        efx_for_each_channel(channel, efx) {
 989                if (channel->irq)
 990                        synchronize_irq(channel->irq);
 991
 992                efx_stop_eventq(channel);
 993                if (!channel->type->keep_eventq)
 994                        efx_fini_eventq(channel);
 995        }
 996
 997        /* Flush the asynchronous MCDI request queue */
 998        efx_mcdi_flush_async(efx);
 999}
1000
1001int efx_enable_interrupts(struct efx_nic *efx)
1002{
1003        struct efx_channel *channel, *end_channel;
1004        int rc;
1005
1006        /* TODO: Is this really a bug? */
1007        BUG_ON(efx->state == STATE_DISABLED);
1008
1009        if (efx->eeh_disabled_legacy_irq) {
1010                enable_irq(efx->legacy_irq);
1011                efx->eeh_disabled_legacy_irq = false;
1012        }
1013
1014        efx->type->irq_enable_master(efx);
1015
1016        efx_for_each_channel(channel, efx) {
1017                if (channel->type->keep_eventq) {
1018                        rc = efx_init_eventq(channel);
1019                        if (rc)
1020                                goto fail;
1021                }
1022        }
1023
1024        rc = efx_soft_enable_interrupts(efx);
1025        if (rc)
1026                goto fail;
1027
1028        return 0;
1029
1030fail:
1031        end_channel = channel;
1032        efx_for_each_channel(channel, efx) {
1033                if (channel == end_channel)
1034                        break;
1035                if (channel->type->keep_eventq)
1036                        efx_fini_eventq(channel);
1037        }
1038
1039        efx->type->irq_disable_non_ev(efx);
1040
1041        return rc;
1042}
1043
1044void efx_disable_interrupts(struct efx_nic *efx)
1045{
1046        struct efx_channel *channel;
1047
1048        efx_soft_disable_interrupts(efx);
1049
1050        efx_for_each_channel(channel, efx) {
1051                if (channel->type->keep_eventq)
1052                        efx_fini_eventq(channel);
1053        }
1054
1055        efx->type->irq_disable_non_ev(efx);
1056}
1057
1058void efx_start_channels(struct efx_nic *efx)
1059{
1060        struct efx_tx_queue *tx_queue;
1061        struct efx_rx_queue *rx_queue;
1062        struct efx_channel *channel;
1063
1064        efx_for_each_channel(channel, efx) {
1065                efx_for_each_channel_tx_queue(tx_queue, channel) {
1066                        efx_init_tx_queue(tx_queue);
1067                        atomic_inc(&efx->active_queues);
1068                }
1069
1070                efx_for_each_channel_rx_queue(rx_queue, channel) {
1071                        efx_init_rx_queue(rx_queue);
1072                        atomic_inc(&efx->active_queues);
1073                        efx_stop_eventq(channel);
1074                        efx_fast_push_rx_descriptors(rx_queue, false);
1075                        efx_start_eventq(channel);
1076                }
1077
1078                WARN_ON(channel->rx_pkt_n_frags);
1079        }
1080}
1081
1082void efx_stop_channels(struct efx_nic *efx)
1083{
1084        struct efx_tx_queue *tx_queue;
1085        struct efx_rx_queue *rx_queue;
1086        struct efx_channel *channel;
1087        int rc = 0;
1088
1089        /* Stop RX refill */
1090        efx_for_each_channel(channel, efx) {
1091                efx_for_each_channel_rx_queue(rx_queue, channel)
1092                        rx_queue->refill_enabled = false;
1093        }
1094
1095        efx_for_each_channel(channel, efx) {
1096                /* RX packet processing is pipelined, so wait for the
1097                 * NAPI handler to complete.  At least event queue 0
1098                 * might be kept active by non-data events, so don't
1099                 * use napi_synchronize() but actually disable NAPI
1100                 * temporarily.
1101                 */
1102                if (efx_channel_has_rx_queue(channel)) {
1103                        efx_stop_eventq(channel);
1104                        efx_start_eventq(channel);
1105                }
1106        }
1107
1108        if (efx->type->fini_dmaq)
1109                rc = efx->type->fini_dmaq(efx);
1110
1111        if (rc) {
1112                netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
1113        } else {
1114                netif_dbg(efx, drv, efx->net_dev,
1115                          "successfully flushed all queues\n");
1116        }
1117
1118        efx_for_each_channel(channel, efx) {
1119                efx_for_each_channel_rx_queue(rx_queue, channel)
1120                        efx_fini_rx_queue(rx_queue);
1121                efx_for_each_channel_tx_queue(tx_queue, channel)
1122                        efx_fini_tx_queue(tx_queue);
1123        }
1124}
1125
1126/**************************************************************************
1127 *
1128 * NAPI interface
1129 *
1130 *************************************************************************/
1131
1132/* Process channel's event queue
1133 *
1134 * This function is responsible for processing the event queue of a
1135 * single channel.  The caller must guarantee that this function will
1136 * never be concurrently called more than once on the same channel,
1137 * though different channels may be being processed concurrently.
1138 */
1139static int efx_process_channel(struct efx_channel *channel, int budget)
1140{
1141        struct efx_tx_queue *tx_queue;
1142        struct list_head rx_list;
1143        int spent;
1144
1145        if (unlikely(!channel->enabled))
1146                return 0;
1147
1148        /* Prepare the batch receive list */
1149        EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
1150        INIT_LIST_HEAD(&rx_list);
1151        channel->rx_list = &rx_list;
1152
1153        efx_for_each_channel_tx_queue(tx_queue, channel) {
1154                tx_queue->pkts_compl = 0;
1155                tx_queue->bytes_compl = 0;
1156        }
1157
1158        spent = efx_nic_process_eventq(channel, budget);
1159        if (spent && efx_channel_has_rx_queue(channel)) {
1160                struct efx_rx_queue *rx_queue =
1161                        efx_channel_get_rx_queue(channel);
1162
1163                efx_rx_flush_packet(channel);
1164                efx_fast_push_rx_descriptors(rx_queue, true);
1165        }
1166
1167        /* Update BQL */
1168        efx_for_each_channel_tx_queue(tx_queue, channel) {
1169                if (tx_queue->bytes_compl) {
1170                        netdev_tx_completed_queue(tx_queue->core_txq,
1171                                                  tx_queue->pkts_compl,
1172                                                  tx_queue->bytes_compl);
1173                }
1174        }
1175
1176        /* Receive any packets we queued up */
1177        netif_receive_skb_list(channel->rx_list);
1178        channel->rx_list = NULL;
1179
1180        return spent;
1181}
1182
1183static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
1184{
1185        int step = efx->irq_mod_step_us;
1186
1187        if (channel->irq_mod_score < irq_adapt_low_thresh) {
1188                if (channel->irq_moderation_us > step) {
1189                        channel->irq_moderation_us -= step;
1190                        efx->type->push_irq_moderation(channel);
1191                }
1192        } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
1193                if (channel->irq_moderation_us <
1194                    efx->irq_rx_moderation_us) {
1195                        channel->irq_moderation_us += step;
1196                        efx->type->push_irq_moderation(channel);
1197                }
1198        }
1199
1200        channel->irq_count = 0;
1201        channel->irq_mod_score = 0;
1202}
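/* Example of the adaptation above (the thresholds are the module parameter
 * defaults declared earlier in this file): a channel whose irq_mod_score
 * stays below 8000 over its last 1000 interrupts has its moderation reduced
 * by irq_mod_step_us, while a score above 16000 raises it, up to the
 * efx->irq_rx_moderation_us ceiling.
 */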
1203
1204/* NAPI poll handler
1205 *
1206 * NAPI guarantees serialisation of polls of the same device, which
1207 * provides the guarantee required by efx_process_channel().
1208 */
1209static int efx_poll(struct napi_struct *napi, int budget)
1210{
1211        struct efx_channel *channel =
1212                container_of(napi, struct efx_channel, napi_str);
1213        struct efx_nic *efx = channel->efx;
1214#ifdef CONFIG_RFS_ACCEL
1215        unsigned int time;
1216#endif
1217        int spent;
1218
1219        netif_vdbg(efx, intr, efx->net_dev,
1220                   "channel %d NAPI poll executing on CPU %d\n",
1221                   channel->channel, raw_smp_processor_id());
1222
1223        spent = efx_process_channel(channel, budget);
1224
1225        xdp_do_flush_map();
1226
1227        if (spent < budget) {
1228                if (efx_channel_has_rx_queue(channel) &&
1229                    efx->irq_rx_adaptive &&
1230                    unlikely(++channel->irq_count == 1000)) {
1231                        efx_update_irq_mod(efx, channel);
1232                }
1233
1234#ifdef CONFIG_RFS_ACCEL
1235                /* Perhaps expire some ARFS filters */
1236                time = jiffies - channel->rfs_last_expiry;
 1237                /* Would the quota computed by efx_filter_rfs_expire() be >= 20? */
1238                if (channel->rfs_filter_count * time >= 600 * HZ)
1239                        mod_delayed_work(system_wq, &channel->filter_work, 0);
1240#endif
1241
1242                /* There is no race here; although napi_disable() will
1243                 * only wait for napi_complete(), this isn't a problem
1244                 * since efx_nic_eventq_read_ack() will have no effect if
1245                 * interrupts have already been disabled.
1246                 */
1247                if (napi_complete_done(napi, spent))
1248                        efx_nic_eventq_read_ack(channel);
1249        }
1250
1251        return spent;
1252}
1253
1254void efx_init_napi_channel(struct efx_channel *channel)
1255{
1256        struct efx_nic *efx = channel->efx;
1257
1258        channel->napi_dev = efx->net_dev;
1259        netif_napi_add(channel->napi_dev, &channel->napi_str,
1260                       efx_poll, napi_weight);
1261}
1262
1263void efx_init_napi(struct efx_nic *efx)
1264{
1265        struct efx_channel *channel;
1266
1267        efx_for_each_channel(channel, efx)
1268                efx_init_napi_channel(channel);
1269}
1270
1271void efx_fini_napi_channel(struct efx_channel *channel)
1272{
1273        if (channel->napi_dev)
1274                netif_napi_del(&channel->napi_str);
1275
1276        channel->napi_dev = NULL;
1277}
1278
1279void efx_fini_napi(struct efx_nic *efx)
1280{
1281        struct efx_channel *channel;
1282
1283        efx_for_each_channel(channel, efx)
1284                efx_fini_napi_channel(channel);
1285}
1286