linux/drivers/net/ethernet/intel/ice/ice_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018, Intel Corporation. */
   3
   4/* Intel(R) Ethernet Connection E800 Series Linux Driver */
   5
   6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   7
   8#include <generated/utsrelease.h>
   9#include "ice.h"
  10#include "ice_base.h"
  11#include "ice_lib.h"
  12#include "ice_fltr.h"
  13#include "ice_dcb_lib.h"
  14#include "ice_dcb_nl.h"
  15#include "ice_devlink.h"
  16
  17#define DRV_SUMMARY     "Intel(R) Ethernet Connection E800 Series Linux Driver"
  18static const char ice_driver_string[] = DRV_SUMMARY;
  19static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
  20
  21/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
  22#define ICE_DDP_PKG_PATH        "intel/ice/ddp/"
  23#define ICE_DDP_PKG_FILE        ICE_DDP_PKG_PATH "ice.pkg"
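/* With the standard firmware search path (e.g. /lib/firmware/), the package
 * above is typically loaded from /lib/firmware/intel/ice/ddp/ice.pkg, i.e.
 * ICE_DDP_PKG_FILE is ICE_DDP_PKG_PATH with "ice.pkg" appended.
 */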
  24
  25MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
  26MODULE_DESCRIPTION(DRV_SUMMARY);
  27MODULE_LICENSE("GPL v2");
  28MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
  29
  30static int debug = -1;
  31module_param(debug, int, 0644);
  32#ifndef CONFIG_DYNAMIC_DEBUG
  33MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
  34#else
  35MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
  36#endif /* !CONFIG_DYNAMIC_DEBUG */
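/* Usage sketch (the values below are only illustrative, not prescriptive):
 * the parameter is given at module load time, e.g.
 *
 *	modprobe ice debug=16
 *
 * and, when CONFIG_DYNAMIC_DEBUG is not enabled, a HW debug_mask of the
 * 0x8XXXXXXX form may be passed instead, e.g.
 *
 *	modprobe ice debug=0x80000000
 */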
  37
  38static struct workqueue_struct *ice_wq;
  39static const struct net_device_ops ice_netdev_safe_mode_ops;
  40static const struct net_device_ops ice_netdev_ops;
  41static int ice_vsi_open(struct ice_vsi *vsi);
  42
  43static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
  44
  45static void ice_vsi_release_all(struct ice_pf *pf);
  46
  47bool netif_is_ice(struct net_device *dev)
  48{
  49        return dev && (dev->netdev_ops == &ice_netdev_ops);
  50}
  51
  52/**
  53 * ice_get_tx_pending - returns number of Tx descriptors not processed
  54 * @ring: the ring of descriptors
  55 */
  56static u16 ice_get_tx_pending(struct ice_ring *ring)
  57{
  58        u16 head, tail;
  59
  60        head = ring->next_to_clean;
  61        tail = ring->next_to_use;
  62
  63        if (head != tail)
  64                return (head < tail) ?
  65                        tail - head : (tail + ring->count - head);
  66        return 0;
  67}
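/* Worked example (illustrative values only): with ring->count = 8,
 * next_to_clean (head) = 6 and next_to_use (tail) = 2, the ring has wrapped,
 * so the descriptors still pending are tail + count - head = 2 + 8 - 6 = 4.
 */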
  68
  69/**
  70 * ice_check_for_hang_subtask - check for and recover hung queues
  71 * @pf: pointer to PF struct
  72 */
  73static void ice_check_for_hang_subtask(struct ice_pf *pf)
  74{
  75        struct ice_vsi *vsi = NULL;
  76        struct ice_hw *hw;
  77        unsigned int i;
  78        int packets;
  79        u32 v;
  80
  81        ice_for_each_vsi(pf, v)
  82                if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
  83                        vsi = pf->vsi[v];
  84                        break;
  85                }
  86
  87        if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
  88                return;
  89
  90        if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
  91                return;
  92
  93        hw = &vsi->back->hw;
  94
  95        for (i = 0; i < vsi->num_txq; i++) {
  96                struct ice_ring *tx_ring = vsi->tx_rings[i];
  97
  98                if (tx_ring && tx_ring->desc) {
  99                        /* If packet counter has not changed the queue is
 100                         * likely stalled, so force an interrupt for this
 101                         * queue.
 102                         *
 103                         * prev_pkt would be negative if there was no
 104                         * pending work.
 105                         */
 106                        packets = tx_ring->stats.pkts & INT_MAX;
 107                        if (tx_ring->tx_stats.prev_pkt == packets) {
 108                                /* Trigger sw interrupt to revive the queue */
 109                                ice_trigger_sw_intr(hw, tx_ring->q_vector);
 110                                continue;
 111                        }
 112
 113                        /* Memory barrier between read of packet count and call
 114                         * to ice_get_tx_pending()
 115                         */
 116                        smp_rmb();
 117                        tx_ring->tx_stats.prev_pkt =
 118                            ice_get_tx_pending(tx_ring) ? packets : -1;
 119                }
 120        }
 121}
 122
 123/**
 124 * ice_init_mac_fltr - Set initial MAC filters
 125 * @pf: board private structure
 126 *
 127 * Set initial set of MAC filters for PF VSI; configure filters for permanent
 128 * address and broadcast address. Returns 0 on success or a negative error
 129 * code on failure.
 130 */
 131static int ice_init_mac_fltr(struct ice_pf *pf)
 132{
 133        enum ice_status status;
 134        struct ice_vsi *vsi;
 135        u8 *perm_addr;
 136
 137        vsi = ice_get_main_vsi(pf);
 138        if (!vsi)
 139                return -EINVAL;
 140
 141        perm_addr = vsi->port_info->mac.perm_addr;
 142        status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
 143        if (status)
 144                return -EIO;
 145
 146        return 0;
 147}
 148
 149/**
 150 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
 151 * @netdev: the net device on which the sync is happening
 152 * @addr: MAC address to sync
 153 *
 154 * This is a callback function which is called by the in kernel device sync
 155 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
 156 * populates the tmp_sync_list, which is later used by ice_add_mac to add the
 157 * MAC filters from the hardware.
 158 */
 159static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
 160{
 161        struct ice_netdev_priv *np = netdev_priv(netdev);
 162        struct ice_vsi *vsi = np->vsi;
 163
 164        if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
 165                                     ICE_FWD_TO_VSI))
 166                return -EINVAL;
 167
 168        return 0;
 169}
 170
 171/**
 172 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
 173 * @netdev: the net device on which the unsync is happening
 174 * @addr: MAC address to unsync
 175 *
 176 * This is a callback function which is called by the in kernel device unsync
 177 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
 178 * populates the tmp_unsync_list, which is later used by
 179 * ice_fltr_remove_mac_list to delete the MAC filters from the hardware.
 180 */
 181static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
 182{
 183        struct ice_netdev_priv *np = netdev_priv(netdev);
 184        struct ice_vsi *vsi = np->vsi;
 185
 186        /* Under some circumstances, we might receive a request to delete our
 187         * own device address from our uc list. Because we store the device
 188         * address in the VSI's MAC filter list, we need to ignore such
 189         * requests and not delete our device address from this list.
 190         */
 191        if (ether_addr_equal(addr, netdev->dev_addr))
 192                return 0;
 193
 194        if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
 195                                     ICE_FWD_TO_VSI))
 196                return -EINVAL;
 197
 198        return 0;
 199}
 200
 201/**
 202 * ice_vsi_fltr_changed - check if filter state changed
 203 * @vsi: VSI to be checked
 204 *
 205 * returns true if filter state has changed, false otherwise.
 206 */
 207static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
 208{
 209        return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
 210               test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) ||
 211               test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 212}
 213
 214/**
 215 * ice_cfg_promisc - Enable or disable promiscuous mode for a given PF
 216 * @vsi: the VSI being configured
 217 * @promisc_m: mask of promiscuous config bits
 218 * @set_promisc: enable or disable promisc flag request
 219 *
 220 */
 221static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc)
 222{
 223        struct ice_hw *hw = &vsi->back->hw;
 224        enum ice_status status = 0;
 225
 226        if (vsi->type != ICE_VSI_PF)
 227                return 0;
 228
 229        if (vsi->num_vlan > 1) {
 230                status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
 231                                                  set_promisc);
 232        } else {
 233                if (set_promisc)
 234                        status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m,
 235                                                     0);
 236                else
 237                        status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m,
 238                                                       0);
 239        }
 240
 241        if (status)
 242                return -EIO;
 243
 244        return 0;
 245}
 246
 247/**
 248 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
 249 * @vsi: ptr to the VSI
 250 *
 251 * Push any outstanding VSI filter changes through the AdminQ.
 252 */
 253static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 254{
 255        struct device *dev = ice_pf_to_dev(vsi->back);
 256        struct net_device *netdev = vsi->netdev;
 257        bool promisc_forced_on = false;
 258        struct ice_pf *pf = vsi->back;
 259        struct ice_hw *hw = &pf->hw;
 260        enum ice_status status = 0;
 261        u32 changed_flags = 0;
 262        u8 promisc_m;
 263        int err = 0;
 264
 265        if (!vsi->netdev)
 266                return -EINVAL;
 267
 268        while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
 269                usleep_range(1000, 2000);
 270
 271        changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
 272        vsi->current_netdev_flags = vsi->netdev->flags;
 273
 274        INIT_LIST_HEAD(&vsi->tmp_sync_list);
 275        INIT_LIST_HEAD(&vsi->tmp_unsync_list);
 276
 277        if (ice_vsi_fltr_changed(vsi)) {
 278                clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 279                clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 280                clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 281
 282                /* grab the netdev's addr_list_lock */
 283                netif_addr_lock_bh(netdev);
 284                __dev_uc_sync(netdev, ice_add_mac_to_sync_list,
 285                              ice_add_mac_to_unsync_list);
 286                __dev_mc_sync(netdev, ice_add_mac_to_sync_list,
 287                              ice_add_mac_to_unsync_list);
 288                /* our temp lists are populated. release lock */
 289                netif_addr_unlock_bh(netdev);
 290        }
 291
 292        /* Remove MAC addresses in the unsync list */
 293        status = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
 294        ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
 295        if (status) {
 296                netdev_err(netdev, "Failed to delete MAC filters\n");
 297                /* if we failed because of alloc failures, just bail */
 298                if (status == ICE_ERR_NO_MEMORY) {
 299                        err = -ENOMEM;
 300                        goto out;
 301                }
 302        }
 303
 304        /* Add MAC addresses in the sync list */
 305        status = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
 306        ice_fltr_free_list(dev, &vsi->tmp_sync_list);
 307        /* If the filter was added successfully or already exists, do not
 308         * treat it as an error. Instead continue processing the rest of
 309         * the function.
 310         */
 311        if (status && status != ICE_ERR_ALREADY_EXISTS) {
 312                netdev_err(netdev, "Failed to add MAC filters\n");
 313                /* If there is no more space for new umac filters, VSI
 314                 * should go into promiscuous mode. There should be some
 315                 * space reserved for promiscuous filters.
 316                 */
 317                if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
 318                    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
 319                                      vsi->state)) {
 320                        promisc_forced_on = true;
 321                        netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
 322                                    vsi->vsi_num);
 323                } else {
 324                        err = -EIO;
 325                        goto out;
 326                }
 327        }
 328        /* check for changes in promiscuous modes */
 329        if (changed_flags & IFF_ALLMULTI) {
 330                if (vsi->current_netdev_flags & IFF_ALLMULTI) {
 331                        if (vsi->num_vlan > 1)
 332                                promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
 333                        else
 334                                promisc_m = ICE_MCAST_PROMISC_BITS;
 335
 336                        err = ice_cfg_promisc(vsi, promisc_m, true);
 337                        if (err) {
 338                                netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n",
 339                                           vsi->vsi_num);
 340                                vsi->current_netdev_flags &= ~IFF_ALLMULTI;
 341                                goto out_promisc;
 342                        }
 343                } else {
 344                        /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
 345                        if (vsi->num_vlan > 1)
 346                                promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
 347                        else
 348                                promisc_m = ICE_MCAST_PROMISC_BITS;
 349
 350                        err = ice_cfg_promisc(vsi, promisc_m, false);
 351                        if (err) {
 352                                netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n",
 353                                           vsi->vsi_num);
 354                                vsi->current_netdev_flags |= IFF_ALLMULTI;
 355                                goto out_promisc;
 356                        }
 357                }
 358        }
 359
 360        if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
 361            test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
 362                clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
 363                if (vsi->current_netdev_flags & IFF_PROMISC) {
 364                        /* Apply Rx filter rule to get traffic from wire */
 365                        if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
 366                                err = ice_set_dflt_vsi(pf->first_sw, vsi);
 367                                if (err && err != -EEXIST) {
 368                                        netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
 369                                                   err, vsi->vsi_num);
 370                                        vsi->current_netdev_flags &=
 371                                                ~IFF_PROMISC;
 372                                        goto out_promisc;
 373                                }
 374                                ice_cfg_vlan_pruning(vsi, false, false);
 375                        }
 376                } else {
 377                        /* Clear Rx filter to remove traffic from wire */
 378                        if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) {
 379                                err = ice_clear_dflt_vsi(pf->first_sw);
 380                                if (err) {
 381                                        netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
 382                                                   err, vsi->vsi_num);
 383                                        vsi->current_netdev_flags |=
 384                                                IFF_PROMISC;
 385                                        goto out_promisc;
 386                                }
 387                                if (vsi->num_vlan > 1)
 388                                        ice_cfg_vlan_pruning(vsi, true, false);
 389                        }
 390                }
 391        }
 392        goto exit;
 393
 394out_promisc:
 395        set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
 396        goto exit;
 397out:
 398        /* if something went wrong then set the changed flag so we try again */
 399        set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 400        set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 401exit:
 402        clear_bit(ICE_CFG_BUSY, vsi->state);
 403        return err;
 404}
 405
 406/**
 407 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
 408 * @pf: board private structure
 409 */
 410static void ice_sync_fltr_subtask(struct ice_pf *pf)
 411{
 412        int v;
 413
 414        if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
 415                return;
 416
 417        clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
 418
 419        ice_for_each_vsi(pf, v)
 420                if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
 421                    ice_vsi_sync_fltr(pf->vsi[v])) {
 422                        /* come back and try again later */
 423                        set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
 424                        break;
 425                }
 426}
 427
 428/**
 429 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
 430 * @pf: the PF
 431 * @locked: is the rtnl_lock already held
 432 */
 433static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
 434{
 435        int node;
 436        int v;
 437
 438        ice_for_each_vsi(pf, v)
 439                if (pf->vsi[v])
 440                        ice_dis_vsi(pf->vsi[v], locked);
 441
 442        for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
 443                pf->pf_agg_node[node].num_vsis = 0;
 444
 445        for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
 446                pf->vf_agg_node[node].num_vsis = 0;
 447}
 448
 449/**
 450 * ice_prepare_for_reset - prep for the core to reset
 451 * @pf: board private structure
 452 *
 453 * Inform or close all dependent features in prep for reset.
 454 */
 455static void
 456ice_prepare_for_reset(struct ice_pf *pf)
 457{
 458        struct ice_hw *hw = &pf->hw;
 459        unsigned int i;
 460
 461        /* already prepared for reset */
 462        if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
 463                return;
 464
 465        /* Notify VFs of impending reset */
 466        if (ice_check_sq_alive(hw, &hw->mailboxq))
 467                ice_vc_notify_reset(pf);
 468
 469        /* Disable VFs until reset is completed */
 470        ice_for_each_vf(pf, i)
 471                ice_set_vf_state_qs_dis(&pf->vf[i]);
 472
 473        /* clear SW filtering DB */
 474        ice_clear_hw_tbls(hw);
 475        /* disable the VSIs and their queues that are not already DOWN */
 476        ice_pf_dis_all_vsi(pf, false);
 477
 478        if (hw->port_info)
 479                ice_sched_clear_port(hw->port_info);
 480
 481        ice_shutdown_all_ctrlq(hw);
 482
 483        set_bit(ICE_PREPARED_FOR_RESET, pf->state);
 484}
 485
 486/**
 487 * ice_do_reset - Initiate one of many types of resets
 488 * @pf: board private structure
 489 * @reset_type: reset type requested
 490 * before this function was called.
 491 */
 492static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 493{
 494        struct device *dev = ice_pf_to_dev(pf);
 495        struct ice_hw *hw = &pf->hw;
 496
 497        dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
 498
 499        ice_prepare_for_reset(pf);
 500
 501        /* trigger the reset */
 502        if (ice_reset(hw, reset_type)) {
 503                dev_err(dev, "reset %d failed\n", reset_type);
 504                set_bit(ICE_RESET_FAILED, pf->state);
 505                clear_bit(ICE_RESET_OICR_RECV, pf->state);
 506                clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
 507                clear_bit(ICE_PFR_REQ, pf->state);
 508                clear_bit(ICE_CORER_REQ, pf->state);
 509                clear_bit(ICE_GLOBR_REQ, pf->state);
 510                return;
 511        }
 512
 513        /* PFR is a bit of a special case because it doesn't result in an OICR
 514         * interrupt. So for PFR, rebuild after the reset and clear the reset-
 515         * associated state bits.
 516         */
 517        if (reset_type == ICE_RESET_PFR) {
 518                pf->pfr_count++;
 519                ice_rebuild(pf, reset_type);
 520                clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
 521                clear_bit(ICE_PFR_REQ, pf->state);
 522                ice_reset_all_vfs(pf, true);
 523        }
 524}
 525
 526/**
 527 * ice_reset_subtask - Set up for resetting the device and driver
 528 * @pf: board private structure
 529 */
 530static void ice_reset_subtask(struct ice_pf *pf)
 531{
 532        enum ice_reset_req reset_type = ICE_RESET_INVAL;
 533
 534        /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
 535         * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
 536         * of reset is pending and sets bits in pf->state indicating the reset
 537         * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
 538         * prepare for pending reset if not already (for PF software-initiated
 539         * global resets the software should already be prepared for it as
 540         * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
 541         * by firmware or software on other PFs, that bit is not set so prepare
 542         * for the reset now), poll for reset done, rebuild and return.
 543         */
 544        if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
 545                /* Perform the largest reset requested */
 546                if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
 547                        reset_type = ICE_RESET_CORER;
 548                if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
 549                        reset_type = ICE_RESET_GLOBR;
 550                if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
 551                        reset_type = ICE_RESET_EMPR;
 552                /* return if no valid reset type requested */
 553                if (reset_type == ICE_RESET_INVAL)
 554                        return;
 555                ice_prepare_for_reset(pf);
 556
 557                /* make sure we are ready to rebuild */
 558                if (ice_check_reset(&pf->hw)) {
 559                        set_bit(ICE_RESET_FAILED, pf->state);
 560                } else {
 561                        /* done with reset. start rebuild */
 562                        pf->hw.reset_ongoing = false;
 563                        ice_rebuild(pf, reset_type);
 564                        /* clear bit to resume normal operations, but
 565                         * ICE_NEEDS_RESTART bit is set in case rebuild failed
 566                         */
 567                        clear_bit(ICE_RESET_OICR_RECV, pf->state);
 568                        clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
 569                        clear_bit(ICE_PFR_REQ, pf->state);
 570                        clear_bit(ICE_CORER_REQ, pf->state);
 571                        clear_bit(ICE_GLOBR_REQ, pf->state);
 572                        ice_reset_all_vfs(pf, true);
 573                }
 574
 575                return;
 576        }
 577
 578        /* No pending resets to finish processing. Check for new resets */
 579        if (test_bit(ICE_PFR_REQ, pf->state))
 580                reset_type = ICE_RESET_PFR;
 581        if (test_bit(ICE_CORER_REQ, pf->state))
 582                reset_type = ICE_RESET_CORER;
 583        if (test_bit(ICE_GLOBR_REQ, pf->state))
 584                reset_type = ICE_RESET_GLOBR;
 585        /* If no valid reset type requested just return */
 586        if (reset_type == ICE_RESET_INVAL)
 587                return;
 588
 589        /* reset if not already down or busy */
 590        if (!test_bit(ICE_DOWN, pf->state) &&
 591            !test_bit(ICE_CFG_BUSY, pf->state)) {
 592                ice_do_reset(pf, reset_type);
 593        }
 594}
 595
 596/**
 597 * ice_print_topo_conflict - print topology conflict message
 598 * @vsi: the VSI whose topology status is being checked
 599 */
 600static void ice_print_topo_conflict(struct ice_vsi *vsi)
 601{
 602        switch (vsi->port_info->phy.link_info.topo_media_conflict) {
 603        case ICE_AQ_LINK_TOPO_CONFLICT:
 604        case ICE_AQ_LINK_MEDIA_CONFLICT:
 605        case ICE_AQ_LINK_TOPO_UNREACH_PRT:
 606        case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
 607        case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
 608                netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
 609                break;
 610        case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
 611                netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
 612                break;
 613        default:
 614                break;
 615        }
 616}
 617
 618/**
 619 * ice_print_link_msg - print link up or down message
 620 * @vsi: the VSI whose link status is being queried
 621 * @isup: boolean for if the link is now up or down
 622 */
 623void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 624{
 625        struct ice_aqc_get_phy_caps_data *caps;
 626        const char *an_advertised;
 627        enum ice_status status;
 628        const char *fec_req;
 629        const char *speed;
 630        const char *fec;
 631        const char *fc;
 632        const char *an;
 633
 634        if (!vsi)
 635                return;
 636
 637        if (vsi->current_isup == isup)
 638                return;
 639
 640        vsi->current_isup = isup;
 641
 642        if (!isup) {
 643                netdev_info(vsi->netdev, "NIC Link is Down\n");
 644                return;
 645        }
 646
 647        switch (vsi->port_info->phy.link_info.link_speed) {
 648        case ICE_AQ_LINK_SPEED_100GB:
 649                speed = "100 G";
 650                break;
 651        case ICE_AQ_LINK_SPEED_50GB:
 652                speed = "50 G";
 653                break;
 654        case ICE_AQ_LINK_SPEED_40GB:
 655                speed = "40 G";
 656                break;
 657        case ICE_AQ_LINK_SPEED_25GB:
 658                speed = "25 G";
 659                break;
 660        case ICE_AQ_LINK_SPEED_20GB:
 661                speed = "20 G";
 662                break;
 663        case ICE_AQ_LINK_SPEED_10GB:
 664                speed = "10 G";
 665                break;
 666        case ICE_AQ_LINK_SPEED_5GB:
 667                speed = "5 G";
 668                break;
 669        case ICE_AQ_LINK_SPEED_2500MB:
 670                speed = "2.5 G";
 671                break;
 672        case ICE_AQ_LINK_SPEED_1000MB:
 673                speed = "1 G";
 674                break;
 675        case ICE_AQ_LINK_SPEED_100MB:
 676                speed = "100 M";
 677                break;
 678        default:
 679                speed = "Unknown ";
 680                break;
 681        }
 682
 683        switch (vsi->port_info->fc.current_mode) {
 684        case ICE_FC_FULL:
 685                fc = "Rx/Tx";
 686                break;
 687        case ICE_FC_TX_PAUSE:
 688                fc = "Tx";
 689                break;
 690        case ICE_FC_RX_PAUSE:
 691                fc = "Rx";
 692                break;
 693        case ICE_FC_NONE:
 694                fc = "None";
 695                break;
 696        default:
 697                fc = "Unknown";
 698                break;
 699        }
 700
 701        /* Get FEC mode based on negotiated link info */
 702        switch (vsi->port_info->phy.link_info.fec_info) {
 703        case ICE_AQ_LINK_25G_RS_528_FEC_EN:
 704        case ICE_AQ_LINK_25G_RS_544_FEC_EN:
 705                fec = "RS-FEC";
 706                break;
 707        case ICE_AQ_LINK_25G_KR_FEC_EN:
 708                fec = "FC-FEC/BASE-R";
 709                break;
 710        default:
 711                fec = "NONE";
 712                break;
 713        }
 714
 715        /* check if autoneg completed, might be false due to not supported */
 716        if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
 717                an = "True";
 718        else
 719                an = "False";
 720
 721        /* Get FEC mode requested based on PHY caps last SW configuration */
 722        caps = kzalloc(sizeof(*caps), GFP_KERNEL);
 723        if (!caps) {
 724                fec_req = "Unknown";
 725                an_advertised = "Unknown";
 726                goto done;
 727        }
 728
 729        status = ice_aq_get_phy_caps(vsi->port_info, false,
 730                                     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
 731        if (status)
 732                netdev_info(vsi->netdev, "Get phy capability failed.\n");
 733
 734        an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
 735
 736        if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
 737            caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
 738                fec_req = "RS-FEC";
 739        else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
 740                 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
 741                fec_req = "FC-FEC/BASE-R";
 742        else
 743                fec_req = "NONE";
 744
 745        kfree(caps);
 746
 747done:
 748        netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
 749                    speed, fec_req, fec, an_advertised, an, fc);
 750        ice_print_topo_conflict(vsi);
 751}
 752
 753/**
 754 * ice_vsi_link_event - update the VSI's netdev
 755 * @vsi: the VSI on which the link event occurred
 756 * @link_up: whether or not the VSI needs to be set up or down
 757 */
 758static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
 759{
 760        if (!vsi)
 761                return;
 762
 763        if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
 764                return;
 765
 766        if (vsi->type == ICE_VSI_PF) {
 767                if (link_up == netif_carrier_ok(vsi->netdev))
 768                        return;
 769
 770                if (link_up) {
 771                        netif_carrier_on(vsi->netdev);
 772                        netif_tx_wake_all_queues(vsi->netdev);
 773                } else {
 774                        netif_carrier_off(vsi->netdev);
 775                        netif_tx_stop_all_queues(vsi->netdev);
 776                }
 777        }
 778}
 779
 780/**
 781 * ice_set_dflt_mib - send a default config MIB to the FW
 782 * @pf: private PF struct
 783 *
 784 * This function sends a default configuration MIB to the FW.
 785 *
 786 * If this function errors out at any point, the driver is still able to
 787 * function. The main impact is that LFC may not operate as expected.
 788 * Therefore an error here should only be logged at debug level and the
 789 * driver rebuild/re-enable should continue.
 790 */
 791static void ice_set_dflt_mib(struct ice_pf *pf)
 792{
 793        struct device *dev = ice_pf_to_dev(pf);
 794        u8 mib_type, *buf, *lldpmib = NULL;
 795        u16 len, typelen, offset = 0;
 796        struct ice_lldp_org_tlv *tlv;
 797        struct ice_hw *hw = &pf->hw;
 798        u32 ouisubtype;
 799
 800        mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
 801        lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
 802        if (!lldpmib) {
 803                dev_dbg(dev, "%s Failed to allocate MIB memory\n",
 804                        __func__);
 805                return;
 806        }
 807
 808        /* Add ETS CFG TLV */
 809        tlv = (struct ice_lldp_org_tlv *)lldpmib;
 810        typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
 811                   ICE_IEEE_ETS_TLV_LEN);
 812        tlv->typelen = htons(typelen);
 813        ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 814                      ICE_IEEE_SUBTYPE_ETS_CFG);
 815        tlv->ouisubtype = htonl(ouisubtype);
 816
 817        buf = tlv->tlvinfo;
 818        buf[0] = 0;
 819
 820        /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
 821         * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
 822         * Octets 13 - 20 are TSA values - leave as zeros
 823         */
 824        buf[5] = 0x64;
 825        len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
 826        offset += len + 2;
 827        tlv = (struct ice_lldp_org_tlv *)
 828                ((char *)tlv + sizeof(tlv->typelen) + len);
 829
 830        /* Add ETS REC TLV */
 831        buf = tlv->tlvinfo;
 832        tlv->typelen = htons(typelen);
 833
 834        ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 835                      ICE_IEEE_SUBTYPE_ETS_REC);
 836        tlv->ouisubtype = htonl(ouisubtype);
 837
 838        /* First octet of buf is reserved
 839         * Octets 1 - 4 map UP to TC - all UPs map to zero
 840         * Octets 5 - 12 are BW values - set TC 0 to 100%.
 841         * Octets 13 - 20 are TSA value - leave as zeros
 842         */
 843        buf[5] = 0x64;
 844        offset += len + 2;
 845        tlv = (struct ice_lldp_org_tlv *)
 846                ((char *)tlv + sizeof(tlv->typelen) + len);
 847
 848        /* Add PFC CFG TLV */
 849        typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
 850                   ICE_IEEE_PFC_TLV_LEN);
 851        tlv->typelen = htons(typelen);
 852
 853        ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
 854                      ICE_IEEE_SUBTYPE_PFC_CFG);
 855        tlv->ouisubtype = htonl(ouisubtype);
 856
 857        /* Octet 1 left as all zeros - PFC disabled */
 858        buf[0] = 0x08;
 859        len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
 860        offset += len + 2;
 861
 862        if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
 863                dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
 864
 865        kfree(lldpmib);
 866}
 867
 868/**
 869 * ice_link_event - process the link event
 870 * @pf: PF that the link event is associated with
 871 * @pi: port_info for the port that the link event is associated with
 872 * @link_up: true if the physical link is up and false if it is down
 873 * @link_speed: current link speed received from the link event
 874 *
 875 * Returns 0 on success and negative on failure
 876 */
 877static int
 878ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 879               u16 link_speed)
 880{
 881        struct device *dev = ice_pf_to_dev(pf);
 882        struct ice_phy_info *phy_info;
 883        enum ice_status status;
 884        struct ice_vsi *vsi;
 885        u16 old_link_speed;
 886        bool old_link;
 887
 888        phy_info = &pi->phy;
 889        phy_info->link_info_old = phy_info->link_info;
 890
 891        old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
 892        old_link_speed = phy_info->link_info_old.link_speed;
 893
 894        /* update the link info structures and re-enable link events,
 895 * don't bail on failure since other bookkeeping is still needed
 896         */
 897        status = ice_update_link_info(pi);
 898        if (status)
 899                dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n",
 900                        pi->lport, ice_stat_str(status),
 901                        ice_aq_str(pi->hw->adminq.sq_last_status));
 902
 903        /* Check if the link state is up after updating link info, and treat
 904         * this event as an UP event since the link is actually UP now.
 905         */
 906        if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
 907                link_up = true;
 908
 909        vsi = ice_get_main_vsi(pf);
 910        if (!vsi || !vsi->port_info)
 911                return -EINVAL;
 912
 913        /* turn off PHY if media was removed */
 914        if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
 915            !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
 916                set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
 917                ice_set_link(vsi, false);
 918        }
 919
 920        /* if the old link up/down and speed is the same as the new */
 921        if (link_up == old_link && link_speed == old_link_speed)
 922                return 0;
 923
 924        if (ice_is_dcb_active(pf)) {
 925                if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
 926                        ice_dcb_rebuild(pf);
 927        } else {
 928                if (link_up)
 929                        ice_set_dflt_mib(pf);
 930        }
 931        ice_vsi_link_event(vsi, link_up);
 932        ice_print_link_msg(vsi, link_up);
 933
 934        ice_vc_notify_link_state(pf);
 935
 936        return 0;
 937}
 938
 939/**
 940 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
 941 * @pf: board private structure
 942 */
 943static void ice_watchdog_subtask(struct ice_pf *pf)
 944{
 945        int i;
 946
 947        /* if interface is down do nothing */
 948        if (test_bit(ICE_DOWN, pf->state) ||
 949            test_bit(ICE_CFG_BUSY, pf->state))
 950                return;
 951
 952        /* make sure we don't do these things too often */
 953        if (time_before(jiffies,
 954                        pf->serv_tmr_prev + pf->serv_tmr_period))
 955                return;
 956
 957        pf->serv_tmr_prev = jiffies;
 958
 959        /* Update the stats for active netdevs so the network stack
 960         * can look at updated numbers whenever it cares to
 961         */
 962        ice_update_pf_stats(pf);
 963        ice_for_each_vsi(pf, i)
 964                if (pf->vsi[i] && pf->vsi[i]->netdev)
 965                        ice_update_vsi_stats(pf->vsi[i]);
 966}
 967
 968/**
 969 * ice_init_link_events - enable/initialize link events
 970 * @pi: pointer to the port_info instance
 971 *
 972 * Returns -EIO on failure, 0 on success
 973 */
 974static int ice_init_link_events(struct ice_port_info *pi)
 975{
 976        u16 mask;
 977
 978        mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
 979                       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
 980
 981        if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
 982                dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
 983                        pi->lport);
 984                return -EIO;
 985        }
 986
 987        if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
 988                dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
 989                        pi->lport);
 990                return -EIO;
 991        }
 992
 993        return 0;
 994}
 995
 996/**
 997 * ice_handle_link_event - handle link event via ARQ
 998 * @pf: PF that the link event is associated with
 999 * @event: event structure containing link status info
1000 */
1001static int
1002ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
1003{
1004        struct ice_aqc_get_link_status_data *link_data;
1005        struct ice_port_info *port_info;
1006        int status;
1007
1008        link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
1009        port_info = pf->hw.port_info;
1010        if (!port_info)
1011                return -EINVAL;
1012
1013        status = ice_link_event(pf, port_info,
1014                                !!(link_data->link_info & ICE_AQ_LINK_UP),
1015                                le16_to_cpu(link_data->link_speed));
1016        if (status)
1017                dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
1018                        status);
1019
1020        return status;
1021}
1022
1023enum ice_aq_task_state {
1024        ICE_AQ_TASK_WAITING = 0,
1025        ICE_AQ_TASK_COMPLETE,
1026        ICE_AQ_TASK_CANCELED,
1027};
1028
1029struct ice_aq_task {
1030        struct hlist_node entry;
1031
1032        u16 opcode;
1033        struct ice_rq_event_info *event;
1034        enum ice_aq_task_state state;
1035};
1036
1037/**
1038 * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
1039 * @pf: pointer to the PF private structure
1040 * @opcode: the opcode to wait for
1041 * @timeout: how long to wait, in jiffies
1042 * @event: storage for the event info
1043 *
1044 * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
1045 * current thread will be put to sleep until the specified event occurs or
1046 * until the given timeout is reached.
1047 *
1048 * To obtain only the descriptor contents, pass an event without an allocated
1049 * msg_buf. If the complete data buffer is desired, allocate the
1050 * event->msg_buf with enough space ahead of time.
1051 *
1052 * Returns: zero on success, or a negative error code on failure.
1053 */
1054int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
1055                          struct ice_rq_event_info *event)
1056{
1057        struct device *dev = ice_pf_to_dev(pf);
1058        struct ice_aq_task *task;
1059        unsigned long start;
1060        long ret;
1061        int err;
1062
1063        task = kzalloc(sizeof(*task), GFP_KERNEL);
1064        if (!task)
1065                return -ENOMEM;
1066
1067        INIT_HLIST_NODE(&task->entry);
1068        task->opcode = opcode;
1069        task->event = event;
1070        task->state = ICE_AQ_TASK_WAITING;
1071
1072        spin_lock_bh(&pf->aq_wait_lock);
1073        hlist_add_head(&task->entry, &pf->aq_wait_list);
1074        spin_unlock_bh(&pf->aq_wait_lock);
1075
1076        start = jiffies;
1077
1078        ret = wait_event_interruptible_timeout(pf->aq_wait_queue, task->state,
1079                                               timeout);
1080        switch (task->state) {
1081        case ICE_AQ_TASK_WAITING:
1082                err = ret < 0 ? ret : -ETIMEDOUT;
1083                break;
1084        case ICE_AQ_TASK_CANCELED:
1085                err = ret < 0 ? ret : -ECANCELED;
1086                break;
1087        case ICE_AQ_TASK_COMPLETE:
1088                err = ret < 0 ? ret : 0;
1089                break;
1090        default:
1091                WARN(1, "Unexpected AdminQ wait task state %u", task->state);
1092                err = -EINVAL;
1093                break;
1094        }
1095
1096        dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
1097                jiffies_to_msecs(jiffies - start),
1098                jiffies_to_msecs(timeout),
1099                opcode);
1100
1101        spin_lock_bh(&pf->aq_wait_lock);
1102        hlist_del(&task->entry);
1103        spin_unlock_bh(&pf->aq_wait_lock);
1104        kfree(task);
1105
1106        return err;
1107}
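/* Caller sketch (hypothetical caller; the opcode variable and the 4096-byte
 * buffer size are assumptions, not requirements): wait up to three seconds
 * for a firmware completion and capture its data buffer.
 *
 *	struct ice_rq_event_info event = { };
 *	int err;
 *
 *	event.buf_len = 4096;
 *	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
 *	if (!event.msg_buf)
 *		return -ENOMEM;
 *
 *	err = ice_aq_wait_for_event(pf, opcode, 3 * HZ, &event);
 *	kfree(event.msg_buf);
 *	if (err)
 *		return err;
 *
 * Passing a NULL event.msg_buf instead returns only the descriptor contents,
 * as described in the kernel-doc above.
 */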
1108
1109/**
1110 * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
1111 * @pf: pointer to the PF private structure
1112 * @opcode: the opcode of the event
1113 * @event: the event to check
1114 *
1115 * Loops over the current list of pending threads waiting for an AdminQ event.
1116 * For each matching task, copy the contents of the event into the task
1117 * structure and wake up the thread.
1118 *
1119 * If multiple threads wait for the same opcode, they will all be woken up.
1120 *
1121 * Note that event->msg_buf will only be duplicated if the event has a buffer
1122 * with enough space already allocated. Otherwise, only the descriptor and
1123 * message length will be copied.
1124 *
1125 * The wait queue is woken only if at least one matching task was found.
1126 */
1127static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
1128                                struct ice_rq_event_info *event)
1129{
1130        struct ice_aq_task *task;
1131        bool found = false;
1132
1133        spin_lock_bh(&pf->aq_wait_lock);
1134        hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
1135                if (task->state || task->opcode != opcode)
1136                        continue;
1137
1138                memcpy(&task->event->desc, &event->desc, sizeof(event->desc));
1139                task->event->msg_len = event->msg_len;
1140
1141                /* Only copy the data buffer if a destination was set */
1142                if (task->event->msg_buf &&
1143                    task->event->buf_len > event->buf_len) {
1144                        memcpy(task->event->msg_buf, event->msg_buf,
1145                               event->buf_len);
1146                        task->event->buf_len = event->buf_len;
1147                }
1148
1149                task->state = ICE_AQ_TASK_COMPLETE;
1150                found = true;
1151        }
1152        spin_unlock_bh(&pf->aq_wait_lock);
1153
1154        if (found)
1155                wake_up(&pf->aq_wait_queue);
1156}
1157
1158/**
1159 * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
1160 * @pf: the PF private structure
1161 *
1162 * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
1163 * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
1164 */
1165static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
1166{
1167        struct ice_aq_task *task;
1168
1169        spin_lock_bh(&pf->aq_wait_lock);
1170        hlist_for_each_entry(task, &pf->aq_wait_list, entry)
1171                task->state = ICE_AQ_TASK_CANCELED;
1172        spin_unlock_bh(&pf->aq_wait_lock);
1173
1174        wake_up(&pf->aq_wait_queue);
1175}
1176
1177/**
1178 * __ice_clean_ctrlq - helper function to clean controlq rings
1179 * @pf: ptr to struct ice_pf
1180 * @q_type: specific Control queue type
1181 */
1182static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
1183{
1184        struct device *dev = ice_pf_to_dev(pf);
1185        struct ice_rq_event_info event;
1186        struct ice_hw *hw = &pf->hw;
1187        struct ice_ctl_q_info *cq;
1188        u16 pending, i = 0;
1189        const char *qtype;
1190        u32 oldval, val;
1191
1192        /* Do not clean control queue if/when PF reset fails */
1193        if (test_bit(ICE_RESET_FAILED, pf->state))
1194                return 0;
1195
1196        switch (q_type) {
1197        case ICE_CTL_Q_ADMIN:
1198                cq = &hw->adminq;
1199                qtype = "Admin";
1200                break;
1201        case ICE_CTL_Q_MAILBOX:
1202                cq = &hw->mailboxq;
1203                qtype = "Mailbox";
1204                /* we are going to try to detect a malicious VF, so set the
1205                 * state to begin detection
1206                 */
1207                hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
1208                break;
1209        default:
1210                dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
1211                return 0;
1212        }
1213
1214        /* check for error indications - PF_xx_AxQLEN register layouts for
1215         * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1216         */
1217        val = rd32(hw, cq->rq.len);
1218        if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1219                   PF_FW_ARQLEN_ARQCRIT_M)) {
1220                oldval = val;
1221                if (val & PF_FW_ARQLEN_ARQVFE_M)
1222                        dev_dbg(dev, "%s Receive Queue VF Error detected\n",
1223                                qtype);
1224                if (val & PF_FW_ARQLEN_ARQOVFL_M) {
1225                        dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
1226                                qtype);
1227                }
1228                if (val & PF_FW_ARQLEN_ARQCRIT_M)
1229                        dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
1230                                qtype);
1231                val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
1232                         PF_FW_ARQLEN_ARQCRIT_M);
1233                if (oldval != val)
1234                        wr32(hw, cq->rq.len, val);
1235        }
1236
1237        val = rd32(hw, cq->sq.len);
1238        if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1239                   PF_FW_ATQLEN_ATQCRIT_M)) {
1240                oldval = val;
1241                if (val & PF_FW_ATQLEN_ATQVFE_M)
1242                        dev_dbg(dev, "%s Send Queue VF Error detected\n",
1243                                qtype);
1244                if (val & PF_FW_ATQLEN_ATQOVFL_M) {
1245                        dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
1246                                qtype);
1247                }
1248                if (val & PF_FW_ATQLEN_ATQCRIT_M)
1249                        dev_dbg(dev, "%s Send Queue Critical Error detected\n",
1250                                qtype);
1251                val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
1252                         PF_FW_ATQLEN_ATQCRIT_M);
1253                if (oldval != val)
1254                        wr32(hw, cq->sq.len, val);
1255        }
1256
1257        event.buf_len = cq->rq_buf_size;
1258        event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
1259        if (!event.msg_buf)
1260                return 0;
1261
1262        do {
1263                enum ice_status ret;
1264                u16 opcode;
1265
1266                ret = ice_clean_rq_elem(hw, cq, &event, &pending);
1267                if (ret == ICE_ERR_AQ_NO_WORK)
1268                        break;
1269                if (ret) {
1270                        dev_err(dev, "%s Receive Queue event error %s\n", qtype,
1271                                ice_stat_str(ret));
1272                        break;
1273                }
1274
1275                opcode = le16_to_cpu(event.desc.opcode);
1276
1277                /* Notify any thread that might be waiting for this event */
1278                ice_aq_check_events(pf, opcode, &event);
1279
1280                switch (opcode) {
1281                case ice_aqc_opc_get_link_status:
1282                        if (ice_handle_link_event(pf, &event))
1283                                dev_err(dev, "Could not handle link event\n");
1284                        break;
1285                case ice_aqc_opc_event_lan_overflow:
1286                        ice_vf_lan_overflow_event(pf, &event);
1287                        break;
1288                case ice_mbx_opc_send_msg_to_pf:
1289                        if (!ice_is_malicious_vf(pf, &event, i, pending))
1290                                ice_vc_process_vf_msg(pf, &event);
1291                        break;
1292                case ice_aqc_opc_fw_logging:
1293                        ice_output_fw_log(hw, &event.desc, event.msg_buf);
1294                        break;
1295                case ice_aqc_opc_lldp_set_mib_change:
1296                        ice_dcb_process_lldp_set_mib_change(pf, &event);
1297                        break;
1298                default:
1299                        dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
1300                                qtype, opcode);
1301                        break;
1302                }
1303        } while (pending && (i++ < ICE_DFLT_IRQ_WORK));
1304
1305        kfree(event.msg_buf);
1306
1307        return pending && (i == ICE_DFLT_IRQ_WORK);
1308}
1309
1310/**
1311 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
1312 * @hw: pointer to hardware info
1313 * @cq: control queue information
1314 *
1315 * returns true if there are pending messages in a queue, false if there aren't
1316 */
1317static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
1318{
1319        u16 ntu;
1320
1321        ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
1322        return cq->rq.next_to_clean != ntu;
1323}
1324
1325/**
1326 * ice_clean_adminq_subtask - clean the AdminQ rings
1327 * @pf: board private structure
1328 */
1329static void ice_clean_adminq_subtask(struct ice_pf *pf)
1330{
1331        struct ice_hw *hw = &pf->hw;
1332
1333        if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
1334                return;
1335
1336        if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
1337                return;
1338
1339        clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
1340
1341        /* There might be a situation where new messages arrive to a control
1342         * queue between processing the last message and clearing the
1343         * EVENT_PENDING bit. So before exiting, check queue head again (using
1344         * ice_ctrlq_pending) and process new messages if any.
1345         */
1346        if (ice_ctrlq_pending(hw, &hw->adminq))
1347                __ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
1348
1349        ice_flush(hw);
1350}
1351
1352/**
1353 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
1354 * @pf: board private structure
1355 */
1356static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
1357{
1358        struct ice_hw *hw = &pf->hw;
1359
1360        if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
1361                return;
1362
1363        if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
1364                return;
1365
1366        clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
1367
1368        if (ice_ctrlq_pending(hw, &hw->mailboxq))
1369                __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
1370
1371        ice_flush(hw);
1372}
1373
1374/**
1375 * ice_service_task_schedule - schedule the service task to wake up
1376 * @pf: board private structure
1377 *
1378 * If not already scheduled, this puts the task into the work queue.
1379 */
1380void ice_service_task_schedule(struct ice_pf *pf)
1381{
1382        if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
1383            !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
1384            !test_bit(ICE_NEEDS_RESTART, pf->state))
1385                queue_work(ice_wq, &pf->serv_task);
1386}
1387
1388/**
1389 * ice_service_task_complete - finish up the service task
1390 * @pf: board private structure
1391 */
1392static void ice_service_task_complete(struct ice_pf *pf)
1393{
1394        WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
1395
1396        /* force memory (pf->state) to sync before next service task */
1397        smp_mb__before_atomic();
1398        clear_bit(ICE_SERVICE_SCHED, pf->state);
1399}
1400
1401/**
1402 * ice_service_task_stop - stop service task and cancel works
1403 * @pf: board private structure
1404 *
1405 * Return 0 if the ICE_SERVICE_DIS bit was not already set,
1406 * 1 otherwise.
1407 */
1408static int ice_service_task_stop(struct ice_pf *pf)
1409{
1410        int ret;
1411
1412        ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
1413
1414        if (pf->serv_tmr.function)
1415                del_timer_sync(&pf->serv_tmr);
1416        if (pf->serv_task.func)
1417                cancel_work_sync(&pf->serv_task);
1418
1419        clear_bit(ICE_SERVICE_SCHED, pf->state);
1420        return ret;
1421}
1422
1423/**
1424 * ice_service_task_restart - restart service task and schedule works
1425 * @pf: board private structure
1426 *
1427 * This function is needed for the suspend and resume paths (e.g. WoL scenario)
1428 */
1429static void ice_service_task_restart(struct ice_pf *pf)
1430{
1431        clear_bit(ICE_SERVICE_DIS, pf->state);
1432        ice_service_task_schedule(pf);
1433}
1434
1435/**
1436 * ice_service_timer - timer callback to schedule service task
1437 * @t: pointer to timer_list
1438 */
1439static void ice_service_timer(struct timer_list *t)
1440{
1441        struct ice_pf *pf = from_timer(pf, t, serv_tmr);
1442
1443        mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
1444        ice_service_task_schedule(pf);
1445}
1446
1447/**
1448 * ice_handle_mdd_event - handle malicious driver detect event
1449 * @pf: pointer to the PF structure
1450 *
1451 * Called from service task. OICR interrupt handler indicates MDD event.
1452 * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
1453 * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
1454 * disable the queue, the PF can be configured to reset the VF using ethtool
1455 * private flag mdd-auto-reset-vf.
1456 */
1457static void ice_handle_mdd_event(struct ice_pf *pf)
1458{
1459        struct device *dev = ice_pf_to_dev(pf);
1460        struct ice_hw *hw = &pf->hw;
1461        unsigned int i;
1462        u32 reg;
1463
1464        if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
1465                /* Since the VF MDD event logging is rate limited, check if
1466                 * there are pending MDD events.
1467                 */
1468                ice_print_vfs_mdd_events(pf);
1469                return;
1470        }
1471
1472        /* find what triggered an MDD event */
1473        reg = rd32(hw, GL_MDET_TX_PQM);
1474        if (reg & GL_MDET_TX_PQM_VALID_M) {
1475                u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
1476                                GL_MDET_TX_PQM_PF_NUM_S;
1477                u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
1478                                GL_MDET_TX_PQM_VF_NUM_S;
1479                u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
1480                                GL_MDET_TX_PQM_MAL_TYPE_S;
1481                u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
1482                                GL_MDET_TX_PQM_QNUM_S);
1483
1484                if (netif_msg_tx_err(pf))
1485                        dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1486                                 event, queue, pf_num, vf_num);
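                    /* clear the detection register (write all 1s) so a new
                     * event can be latched
                     */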
1487                wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
1488        }
1489
1490        reg = rd32(hw, GL_MDET_TX_TCLAN);
1491        if (reg & GL_MDET_TX_TCLAN_VALID_M) {
1492                u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
1493                                GL_MDET_TX_TCLAN_PF_NUM_S;
1494                u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
1495                                GL_MDET_TX_TCLAN_VF_NUM_S;
1496                u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
1497                                GL_MDET_TX_TCLAN_MAL_TYPE_S;
1498                u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
1499                                GL_MDET_TX_TCLAN_QNUM_S);
1500
1501                if (netif_msg_tx_err(pf))
1502                        dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1503                                 event, queue, pf_num, vf_num);
1504                wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
1505        }
1506
1507        reg = rd32(hw, GL_MDET_RX);
1508        if (reg & GL_MDET_RX_VALID_M) {
1509                u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
1510                                GL_MDET_RX_PF_NUM_S;
1511                u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
1512                                GL_MDET_RX_VF_NUM_S;
1513                u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
1514                                GL_MDET_RX_MAL_TYPE_S;
1515                u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
1516                                GL_MDET_RX_QNUM_S);
1517
1518                if (netif_msg_rx_err(pf))
1519                        dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
1520                                 event, queue, pf_num, vf_num);
1521                wr32(hw, GL_MDET_RX, 0xffffffff);
1522        }
1523
1524        /* check to see if this PF caused an MDD event */
1525        reg = rd32(hw, PF_MDET_TX_PQM);
1526        if (reg & PF_MDET_TX_PQM_VALID_M) {
1527                wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
1528                if (netif_msg_tx_err(pf))
1529                        dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
1530        }
1531
1532        reg = rd32(hw, PF_MDET_TX_TCLAN);
1533        if (reg & PF_MDET_TX_TCLAN_VALID_M) {
1534                wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
1535                if (netif_msg_tx_err(pf))
1536                        dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
1537        }
1538
1539        reg = rd32(hw, PF_MDET_RX);
1540        if (reg & PF_MDET_RX_VALID_M) {
1541                wr32(hw, PF_MDET_RX, 0xFFFF);
1542                if (netif_msg_rx_err(pf))
1543                        dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
1544        }
1545
1546        /* Check to see if one of the VFs caused an MDD event, and then
1547         * increment counters and set print pending
1548         */
1549        ice_for_each_vf(pf, i) {
1550                struct ice_vf *vf = &pf->vf[i];
1551
1552                reg = rd32(hw, VP_MDET_TX_PQM(i));
1553                if (reg & VP_MDET_TX_PQM_VALID_M) {
1554                        wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
1555                        vf->mdd_tx_events.count++;
1556                        set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1557                        if (netif_msg_tx_err(pf))
1558                                dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
1559                                         i);
1560                }
1561
1562                reg = rd32(hw, VP_MDET_TX_TCLAN(i));
1563                if (reg & VP_MDET_TX_TCLAN_VALID_M) {
1564                        wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
1565                        vf->mdd_tx_events.count++;
1566                        set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1567                        if (netif_msg_tx_err(pf))
1568                                dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
1569                                         i);
1570                }
1571
1572                reg = rd32(hw, VP_MDET_TX_TDPU(i));
1573                if (reg & VP_MDET_TX_TDPU_VALID_M) {
1574                        wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
1575                        vf->mdd_tx_events.count++;
1576                        set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1577                        if (netif_msg_tx_err(pf))
1578                                dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
1579                                         i);
1580                }
1581
1582                reg = rd32(hw, VP_MDET_RX(i));
1583                if (reg & VP_MDET_RX_VALID_M) {
1584                        wr32(hw, VP_MDET_RX(i), 0xFFFF);
1585                        vf->mdd_rx_events.count++;
1586                        set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
1587                        if (netif_msg_rx_err(pf))
1588                                dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
1589                                         i);
1590
1591                        /* Since the queue is disabled on VF Rx MDD events, the
1592                         * PF can be configured to reset the VF through ethtool
1593                         * private flag mdd-auto-reset-vf.
1594                         */
1595                        if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
1596                                /* VF MDD event counters will be cleared by
1597                                 * reset, so print the event prior to reset.
1598                                 */
1599                                ice_print_vf_rx_mdd_event(vf);
1600                                ice_reset_vf(&pf->vf[i], false);
1601                        }
1602                }
1603        }
1604
1605        ice_print_vfs_mdd_events(pf);
1606}
1607
1608/**
1609 * ice_force_phys_link_state - Force the physical link state
1610 * @vsi: VSI to force the physical link state to up/down
1611 * @link_up: true/false indicates to set the physical link to up/down
1612 *
1613 * Force the physical link state by getting the current PHY capabilities from
1614 * hardware and setting the PHY config based on the determined capabilities. If
1615 * the link changes, a link event will be triggered because both the Enable
1616 * Automatic Link Update and LESM Enable bits are set when setting the PHY capabilities.
1617 *
1618 * Returns 0 on success, negative on failure
1619 */
1620static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
1621{
1622        struct ice_aqc_get_phy_caps_data *pcaps;
1623        struct ice_aqc_set_phy_cfg_data *cfg;
1624        struct ice_port_info *pi;
1625        struct device *dev;
1626        int retcode;
1627
1628        if (!vsi || !vsi->port_info || !vsi->back)
1629                return -EINVAL;
1630        if (vsi->type != ICE_VSI_PF)
1631                return 0;
1632
1633        dev = ice_pf_to_dev(vsi->back);
1634
1635        pi = vsi->port_info;
1636
1637        pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1638        if (!pcaps)
1639                return -ENOMEM;
1640
1641        retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
1642                                      NULL);
1643        if (retcode) {
1644                dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
1645                        vsi->vsi_num, retcode);
1646                retcode = -EIO;
1647                goto out;
1648        }
1649
1650        /* No change in link */
1651        if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
1652            link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
1653                goto out;
1654
1655        /* Use the current user PHY configuration. The current user PHY
1656         * configuration is initialized during probe from PHY capabilities
1657         * software mode, and updated on set PHY configuration.
1658         */
1659        cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
1660        if (!cfg) {
1661                retcode = -ENOMEM;
1662                goto out;
1663        }
1664
1665        cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
1666        if (link_up)
1667                cfg->caps |= ICE_AQ_PHY_ENA_LINK;
1668        else
1669                cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
1670
1671        retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
1672        if (retcode) {
1673                dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
1674                        vsi->vsi_num, retcode);
1675                retcode = -EIO;
1676        }
1677
1678        kfree(cfg);
1679out:
1680        kfree(pcaps);
1681        return retcode;
1682}
1683
1684/**
1685 * ice_init_nvm_phy_type - Initialize the NVM PHY type
1686 * @pi: port info structure
1687 *
1688 * Initialize nvm_phy_type_[low|high] for link lenient mode support
1689 */
1690static int ice_init_nvm_phy_type(struct ice_port_info *pi)
1691{
1692        struct ice_aqc_get_phy_caps_data *pcaps;
1693        struct ice_pf *pf = pi->hw->back;
1694        enum ice_status status;
1695        int err = 0;
1696
1697        pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1698        if (!pcaps)
1699                return -ENOMEM;
1700
1701        status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps,
1702                                     NULL);
1703
1704        if (status) {
1705                dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1706                err = -EIO;
1707                goto out;
1708        }
1709
1710        pf->nvm_phy_type_hi = pcaps->phy_type_high;
1711        pf->nvm_phy_type_lo = pcaps->phy_type_low;
1712
1713out:
1714        kfree(pcaps);
1715        return err;
1716}
1717
1718/**
1719 * ice_init_link_dflt_override - Initialize link default override
1720 * @pi: port info structure
1721 *
1722 * Initialize link default override and PHY total port shutdown during probe
1723 */
1724static void ice_init_link_dflt_override(struct ice_port_info *pi)
1725{
1726        struct ice_link_default_override_tlv *ldo;
1727        struct ice_pf *pf = pi->hw->back;
1728
1729        ldo = &pf->link_dflt_override;
1730        if (ice_get_link_default_override(ldo, pi))
1731                return;
1732
1733        if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
1734                return;
1735
1736        /* Enable Total Port Shutdown (override/replace link-down-on-close
1737         * ethtool private flag) for ports with Port Disable bit set.
1738         */
1739        set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
1740        set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
1741}
1742
1743/**
1744 * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
1745 * @pi: port info structure
1746 *
1747 * If default override is enabled, initialize the user PHY cfg speed and FEC
1748 * settings using the default override mask from the NVM.
1749 *
1750 * The PHY should only be configured with the default override settings the
1751 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
1752 * is used to indicate that the user PHY cfg default override is initialized
1753 * and the PHY has not been configured with the default override settings. The
1754 * state is set here, and cleared in ice_configure_phy the first time the PHY is
1755 * configured.
1756 *
1757 * This function should be called only if the FW doesn't support default
1758 * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
1759 */
1760static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
1761{
1762        struct ice_link_default_override_tlv *ldo;
1763        struct ice_aqc_set_phy_cfg_data *cfg;
1764        struct ice_phy_info *phy = &pi->phy;
1765        struct ice_pf *pf = pi->hw->back;
1766
1767        ldo = &pf->link_dflt_override;
1768
1769        /* If link default override is enabled, use it to mask the NVM PHY
1770         * capabilities for the speed and FEC default configuration.
1771         */
1772        cfg = &phy->curr_user_phy_cfg;
1773
1774        if (ldo->phy_type_low || ldo->phy_type_high) {
1775                cfg->phy_type_low = pf->nvm_phy_type_lo &
1776                                    cpu_to_le64(ldo->phy_type_low);
1777                cfg->phy_type_high = pf->nvm_phy_type_hi &
1778                                     cpu_to_le64(ldo->phy_type_high);
1779        }
1780        cfg->link_fec_opt = ldo->fec_options;
1781        phy->curr_user_fec_req = ICE_FEC_AUTO;
1782
1783        set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
1784}
1785
1786/**
1787 * ice_init_phy_user_cfg - Initialize the PHY user configuration
1788 * @pi: port info structure
1789 *
1790 * Initialize the current user PHY configuration, and the requested speed, FEC,
1791 * and FC modes to their defaults. The PHY defaults come from the get PHY
1792 * capabilities (topology with media) response, so call this only once media is
1793 * available; an error is returned otherwise. The PHY initialization completed
1794 * state is set here.
1795 *
1796 * These configurations are used when setting PHY
1797 * configuration. The user PHY configuration is updated on set PHY
1798 * configuration. Returns 0 on success, negative on failure
1799 */
1800static int ice_init_phy_user_cfg(struct ice_port_info *pi)
1801{
1802        struct ice_aqc_get_phy_caps_data *pcaps;
1803        struct ice_phy_info *phy = &pi->phy;
1804        struct ice_pf *pf = pi->hw->back;
1805        enum ice_status status;
1806        int err = 0;
1807
1808        if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
1809                return -EIO;
1810
1811        pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1812        if (!pcaps)
1813                return -ENOMEM;
1814
1815        if (ice_fw_supports_report_dflt_cfg(pi->hw))
1816                status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
1817                                             pcaps, NULL);
1818        else
1819                status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
1820                                             pcaps, NULL);
1821        if (status) {
1822                dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
1823                err = -EIO;
1824                goto err_out;
1825        }
1826
1827        ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
1828
1829        /* check if lenient mode is supported and enabled */
1830        if (ice_fw_supports_link_override(pi->hw) &&
1831            !(pcaps->module_compliance_enforcement &
1832              ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
1833                set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
1834
1835                /* if the FW supports default PHY configuration mode, then the driver
1836                 * does not have to apply link override settings. If not,
1837                 * initialize user PHY configuration with link override values
1838                 */
1839                if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
1840                    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
1841                        ice_init_phy_cfg_dflt_override(pi);
1842                        goto out;
1843                }
1844        }
1845
1846        /* if link default override is not enabled, set user flow control and
1847         * FEC settings based on what get_phy_caps returned
1848         */
1849        phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
1850                                                      pcaps->link_fec_options);
1851        phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
1852
1853out:
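            /* request the full speed mask by default, i.e. no user speed restriction */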
1854        phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
1855        set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
1856err_out:
1857        kfree(pcaps);
1858        return err;
1859}
1860
1861/**
1862 * ice_configure_phy - configure PHY
1863 * @vsi: VSI of PHY
1864 *
1865 * Set the PHY configuration. If the current PHY configuration is the same as
1866 * the curr_user_phy_cfg, then do nothing to avoid a link flap. Otherwise,
1867 * configure it based on the get PHY capabilities for the topology with media.
1868 */
1869static int ice_configure_phy(struct ice_vsi *vsi)
1870{
1871        struct device *dev = ice_pf_to_dev(vsi->back);
1872        struct ice_port_info *pi = vsi->port_info;
1873        struct ice_aqc_get_phy_caps_data *pcaps;
1874        struct ice_aqc_set_phy_cfg_data *cfg;
1875        struct ice_phy_info *phy = &pi->phy;
1876        struct ice_pf *pf = vsi->back;
1877        enum ice_status status;
1878        int err = 0;
1879
1880        /* Ensure we have media as we cannot configure a medialess port */
1881        if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
1882                return -EPERM;
1883
1884        ice_print_topo_conflict(vsi);
1885
1886        if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
1887                return -EPERM;
1888
1889        if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
1890                return ice_force_phys_link_state(vsi, true);
1891
1892        pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
1893        if (!pcaps)
1894                return -ENOMEM;
1895
1896        /* Get current PHY config */
1897        status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
1898                                     NULL);
1899        if (status) {
1900                dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n",
1901                        vsi->vsi_num, ice_stat_str(status));
1902                err = -EIO;
1903                goto done;
1904        }
1905
1906        /* If PHY enable link is configured and configuration has not changed,
1907         * there's nothing to do
1908         */
1909        if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
1910            ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
1911                goto done;
1912
1913        /* Use PHY topology as baseline for configuration */
1914        memset(pcaps, 0, sizeof(*pcaps));
1915        if (ice_fw_supports_report_dflt_cfg(pi->hw))
1916                status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
1917                                             pcaps, NULL);
1918        else
1919                status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
1920                                             pcaps, NULL);
1921        if (status) {
1922                dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n",
1923                        vsi->vsi_num, ice_stat_str(status));
1924                err = -EIO;
1925                goto done;
1926        }
1927
1928        cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
1929        if (!cfg) {
1930                err = -ENOMEM;
1931                goto done;
1932        }
1933
1934        ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
1935
1936        /* Speed - If default override pending, use curr_user_phy_cfg set in
1937         * ice_init_phy_cfg_dflt_override.
1938         */
1939        if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
1940                               vsi->back->state)) {
1941                cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
1942                cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
1943        } else {
1944                u64 phy_low = 0, phy_high = 0;
1945
1946                ice_update_phy_type(&phy_low, &phy_high,
1947                                    pi->phy.curr_user_speed_req);
1948                cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
1949                cfg->phy_type_high = pcaps->phy_type_high &
1950                                     cpu_to_le64(phy_high);
1951        }
1952
1953        /* Can't provide what was requested; use PHY capabilities */
1954        if (!cfg->phy_type_low && !cfg->phy_type_high) {
1955                cfg->phy_type_low = pcaps->phy_type_low;
1956                cfg->phy_type_high = pcaps->phy_type_high;
1957        }
1958
1959        /* FEC */
1960        ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
1961
1962        /* Can't provide what was requested; use PHY capabilities */
1963        if (cfg->link_fec_opt !=
1964            (cfg->link_fec_opt & pcaps->link_fec_options)) {
1965                cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
1966                cfg->link_fec_opt = pcaps->link_fec_options;
1967        }
1968
1969        /* Flow Control - always supported; no need to check against
1970         * capabilities
1971         */
1972        ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
1973
1974        /* Enable link and link update */
1975        cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
1976
1977        status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
1978        if (status) {
1979                dev_err(dev, "Failed to set phy config, VSI %d error %s\n",
1980                        vsi->vsi_num, ice_stat_str(status));
1981                err = -EIO;
1982        }
1983
1984        kfree(cfg);
1985done:
1986        kfree(pcaps);
1987        return err;
1988}
1989
1990/**
1991 * ice_check_media_subtask - Check for media
1992 * @pf: pointer to PF struct
1993 *
1994 * If media is available, then initialize the PHY user configuration if it has
1995 * not been done already, and configure the PHY if the interface is up.
1996 */
1997static void ice_check_media_subtask(struct ice_pf *pf)
1998{
1999        struct ice_port_info *pi;
2000        struct ice_vsi *vsi;
2001        int err;
2002
2003        /* No need to check for media if it's already present */
2004        if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
2005                return;
2006
2007        vsi = ice_get_main_vsi(pf);
2008        if (!vsi)
2009                return;
2010
2011        /* Refresh link info and check if media is present */
2012        pi = vsi->port_info;
2013        err = ice_update_link_info(pi);
2014        if (err)
2015                return;
2016
2017        if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
2018                if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
2019                        ice_init_phy_user_cfg(pi);
2020
2021                /* PHY settings are reset on media insertion, reconfigure
2022                 * PHY to preserve settings.
2023                 */
2024                if (test_bit(ICE_VSI_DOWN, vsi->state) &&
2025                    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
2026                        return;
2027
2028                err = ice_configure_phy(vsi);
2029                if (!err)
2030                        clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
2031
2032                /* A Link Status Event will be generated; the event handler
2033                 * will complete bringing the interface up
2034                 */
2035        }
2036}
2037
2038/**
2039 * ice_service_task - manage and run subtasks
2040 * @work: pointer to work_struct contained by the PF struct
2041 */
2042static void ice_service_task(struct work_struct *work)
2043{
2044        struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
2045        unsigned long start_time = jiffies;
2046
2047        /* subtasks */
2048
2049        /* process reset requests first */
2050        ice_reset_subtask(pf);
2051
2052        /* bail if a reset/recovery cycle is pending or rebuild failed */
2053        if (ice_is_reset_in_progress(pf->state) ||
2054            test_bit(ICE_SUSPENDED, pf->state) ||
2055            test_bit(ICE_NEEDS_RESTART, pf->state)) {
2056                ice_service_task_complete(pf);
2057                return;
2058        }
2059
2060        ice_clean_adminq_subtask(pf);
2061        ice_check_media_subtask(pf);
2062        ice_check_for_hang_subtask(pf);
2063        ice_sync_fltr_subtask(pf);
2064        ice_handle_mdd_event(pf);
2065        ice_watchdog_subtask(pf);
2066
2067        if (ice_is_safe_mode(pf)) {
2068                ice_service_task_complete(pf);
2069                return;
2070        }
2071
2072        ice_process_vflr_event(pf);
2073        ice_clean_mailboxq_subtask(pf);
2074        ice_sync_arfs_fltrs(pf);
2075        ice_flush_fdir_ctx(pf);
2076
2077        /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
2078        ice_service_task_complete(pf);
2079
2080        /* If the tasks have taken longer than one service timer period
2081         * or there is more work to be done, reset the service timer to
2082         * schedule the service task now.
2083         */
2084        if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
2085            test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
2086            test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
2087            test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
2088            test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
2089            test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
2090                mod_timer(&pf->serv_tmr, jiffies);
2091}
2092
2093/**
2094 * ice_set_ctrlq_len - helper function to set controlq length
2095 * @hw: pointer to the HW instance
2096 */
2097static void ice_set_ctrlq_len(struct ice_hw *hw)
2098{
2099        hw->adminq.num_rq_entries = ICE_AQ_LEN;
2100        hw->adminq.num_sq_entries = ICE_AQ_LEN;
2101        hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
2102        hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
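            /* PF_MBX_ARQLEN_ARQLEN_M is the full ARQLEN field mask, i.e. the
             * largest mailbox receive queue the hardware supports, which leaves
             * headroom for bursts of VF messages
             */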
2103        hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
2104        hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
2105        hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2106        hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
2107}
2108
2109/**
2110 * ice_schedule_reset - schedule a reset
2111 * @pf: board private structure
2112 * @reset: reset being requested
2113 */
2114int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
2115{
2116        struct device *dev = ice_pf_to_dev(pf);
2117
2118        /* bail out if earlier reset has failed */
2119        if (test_bit(ICE_RESET_FAILED, pf->state)) {
2120                dev_dbg(dev, "earlier reset has failed\n");
2121                return -EIO;
2122        }
2123        /* bail if reset/recovery already in progress */
2124        if (ice_is_reset_in_progress(pf->state)) {
2125                dev_dbg(dev, "Reset already in progress\n");
2126                return -EBUSY;
2127        }
2128
2129        switch (reset) {
2130        case ICE_RESET_PFR:
2131                set_bit(ICE_PFR_REQ, pf->state);
2132                break;
2133        case ICE_RESET_CORER:
2134                set_bit(ICE_CORER_REQ, pf->state);
2135                break;
2136        case ICE_RESET_GLOBR:
2137                set_bit(ICE_GLOBR_REQ, pf->state);
2138                break;
2139        default:
2140                return -EINVAL;
2141        }
2142
2143        ice_service_task_schedule(pf);
2144        return 0;
2145}
2146
2147/**
2148 * ice_irq_affinity_notify - Callback for affinity changes
2149 * @notify: context as to what irq was changed
2150 * @mask: the new affinity mask
2151 *
2152 * This is a callback function used by the irq_set_affinity_notifier function
2153 * so that we may register to receive changes to the irq affinity masks.
2154 */
2155static void
2156ice_irq_affinity_notify(struct irq_affinity_notify *notify,
2157                        const cpumask_t *mask)
2158{
2159        struct ice_q_vector *q_vector =
2160                container_of(notify, struct ice_q_vector, affinity_notify);
2161
2162        cpumask_copy(&q_vector->affinity_mask, mask);
2163}
2164
2165/**
2166 * ice_irq_affinity_release - Callback for affinity notifier release
2167 * @ref: internal core kernel usage
2168 *
2169 * This is a callback function used by the irq_set_affinity_notifier function
2170 * to inform the current notification subscriber that they will no longer
2171 * receive notifications.
2172 */
2173static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
2174
2175/**
2176 * ice_vsi_ena_irq - Enable IRQ for the given VSI
2177 * @vsi: the VSI being configured
2178 */
2179static int ice_vsi_ena_irq(struct ice_vsi *vsi)
2180{
2181        struct ice_hw *hw = &vsi->back->hw;
2182        int i;
2183
2184        ice_for_each_q_vector(vsi, i)
2185                ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
2186
2187        ice_flush(hw);
2188        return 0;
2189}
2190
2191/**
2192 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2193 * @vsi: the VSI being configured
2194 * @basename: name for the vector
2195 */
2196static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
2197{
2198        int q_vectors = vsi->num_q_vectors;
2199        struct ice_pf *pf = vsi->back;
2200        int base = vsi->base_vector;
2201        struct device *dev;
2202        int rx_int_idx = 0;
2203        int tx_int_idx = 0;
2204        int vector, err;
2205        int irq_num;
2206
2207        dev = ice_pf_to_dev(pf);
2208        for (vector = 0; vector < q_vectors; vector++) {
2209                struct ice_q_vector *q_vector = vsi->q_vectors[vector];
2210
2211                irq_num = pf->msix_entries[base + vector].vector;
2212
2213                if (q_vector->tx.ring && q_vector->rx.ring) {
2214                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2215                                 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
2216                        tx_int_idx++;
2217                } else if (q_vector->rx.ring) {
2218                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2219                                 "%s-%s-%d", basename, "rx", rx_int_idx++);
2220                } else if (q_vector->tx.ring) {
2221                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
2222                                 "%s-%s-%d", basename, "tx", tx_int_idx++);
2223                } else {
2224                        /* skip this unused q_vector */
2225                        continue;
2226                }
2227                if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID)
2228                        err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2229                                               IRQF_SHARED, q_vector->name,
2230                                               q_vector);
2231                else
2232                        err = devm_request_irq(dev, irq_num, vsi->irq_handler,
2233                                               0, q_vector->name, q_vector);
2234                if (err) {
2235                        netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
2236                                   err);
2237                        goto free_q_irqs;
2238                }
2239
2240                /* register for affinity change notifications */
2241                if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
2242                        struct irq_affinity_notify *affinity_notify;
2243
2244                        affinity_notify = &q_vector->affinity_notify;
2245                        affinity_notify->notify = ice_irq_affinity_notify;
2246                        affinity_notify->release = ice_irq_affinity_release;
2247                        irq_set_affinity_notifier(irq_num, affinity_notify);
2248                }
2249
2250                /* assign the mask for this irq */
2251                irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
2252        }
2253
2254        vsi->irqs_ready = true;
2255        return 0;
2256
2257free_q_irqs:
2258        while (vector) {
2259                vector--;
2260                irq_num = pf->msix_entries[base + vector].vector;
2261                if (!IS_ENABLED(CONFIG_RFS_ACCEL))
2262                        irq_set_affinity_notifier(irq_num, NULL);
2263                irq_set_affinity_hint(irq_num, NULL);
2264                devm_free_irq(dev, irq_num, vsi->q_vectors[vector]);
2265        }
2266        return err;
2267}
2268
2269/**
2270 * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2271 * @vsi: VSI to setup Tx rings used by XDP
2272 *
2273 * Return 0 on success and negative value on error
2274 */
2275static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
2276{
2277        struct device *dev = ice_pf_to_dev(vsi->back);
2278        int i;
2279
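            /* XDP Tx queues are appended after the VSI's regular Tx queues in
             * the Tx queue map, hence the vsi->alloc_txq offset below
             */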
2280        for (i = 0; i < vsi->num_xdp_txq; i++) {
2281                u16 xdp_q_idx = vsi->alloc_txq + i;
2282                struct ice_ring *xdp_ring;
2283
2284                xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
2285
2286                if (!xdp_ring)
2287                        goto free_xdp_rings;
2288
2289                xdp_ring->q_index = xdp_q_idx;
2290                xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
2291                xdp_ring->ring_active = false;
2292                xdp_ring->vsi = vsi;
2293                xdp_ring->netdev = NULL;
2294                xdp_ring->dev = dev;
2295                xdp_ring->count = vsi->num_tx_desc;
2296                WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
2297                if (ice_setup_tx_ring(xdp_ring))
2298                        goto free_xdp_rings;
2299                ice_set_ring_xdp(xdp_ring);
2300                xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring);
2301        }
2302
2303        return 0;
2304
2305free_xdp_rings:
2306        for (; i >= 0; i--)
2307                if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
2308                        ice_free_tx_ring(vsi->xdp_rings[i]);
2309        return -ENOMEM;
2310}
2311
2312/**
2313 * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2314 * @vsi: VSI to set the bpf prog on
2315 * @prog: the bpf prog pointer
2316 */
2317static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
2318{
2319        struct bpf_prog *old_prog;
2320        int i;
2321
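            /* atomically swap in the new program and release our reference to
             * the old one, then propagate the pointer to every Rx ring
             */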
2322        old_prog = xchg(&vsi->xdp_prog, prog);
2323        if (old_prog)
2324                bpf_prog_put(old_prog);
2325
2326        ice_for_each_rxq(vsi, i)
2327                WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
2328}
2329
2330/**
2331 * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2332 * @vsi: VSI to bring up Tx rings used by XDP
2333 * @prog: bpf program that will be assigned to VSI
2334 *
2335 * Return 0 on success and negative value on error
2336 */
2337int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
2338{
2339        u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2340        int xdp_rings_rem = vsi->num_xdp_txq;
2341        struct ice_pf *pf = vsi->back;
2342        struct ice_qs_cfg xdp_qs_cfg = {
2343                .qs_mutex = &pf->avail_q_mutex,
2344                .pf_map = pf->avail_txqs,
2345                .pf_map_size = pf->max_pf_txqs,
2346                .q_count = vsi->num_xdp_txq,
2347                .scatter_count = ICE_MAX_SCATTER_TXQS,
2348                .vsi_map = vsi->txq_map,
2349                .vsi_map_offset = vsi->alloc_txq,
2350                .mapping_mode = ICE_VSI_MAP_CONTIG
2351        };
2352        enum ice_status status;
2353        struct device *dev;
2354        int i, v_idx;
2355
2356        dev = ice_pf_to_dev(pf);
2357        vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
2358                                      sizeof(*vsi->xdp_rings), GFP_KERNEL);
2359        if (!vsi->xdp_rings)
2360                return -ENOMEM;
2361
2362        vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
2363        if (__ice_vsi_get_qs(&xdp_qs_cfg))
2364                goto err_map_xdp;
2365
2366        if (ice_xdp_alloc_setup_rings(vsi))
2367                goto clear_xdp_rings;
2368
2369        /* follow the logic from ice_vsi_map_rings_to_vectors */
2370        ice_for_each_q_vector(vsi, v_idx) {
2371                struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2372                int xdp_rings_per_v, q_id, q_base;
2373
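                    /* spread the remaining XDP rings as evenly as possible
                     * across the remaining q_vectors
                     */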
2374                xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
2375                                               vsi->num_q_vectors - v_idx);
2376                q_base = vsi->num_xdp_txq - xdp_rings_rem;
2377
2378                for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
2379                        struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
2380
2381                        xdp_ring->q_vector = q_vector;
2382                        xdp_ring->next = q_vector->tx.ring;
2383                        q_vector->tx.ring = xdp_ring;
2384                }
2385                xdp_rings_rem -= xdp_rings_per_v;
2386        }
2387
2388        /* omit the scheduler update if in reset path; XDP queues will be
2389         * taken into account at the end of ice_vsi_rebuild, where
2390         * ice_cfg_vsi_lan is being called
2391         */
2392        if (ice_is_reset_in_progress(pf->state))
2393                return 0;
2394
2395        /* tell the Tx scheduler that right now we have
2396         * additional queues
2397         */
2398        for (i = 0; i < vsi->tc_cfg.numtc; i++)
2399                max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
2400
2401        status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2402                                 max_txqs);
2403        if (status) {
2404                dev_err(dev, "Failed VSI LAN queue config for XDP, error: %s\n",
2405                        ice_stat_str(status));
2406                goto clear_xdp_rings;
2407        }
2408        ice_vsi_assign_bpf_prog(vsi, prog);
2409
2410        return 0;
2411clear_xdp_rings:
2412        for (i = 0; i < vsi->num_xdp_txq; i++)
2413                if (vsi->xdp_rings[i]) {
2414                        kfree_rcu(vsi->xdp_rings[i], rcu);
2415                        vsi->xdp_rings[i] = NULL;
2416                }
2417
2418err_map_xdp:
2419        mutex_lock(&pf->avail_q_mutex);
2420        for (i = 0; i < vsi->num_xdp_txq; i++) {
2421                clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2422                vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2423        }
2424        mutex_unlock(&pf->avail_q_mutex);
2425
2426        devm_kfree(dev, vsi->xdp_rings);
2427        return -ENOMEM;
2428}
2429
2430/**
2431 * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2432 * @vsi: VSI to remove XDP rings
2433 *
2434 * Detach XDP rings from irq vectors, clean up the PF bitmap and free
2435 * resources
2436 */
2437int ice_destroy_xdp_rings(struct ice_vsi *vsi)
2438{
2439        u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
2440        struct ice_pf *pf = vsi->back;
2441        int i, v_idx;
2442
2443        /* q_vectors are freed in the reset path, so there's no point in
2444         * detaching rings. If the rebuild was not triggered by a reset, the
2445         * reset bits in pf->state won't be set, so additionally check the
2446         * first q_vector against NULL.
2447         */
2448        if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
2449                goto free_qmap;
2450
2451        ice_for_each_q_vector(vsi, v_idx) {
2452                struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
2453                struct ice_ring *ring;
2454
2455                ice_for_each_ring(ring, q_vector->tx)
2456                        if (!ring->tx_buf || !ice_ring_is_xdp(ring))
2457                                break;
2458
2459                /* restore the value of last node prior to XDP setup */
2460                q_vector->tx.ring = ring;
2461        }
2462
2463free_qmap:
2464        mutex_lock(&pf->avail_q_mutex);
2465        for (i = 0; i < vsi->num_xdp_txq; i++) {
2466                clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
2467                vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
2468        }
2469        mutex_unlock(&pf->avail_q_mutex);
2470
2471        for (i = 0; i < vsi->num_xdp_txq; i++)
2472                if (vsi->xdp_rings[i]) {
2473                        if (vsi->xdp_rings[i]->desc)
2474                                ice_free_tx_ring(vsi->xdp_rings[i]);
2475                        kfree_rcu(vsi->xdp_rings[i], rcu);
2476                        vsi->xdp_rings[i] = NULL;
2477                }
2478
2479        devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
2480        vsi->xdp_rings = NULL;
2481
2482        if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
2483                return 0;
2484
2485        ice_vsi_assign_bpf_prog(vsi, NULL);
2486
2487        /* notify Tx scheduler that we destroyed XDP queues and bring
2488         * back the old number of child nodes
2489         */
2490        for (i = 0; i < vsi->tc_cfg.numtc; i++)
2491                max_txqs[i] = vsi->num_txq;
2492
2493        /* change number of XDP Tx queues to 0 */
2494        vsi->num_xdp_txq = 0;
2495
2496        return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
2497                               max_txqs);
2498}
2499
2500/**
2501 * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2502 * @vsi: VSI to schedule napi on
2503 */
2504static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
2505{
2506        int i;
2507
2508        ice_for_each_rxq(vsi, i) {
2509                struct ice_ring *rx_ring = vsi->rx_rings[i];
2510
2511                if (rx_ring->xsk_pool)
2512                        napi_schedule(&rx_ring->q_vector->napi);
2513        }
2514}
2515
2516/**
2517 * ice_xdp_setup_prog - Add or remove XDP eBPF program
2518 * @vsi: VSI to setup XDP for
2519 * @prog: XDP program
2520 * @extack: netlink extended ack
2521 */
2522static int
2523ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
2524                   struct netlink_ext_ack *extack)
2525{
2526        int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
2527        bool if_running = netif_running(vsi->netdev);
2528        int ret = 0, xdp_ring_err = 0;
2529
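            /* XDP frames must fit within a single Rx buffer */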
2530        if (frame_size > vsi->rx_buf_len) {
2531                NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
2532                return -EOPNOTSUPP;
2533        }
2534
2535        /* need to stop netdev while setting up the program for Rx rings */
2536        if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
2537                ret = ice_down(vsi);
2538                if (ret) {
2539                        NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
2540                        return ret;
2541                }
2542        }
2543
2544        if (!ice_is_xdp_ena_vsi(vsi) && prog) {
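                    /* allocate one XDP Tx ring per Rx queue so each Rx queue
                     * has a dedicated ring for XDP_TX/REDIRECT transmissions
                     */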
2545                vsi->num_xdp_txq = vsi->alloc_rxq;
2546                xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
2547                if (xdp_ring_err)
2548                        NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
2549        } else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
2550                xdp_ring_err = ice_destroy_xdp_rings(vsi);
2551                if (xdp_ring_err)
2552                        NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
2553        } else {
2554                ice_vsi_assign_bpf_prog(vsi, prog);
2555        }
2556
2557        if (if_running)
2558                ret = ice_up(vsi);
2559
2560        if (!ret && prog)
2561                ice_vsi_rx_napi_schedule(vsi);
2562
2563        return (ret || xdp_ring_err) ? -ENOMEM : 0;
2564}
2565
2566/**
2567 * ice_xdp_safe_mode - XDP handler for safe mode
2568 * @dev: netdevice
2569 * @xdp: XDP command
2570 */
2571static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
2572                             struct netdev_bpf *xdp)
2573{
2574        NL_SET_ERR_MSG_MOD(xdp->extack,
2575                           "Please provide working DDP firmware package in order to use XDP\n"
2576                           "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
2577        return -EOPNOTSUPP;
2578}
2579
2580/**
2581 * ice_xdp - implements XDP handler
2582 * @dev: netdevice
2583 * @xdp: XDP command
2584 */
2585static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
2586{
2587        struct ice_netdev_priv *np = netdev_priv(dev);
2588        struct ice_vsi *vsi = np->vsi;
2589
2590        if (vsi->type != ICE_VSI_PF) {
2591                NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
2592                return -EINVAL;
2593        }
2594
2595        switch (xdp->command) {
2596        case XDP_SETUP_PROG:
2597                return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
2598        case XDP_SETUP_XSK_POOL:
2599                return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
2600                                          xdp->xsk.queue_id);
2601        default:
2602                return -EINVAL;
2603        }
2604}
2605
2606/**
2607 * ice_ena_misc_vector - enable the non-queue interrupts
2608 * @pf: board private structure
2609 */
2610static void ice_ena_misc_vector(struct ice_pf *pf)
2611{
2612        struct ice_hw *hw = &pf->hw;
2613        u32 val;
2614
2615        /* Disable anti-spoof detection interrupt to prevent spurious event
2616         * interrupts during a function reset. Anti-spoof functionality is
2617         * still supported.
2618         */
2619        val = rd32(hw, GL_MDCK_TX_TDPU);
2620        val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
2621        wr32(hw, GL_MDCK_TX_TDPU, val);
2622
2623        /* clear things first */
2624        wr32(hw, PFINT_OICR_ENA, 0);    /* disable all */
2625        rd32(hw, PFINT_OICR);           /* read to clear */
2626
2627        val = (PFINT_OICR_ECC_ERR_M |
2628               PFINT_OICR_MAL_DETECT_M |
2629               PFINT_OICR_GRST_M |
2630               PFINT_OICR_PCI_EXCEPTION_M |
2631               PFINT_OICR_VFLR_M |
2632               PFINT_OICR_HMC_ERR_M |
2633               PFINT_OICR_PE_CRITERR_M);
2634
2635        wr32(hw, PFINT_OICR_ENA, val);
2636
2637        /* SW_ITR_IDX = 0, but don't change INTENA */
2638        wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
2639             GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
2640}
2641
2642/**
2643 * ice_misc_intr - misc interrupt handler
2644 * @irq: interrupt number
2645 * @data: pointer to a q_vector
2646 */
2647static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
2648{
2649        struct ice_pf *pf = (struct ice_pf *)data;
2650        struct ice_hw *hw = &pf->hw;
2651        irqreturn_t ret = IRQ_NONE;
2652        struct device *dev;
2653        u32 oicr, ena_mask;
2654
2655        dev = ice_pf_to_dev(pf);
2656        set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
2657        set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
2658
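            /* PFINT_OICR is read-to-clear (see the "read to clear" note in
             * ice_ena_misc_vector), so this read both fetches and acknowledges
             * the pending causes
             */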
2659        oicr = rd32(hw, PFINT_OICR);
2660        ena_mask = rd32(hw, PFINT_OICR_ENA);
2661
2662        if (oicr & PFINT_OICR_SWINT_M) {
2663                ena_mask &= ~PFINT_OICR_SWINT_M;
2664                pf->sw_int_count++;
2665        }
2666
2667        if (oicr & PFINT_OICR_MAL_DETECT_M) {
2668                ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
2669                set_bit(ICE_MDD_EVENT_PENDING, pf->state);
2670        }
2671        if (oicr & PFINT_OICR_VFLR_M) {
2672                /* disable any further VFLR event notifications */
2673                if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
2674                        u32 reg = rd32(hw, PFINT_OICR_ENA);
2675
2676                        reg &= ~PFINT_OICR_VFLR_M;
2677                        wr32(hw, PFINT_OICR_ENA, reg);
2678                } else {
2679                        ena_mask &= ~PFINT_OICR_VFLR_M;
2680                        set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
2681                }
2682        }
2683
2684        if (oicr & PFINT_OICR_GRST_M) {
2685                u32 reset;
2686
2687                /* we have a reset warning */
2688                ena_mask &= ~PFINT_OICR_GRST_M;
2689                reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
2690                        GLGEN_RSTAT_RESET_TYPE_S;
2691
2692                if (reset == ICE_RESET_CORER)
2693                        pf->corer_count++;
2694                else if (reset == ICE_RESET_GLOBR)
2695                        pf->globr_count++;
2696                else if (reset == ICE_RESET_EMPR)
2697                        pf->empr_count++;
2698                else
2699                        dev_dbg(dev, "Invalid reset type %d\n", reset);
2700
2701                /* If a reset cycle isn't already in progress, we set a bit in
2702                 * pf->state so that the service task can start a reset/rebuild.
2703                 * We also make note of which reset happened so that peer
2704                 * devices/drivers can be informed.
2705                 */
2706                if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
2707                        if (reset == ICE_RESET_CORER)
2708                                set_bit(ICE_CORER_RECV, pf->state);
2709                        else if (reset == ICE_RESET_GLOBR)
2710                                set_bit(ICE_GLOBR_RECV, pf->state);
2711                        else
2712                                set_bit(ICE_EMPR_RECV, pf->state);
2713
2714                        /* There are a couple of different bits at play here.
2715                         * hw->reset_ongoing indicates whether the hardware is
2716                         * in reset. This is set to true when a reset interrupt
2717                         * is received and set back to false after the driver
2718                         * has determined that the hardware is out of reset.
2719                         *
2720                         * ICE_RESET_OICR_RECV in pf->state indicates
2721                         * that a post reset rebuild is required before the
2722                         * driver is operational again. This is set above.
2723                         *
2724                         * As this is the start of the reset/rebuild cycle, set
2725                         * both to indicate that.
2726                         */
2727                        hw->reset_ongoing = true;
2728                }
2729        }
2730
2731        if (oicr & PFINT_OICR_HMC_ERR_M) {
2732                ena_mask &= ~PFINT_OICR_HMC_ERR_M;
2733                dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n",
2734                        rd32(hw, PFHMC_ERRORINFO),
2735                        rd32(hw, PFHMC_ERRORDATA));
2736        }
2737
2738        /* Report any remaining unexpected interrupts */
2739        oicr &= ena_mask;
2740        if (oicr) {
2741                dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
2742                /* If a critical error is pending there is no choice but to
2743                 * reset the device.
2744                 */
2745                if (oicr & (PFINT_OICR_PE_CRITERR_M |
2746                            PFINT_OICR_PCI_EXCEPTION_M |
2747                            PFINT_OICR_ECC_ERR_M)) {
2748                        set_bit(ICE_PFR_REQ, pf->state);
2749                        ice_service_task_schedule(pf);
2750                }
2751        }
2752        ret = IRQ_HANDLED;
2753
2754        ice_service_task_schedule(pf);
2755        ice_irq_dynamic_ena(hw, NULL, NULL);
2756
2757        return ret;
2758}
2759
2760/**
2761 * ice_dis_ctrlq_interrupts - disable control queue interrupts
2762 * @hw: pointer to HW structure
2763 */
2764static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
2765{
2766        /* disable Admin queue Interrupt causes */
2767        wr32(hw, PFINT_FW_CTL,
2768             rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
2769
2770        /* disable Mailbox queue Interrupt causes */
2771        wr32(hw, PFINT_MBX_CTL,
2772             rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
2773
2774        /* disable Control queue Interrupt causes */
2775        wr32(hw, PFINT_OICR_CTL,
2776             rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
2777
2778        ice_flush(hw);
2779}
2780
2781/**
2782 * ice_free_irq_msix_misc - Unroll misc vector setup
2783 * @pf: board private structure
2784 */
2785static void ice_free_irq_msix_misc(struct ice_pf *pf)
2786{
2787        struct ice_hw *hw = &pf->hw;
2788
2789        ice_dis_ctrlq_interrupts(hw);
2790
2791        /* disable OICR interrupt */
2792        wr32(hw, PFINT_OICR_ENA, 0);
2793        ice_flush(hw);
2794
2795        if (pf->msix_entries) {
2796                synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
2797                devm_free_irq(ice_pf_to_dev(pf),
2798                              pf->msix_entries[pf->oicr_idx].vector, pf);
2799        }
2800
2801        pf->num_avail_sw_msix += 1;
2802        ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
2803}
2804
2805/**
2806 * ice_ena_ctrlq_interrupts - enable control queue interrupts
2807 * @hw: pointer to HW structure
2808 * @reg_idx: HW vector index to associate the control queue interrupts with
2809 */
2810static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
2811{
2812        u32 val;
2813
2814        val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
2815               PFINT_OICR_CTL_CAUSE_ENA_M);
2816        wr32(hw, PFINT_OICR_CTL, val);
2817
2818        /* enable Admin queue Interrupt causes */
2819        val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
2820               PFINT_FW_CTL_CAUSE_ENA_M);
2821        wr32(hw, PFINT_FW_CTL, val);
2822
2823        /* enable Mailbox queue Interrupt causes */
2824        val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
2825               PFINT_MBX_CTL_CAUSE_ENA_M);
2826        wr32(hw, PFINT_MBX_CTL, val);
2827
2828        ice_flush(hw);
2829}
2830
2831/**
2832 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
2833 * @pf: board private structure
2834 *
2835 * This sets up the handler for MSIX 0, which is used to manage the
2836 * non-queue interrupts, e.g. AdminQ and errors. This is not used
2837 * when in MSI or Legacy interrupt mode.
2838 */
2839static int ice_req_irq_msix_misc(struct ice_pf *pf)
2840{
2841        struct device *dev = ice_pf_to_dev(pf);
2842        struct ice_hw *hw = &pf->hw;
2843        int oicr_idx, err = 0;
2844
2845        if (!pf->int_name[0])
2846                snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
2847                         dev_driver_string(dev), dev_name(dev));
2848
2849        /* Do not request IRQ but do enable OICR interrupt since settings are
2850         * lost during reset. Note that this function is called only during
2851         * rebuild path and not while reset is in progress.
2852         */
2853        if (ice_is_reset_in_progress(pf->state))
2854                goto skip_req_irq;
2855
2856        /* reserve one vector in irq_tracker for misc interrupts */
2857        oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
2858        if (oicr_idx < 0)
2859                return oicr_idx;
2860
2861        pf->num_avail_sw_msix -= 1;
2862        pf->oicr_idx = (u16)oicr_idx;
2863
2864        err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector,
2865                               ice_misc_intr, 0, pf->int_name, pf);
2866        if (err) {
2867                dev_err(dev, "devm_request_irq for %s failed: %d\n",
2868                        pf->int_name, err);
2869                ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
2870                pf->num_avail_sw_msix += 1;
2871                return err;
2872        }
2873
2874skip_req_irq:
2875        ice_ena_misc_vector(pf);
2876
2877        ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
2878        wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
2879             ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
2880
2881        ice_flush(hw);
2882        ice_irq_dynamic_ena(hw, NULL, NULL);
2883
2884        return 0;
2885}
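/* Illustrative note, not part of the driver: on a reset rebuild the misc IRQ
 * requested at probe time is still registered, so ice_req_irq_msix_misc()
 * only skips the devm_request_irq()/vector reservation step (skip_req_irq)
 * and redoes the HW-side programming that a reset wipes out:
 *
 *	ice_ena_misc_vector(pf);
 *	ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
 *	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), ...);
 *
 * The teardown counterpart is ice_free_irq_msix_misc() above.
 */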
2886
2887/**
2888 * ice_napi_add - register NAPI handler for the VSI
2889 * @vsi: VSI for which NAPI handler is to be registered
2890 *
2891 * This function is only called in the driver's load path. Registering the NAPI
2892 * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
2893 * reset/rebuild, etc.)
2894 */
2895static void ice_napi_add(struct ice_vsi *vsi)
2896{
2897        int v_idx;
2898
2899        if (!vsi->netdev)
2900                return;
2901
2902        ice_for_each_q_vector(vsi, v_idx)
2903                netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
2904                               ice_napi_poll, NAPI_POLL_WEIGHT);
2905}
2906
2907/**
2908 * ice_set_ops - set netdev and ethtool ops for the given netdev
2909 * @netdev: netdev instance
2910 */
2911static void ice_set_ops(struct net_device *netdev)
2912{
2913        struct ice_pf *pf = ice_netdev_to_pf(netdev);
2914
2915        if (ice_is_safe_mode(pf)) {
2916                netdev->netdev_ops = &ice_netdev_safe_mode_ops;
2917                ice_set_ethtool_safe_mode_ops(netdev);
2918                return;
2919        }
2920
2921        netdev->netdev_ops = &ice_netdev_ops;
2922        netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
2923        ice_set_ethtool_ops(netdev);
2924}
2925
2926/**
2927 * ice_set_netdev_features - set features for the given netdev
2928 * @netdev: netdev instance
2929 */
2930static void ice_set_netdev_features(struct net_device *netdev)
2931{
2932        struct ice_pf *pf = ice_netdev_to_pf(netdev);
2933        netdev_features_t csumo_features;
2934        netdev_features_t vlano_features;
2935        netdev_features_t dflt_features;
2936        netdev_features_t tso_features;
2937
2938        if (ice_is_safe_mode(pf)) {
2939                /* safe mode */
2940                netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
2941                netdev->hw_features = netdev->features;
2942                return;
2943        }
2944
2945        dflt_features = NETIF_F_SG      |
2946                        NETIF_F_HIGHDMA |
2947                        NETIF_F_NTUPLE  |
2948                        NETIF_F_RXHASH;
2949
2950        csumo_features = NETIF_F_RXCSUM   |
2951                         NETIF_F_IP_CSUM  |
2952                         NETIF_F_SCTP_CRC |
2953                         NETIF_F_IPV6_CSUM;
2954
2955        vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
2956                         NETIF_F_HW_VLAN_CTAG_TX     |
2957                         NETIF_F_HW_VLAN_CTAG_RX;
2958
2959        tso_features = NETIF_F_TSO                      |
2960                       NETIF_F_TSO_ECN                  |
2961                       NETIF_F_TSO6                     |
2962                       NETIF_F_GSO_GRE                  |
2963                       NETIF_F_GSO_UDP_TUNNEL           |
2964                       NETIF_F_GSO_GRE_CSUM             |
2965                       NETIF_F_GSO_UDP_TUNNEL_CSUM      |
2966                       NETIF_F_GSO_PARTIAL              |
2967                       NETIF_F_GSO_IPXIP4               |
2968                       NETIF_F_GSO_IPXIP6               |
2969                       NETIF_F_GSO_UDP_L4;
2970
2971        netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
2972                                        NETIF_F_GSO_GRE_CSUM;
2973        /* set features that user can change */
2974        netdev->hw_features = dflt_features | csumo_features |
2975                              vlano_features | tso_features;
2976
2977        /* add support for HW_CSUM on packets with MPLS header */
2978        netdev->mpls_features = NETIF_F_HW_CSUM;
2979
2980        /* enable features */
2981        netdev->features |= netdev->hw_features;
2982        /* encap and VLAN devices inherit default, csumo and tso features */
2983        netdev->hw_enc_features |= dflt_features | csumo_features |
2984                                   tso_features;
2985        netdev->vlan_features |= dflt_features | csumo_features |
2986                                 tso_features;
2987}
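/* Note on the feature sets built above (standard netdev semantics, not
 * specific to this driver): hw_features is the set a user may toggle at
 * runtime (e.g. ethtool -K), features is the currently enabled set, and
 * hw_enc_features/vlan_features bound what stacked tunnel and VLAN devices
 * may inherit from this netdev.
 */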
2988
2989/**
2990 * ice_cfg_netdev - Allocate, configure and register a netdev
2991 * @vsi: the VSI associated with the new netdev
2992 *
2993 * Returns 0 on success, negative value on failure
2994 */
2995static int ice_cfg_netdev(struct ice_vsi *vsi)
2996{
2997        struct ice_pf *pf = vsi->back;
2998        struct ice_netdev_priv *np;
2999        struct net_device *netdev;
3000        u8 mac_addr[ETH_ALEN];
3001
3002        netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
3003                                    vsi->alloc_rxq);
3004        if (!netdev)
3005                return -ENOMEM;
3006
3007        set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
3008        vsi->netdev = netdev;
3009        np = netdev_priv(netdev);
3010        np->vsi = vsi;
3011
3012        ice_set_netdev_features(netdev);
3013
3014        ice_set_ops(netdev);
3015
3016        if (vsi->type == ICE_VSI_PF) {
3017                SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf));
3018                ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
3019                ether_addr_copy(netdev->dev_addr, mac_addr);
3020                ether_addr_copy(netdev->perm_addr, mac_addr);
3021        }
3022
3023        netdev->priv_flags |= IFF_UNICAST_FLT;
3024
3025        /* Setup netdev TC information */
3026        ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
3027
3028        /* set the watchdog timeout value to 5 seconds */
3029        netdev->watchdog_timeo = 5 * HZ;
3030
3031        netdev->min_mtu = ETH_MIN_MTU;
3032        netdev->max_mtu = ICE_MAX_MTU;
3033
3034        return 0;
3035}
3036
3037/**
3038 * ice_fill_rss_lut - Fill the RSS lookup table with default values
3039 * @lut: Lookup table
3040 * @rss_table_size: Lookup table size
3041 * @rss_size: Range of queue number for hashing
3042 */
3043void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
3044{
3045        u16 i;
3046
3047        for (i = 0; i < rss_table_size; i++)
3048                lut[i] = i % rss_size;
3049}
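/* Illustrative example, not part of the driver: the default LUT simply
 * round-robins the RSS queues across the table, e.g.
 *
 *	u8 lut[8];
 *
 *	ice_fill_rss_lut(lut, 8, 3);
 *
 * leaves lut holding { 0, 1, 2, 0, 1, 2, 0, 1 }.
 */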
3050
3051/**
3052 * ice_pf_vsi_setup - Set up a PF VSI
3053 * @pf: board private structure
3054 * @pi: pointer to the port_info instance
3055 *
3056 * Returns pointer to the successfully allocated VSI software struct
3057 * on success, otherwise returns NULL on failure.
3058 */
3059static struct ice_vsi *
3060ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3061{
3062        return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
3063}
3064
3065/**
3066 * ice_ctrl_vsi_setup - Set up a control VSI
3067 * @pf: board private structure
3068 * @pi: pointer to the port_info instance
3069 *
3070 * Returns pointer to the successfully allocated VSI software struct
3071 * on success, otherwise returns NULL on failure.
3072 */
3073static struct ice_vsi *
3074ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3075{
3076        return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID);
3077}
3078
3079/**
3080 * ice_lb_vsi_setup - Set up a loopback VSI
3081 * @pf: board private structure
3082 * @pi: pointer to the port_info instance
3083 *
3084 * Returns pointer to the successfully allocated VSI software struct
3085 * on success, otherwise returns NULL on failure.
3086 */
3087struct ice_vsi *
3088ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
3089{
3090        return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
3091}
3092
3093/**
3094 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3095 * @netdev: network interface to be adjusted
3096 * @proto: unused protocol
3097 * @vid: VLAN ID to be added
3098 *
3099 * net_device_ops implementation for adding VLAN IDs
3100 */
3101static int
3102ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
3103                    u16 vid)
3104{
3105        struct ice_netdev_priv *np = netdev_priv(netdev);
3106        struct ice_vsi *vsi = np->vsi;
3107        int ret;
3108
3109        /* VLAN 0 is added by default during load/reset */
3110        if (!vid)
3111                return 0;
3112
3113        /* Enable VLAN pruning when a VLAN other than 0 is added */
3114        if (!ice_vsi_is_vlan_pruning_ena(vsi)) {
3115                ret = ice_cfg_vlan_pruning(vsi, true, false);
3116                if (ret)
3117                        return ret;
3118        }
3119
3120        /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
3121         * packets aren't pruned by the device's internal switch on Rx
3122         */
3123        ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
3124        if (!ret)
3125                set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
3126
3127        return ret;
3128}
3129
3130/**
3131 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3132 * @netdev: network interface to be adjusted
3133 * @proto: unused protocol
3134 * @vid: VLAN ID to be removed
3135 *
3136 * net_device_ops implementation for removing VLAN IDs
3137 */
3138static int
3139ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
3140                     u16 vid)
3141{
3142        struct ice_netdev_priv *np = netdev_priv(netdev);
3143        struct ice_vsi *vsi = np->vsi;
3144        int ret;
3145
3146        /* don't allow removal of VLAN 0 */
3147        if (!vid)
3148                return 0;
3149
3150        /* Make sure ice_vsi_kill_vlan is successful before updating VLAN
3151         * information
3152         */
3153        ret = ice_vsi_kill_vlan(vsi, vid);
3154        if (ret)
3155                return ret;
3156
3157        /* Disable pruning when VLAN 0 is the only VLAN rule */
3158        if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi))
3159                ret = ice_cfg_vlan_pruning(vsi, false, false);
3160
3161        set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
3162        return ret;
3163}
3164
3165/**
3166 * ice_setup_pf_sw - Set up the HW switch on startup or after reset
3167 * @pf: board private structure
3168 *
3169 * Returns 0 on success, negative value on failure
3170 */
3171static int ice_setup_pf_sw(struct ice_pf *pf)
3172{
3173        struct ice_vsi *vsi;
3174        int status = 0;
3175
3176        if (ice_is_reset_in_progress(pf->state))
3177                return -EBUSY;
3178
3179        vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
3180        if (!vsi)
3181                return -ENOMEM;
3182
3183        status = ice_cfg_netdev(vsi);
3184        if (status) {
3185                status = -ENODEV;
3186                goto unroll_vsi_setup;
3187        }
3188        /* netdev has to be configured before setting frame size */
3189        ice_vsi_cfg_frame_size(vsi);
3190
3191        /* Setup DCB netlink interface */
3192        ice_dcbnl_setup(vsi);
3193
3194        /* registering the NAPI handler requires both the queues and
3195         * netdev to be created, which are done in ice_pf_vsi_setup()
3196         * and ice_cfg_netdev() respectively
3197         */
3198        ice_napi_add(vsi);
3199
3200        status = ice_set_cpu_rx_rmap(vsi);
3201        if (status) {
3202                dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n",
3203                        vsi->vsi_num, status);
3204                status = -EINVAL;
3205                goto unroll_napi_add;
3206        }
3207        status = ice_init_mac_fltr(pf);
3208        if (status)
3209                goto free_cpu_rx_map;
3210
3211        return status;
3212
3213free_cpu_rx_map:
3214        ice_free_cpu_rx_rmap(vsi);
3215
3216unroll_napi_add:
3217        if (vsi) {
3218                ice_napi_del(vsi);
3219                if (vsi->netdev) {
3220                        clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
3221                        free_netdev(vsi->netdev);
3222                        vsi->netdev = NULL;
3223                }
3224        }
3225
3226unroll_vsi_setup:
3227        ice_vsi_release(vsi);
3228        return status;
3229}
3230
3231/**
3232 * ice_get_avail_q_count - Get count of available (unused) queues
3233 * @pf_qmap: bitmap to get queue use count from
3234 * @lock: pointer to a mutex that protects access to pf_qmap
3235 * @size: size of the bitmap
3236 */
3237static u16
3238ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
3239{
3240        unsigned long bit;
3241        u16 count = 0;
3242
3243        mutex_lock(lock);
3244        for_each_clear_bit(bit, pf_qmap, size)
3245                count++;
3246        mutex_unlock(lock);
3247
3248        return count;
3249}
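/* Illustrative note, not part of the driver: a clear bit in pf_qmap means
 * the queue has not been handed out to a VSI, so the loop above is
 * equivalent to taking the lock and computing
 *
 *	count = size - bitmap_weight(pf_qmap, size);
 *
 * i.e. the number of PF queues still available.
 */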
3250
3251/**
3252 * ice_get_avail_txq_count - Get count of available Tx queues
3253 * @pf: pointer to an ice_pf instance
3254 */
3255u16 ice_get_avail_txq_count(struct ice_pf *pf)
3256{
3257        return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
3258                                     pf->max_pf_txqs);
3259}
3260
3261/**
3262 * ice_get_avail_rxq_count - Get count of available Rx queues
3263 * @pf: pointer to an ice_pf instance
3264 */
3265u16 ice_get_avail_rxq_count(struct ice_pf *pf)
3266{
3267        return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
3268                                     pf->max_pf_rxqs);
3269}
3270
3271/**
3272 * ice_deinit_pf - Unrolls initializations done by ice_init_pf
3273 * @pf: board private structure to initialize
3274 */
3275static void ice_deinit_pf(struct ice_pf *pf)
3276{
3277        ice_service_task_stop(pf);
3278        mutex_destroy(&pf->sw_mutex);
3279        mutex_destroy(&pf->tc_mutex);
3280        mutex_destroy(&pf->avail_q_mutex);
3281
3282        if (pf->avail_txqs) {
3283                bitmap_free(pf->avail_txqs);
3284                pf->avail_txqs = NULL;
3285        }
3286
3287        if (pf->avail_rxqs) {
3288                bitmap_free(pf->avail_rxqs);
3289                pf->avail_rxqs = NULL;
3290        }
3291}
3292
3293/**
3294 * ice_set_pf_caps - set PF's capability flags
3295 * @pf: pointer to the PF instance
3296 */
3297static void ice_set_pf_caps(struct ice_pf *pf)
3298{
3299        struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
3300
3301        clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3302        if (func_caps->common_cap.dcb)
3303                set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
3304        clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3305        if (func_caps->common_cap.sr_iov_1_1) {
3306                set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
3307                pf->num_vfs_supported = min_t(int, func_caps->num_allocd_vfs,
3308                                              ICE_MAX_VF_COUNT);
3309        }
3310        clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
3311        if (func_caps->common_cap.rss_table_size)
3312                set_bit(ICE_FLAG_RSS_ENA, pf->flags);
3313
3314        clear_bit(ICE_FLAG_FD_ENA, pf->flags);
3315        if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
3316                u16 unused;
3317
3318                /* ctrl_vsi_idx will be set to a valid value when flow director
3319                 * is set up by ice_init_fdir
3320                 */
3321                pf->ctrl_vsi_idx = ICE_NO_VSI;
3322                set_bit(ICE_FLAG_FD_ENA, pf->flags);
3323                /* force guaranteed filter pool for PF */
3324                ice_alloc_fd_guar_item(&pf->hw, &unused,
3325                                       func_caps->fd_fltr_guar);
3326                /* force shared filter pool for PF */
3327                ice_alloc_fd_shrd_item(&pf->hw, &unused,
3328                                       func_caps->fd_fltr_best_effort);
3329        }
3330
3331        pf->max_pf_txqs = func_caps->common_cap.num_txq;
3332        pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
3333}
3334
3335/**
3336 * ice_init_pf - Initialize general software structures (struct ice_pf)
3337 * @pf: board private structure to initialize
3338 */
3339static int ice_init_pf(struct ice_pf *pf)
3340{
3341        ice_set_pf_caps(pf);
3342
3343        mutex_init(&pf->sw_mutex);
3344        mutex_init(&pf->tc_mutex);
3345
3346        INIT_HLIST_HEAD(&pf->aq_wait_list);
3347        spin_lock_init(&pf->aq_wait_lock);
3348        init_waitqueue_head(&pf->aq_wait_queue);
3349
3350        /* setup service timer and periodic service task */
3351        timer_setup(&pf->serv_tmr, ice_service_timer, 0);
3352        pf->serv_tmr_period = HZ;
3353        INIT_WORK(&pf->serv_task, ice_service_task);
3354        clear_bit(ICE_SERVICE_SCHED, pf->state);
3355
3356        mutex_init(&pf->avail_q_mutex);
3357        pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
3358        if (!pf->avail_txqs)
3359                return -ENOMEM;
3360
3361        pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
3362        if (!pf->avail_rxqs) {
3363                bitmap_free(pf->avail_txqs);
3364                pf->avail_txqs = NULL;
3365                return -ENOMEM;
3366        }
3367
3368        return 0;
3369}
3370
3371/**
3372 * ice_ena_msix_range - Request a range of MSIX vectors from the OS
3373 * @pf: board private structure
3374 *
3375 * Compute the number of MSI-X vectors required (v_budget) and request them
3376 * from the OS. Returns the number of vectors reserved, or negative on failure.
3377 */
3378static int ice_ena_msix_range(struct ice_pf *pf)
3379{
3380        int v_left, v_actual, v_other, v_budget = 0;
3381        struct device *dev = ice_pf_to_dev(pf);
3382        int needed, err, i;
3383
3384        v_left = pf->hw.func_caps.common_cap.num_msix_vectors;
3385
3386        /* reserve for LAN miscellaneous handler */
3387        needed = ICE_MIN_LAN_OICR_MSIX;
3388        if (v_left < needed)
3389                goto no_hw_vecs_left_err;
3390        v_budget += needed;
3391        v_left -= needed;
3392
3393        /* reserve for flow director */
3394        if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
3395                needed = ICE_FDIR_MSIX;
3396                if (v_left < needed)
3397                        goto no_hw_vecs_left_err;
3398                v_budget += needed;
3399                v_left -= needed;
3400        }
3401
3402        /* total used for non-traffic vectors */
3403        v_other = v_budget;
3404
3405        /* reserve vectors for LAN traffic */
3406        needed = min_t(int, num_online_cpus(), v_left);
3407        if (v_left < needed)
3408                goto no_hw_vecs_left_err;
3409        pf->num_lan_msix = needed;
3410        v_budget += needed;
3411        v_left -= needed;
3412
3413        pf->msix_entries = devm_kcalloc(dev, v_budget,
3414                                        sizeof(*pf->msix_entries), GFP_KERNEL);
3415        if (!pf->msix_entries) {
3416                err = -ENOMEM;
3417                goto exit_err;
3418        }
3419
3420        for (i = 0; i < v_budget; i++)
3421                pf->msix_entries[i].entry = i;
3422
3423        /* actually reserve the vectors */
3424        v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries,
3425                                         ICE_MIN_MSIX, v_budget);
3426        if (v_actual < 0) {
3427                dev_err(dev, "unable to reserve MSI-X vectors\n");
3428                err = v_actual;
3429                goto msix_err;
3430        }
3431
3432        if (v_actual < v_budget) {
3433                dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
3434                         v_budget, v_actual);
3435
3436                if (v_actual < ICE_MIN_MSIX) {
3437                        /* error if we can't get minimum vectors */
3438                        pci_disable_msix(pf->pdev);
3439                        err = -ERANGE;
3440                        goto msix_err;
3441                } else {
3442                        int v_traffic = v_actual - v_other;
3443
3444                        if (v_actual == ICE_MIN_MSIX ||
3445                            v_traffic < ICE_MIN_LAN_TXRX_MSIX)
3446                                pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
3447                        else
3448                                pf->num_lan_msix = v_traffic;
3449
3450                        dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
3451                                   pf->num_lan_msix);
3452                }
3453        }
3454
3455        return v_actual;
3456
3457msix_err:
3458        devm_kfree(dev, pf->msix_entries);
3459        goto exit_err;
3460
3461no_hw_vecs_left_err:
3462        dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n",
3463                needed, v_left);
3464        err = -ERANGE;
3465exit_err:
3466        pf->num_lan_msix = 0;
3467        return err;
3468}
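/* Illustrative walk-through, not part of the driver: the budget requested
 * above is
 *
 *	v_budget = ICE_MIN_LAN_OICR_MSIX (misc/OICR vector)
 *		 + ICE_FDIR_MSIX (only if ICE_FLAG_FD_ENA is set)
 *		 + min(num_online_cpus(), v_left) (LAN traffic)
 *
 * If pci_enable_msix_range() grants fewer vectors than requested but at
 * least ICE_MIN_MSIX, the shortfall comes entirely out of pf->num_lan_msix;
 * the misc and flow director vectors are never reduced.
 */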
3469
3470/**
3471 * ice_dis_msix - Disable MSI-X interrupt setup in OS
3472 * @pf: board private structure
3473 */
3474static void ice_dis_msix(struct ice_pf *pf)
3475{
3476        pci_disable_msix(pf->pdev);
3477        devm_kfree(ice_pf_to_dev(pf), pf->msix_entries);
3478        pf->msix_entries = NULL;
3479}
3480
3481/**
3482 * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
3483 * @pf: board private structure
3484 */
3485static void ice_clear_interrupt_scheme(struct ice_pf *pf)
3486{
3487        ice_dis_msix(pf);
3488
3489        if (pf->irq_tracker) {
3490                devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker);
3491                pf->irq_tracker = NULL;
3492        }
3493}
3494
3495/**
3496 * ice_init_interrupt_scheme - Determine proper interrupt scheme
3497 * @pf: board private structure to initialize
3498 */
3499static int ice_init_interrupt_scheme(struct ice_pf *pf)
3500{
3501        int vectors;
3502
3503        vectors = ice_ena_msix_range(pf);
3504
3505        if (vectors < 0)
3506                return vectors;
3507
3508        /* set up vector assignment tracking */
3509        pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf),
3510                                       struct_size(pf->irq_tracker, list, vectors),
3511                                       GFP_KERNEL);
3512        if (!pf->irq_tracker) {
3513                ice_dis_msix(pf);
3514                return -ENOMEM;
3515        }
3516
3517        /* populate the SW interrupt pool with the number of OS-granted IRQs */
3518        pf->num_avail_sw_msix = (u16)vectors;
3519        pf->irq_tracker->num_entries = (u16)vectors;
3520        pf->irq_tracker->end = pf->irq_tracker->num_entries;
3521
3522        return 0;
3523}
3524
3525/**
3526 * ice_is_wol_supported - check if WoL is supported
3527 * @hw: pointer to hardware info
3528 *
3529 * Check if WoL is supported based on the HW configuration.
3530 * Returns true if NVM supports and enables WoL for this port, false otherwise
3531 */
3532bool ice_is_wol_supported(struct ice_hw *hw)
3533{
3534        u16 wol_ctrl;
3535
3536        /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
3537         * word) indicates WoL is not supported on the corresponding PF ID.
3538         */
3539        if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
3540                return false;
3541
3542        return !(BIT(hw->port_info->lport) & wol_ctrl);
3543}
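/* Illustrative example, not part of the driver: a set bit in the NVM WoL
 * control word disables WoL for that logical port. With wol_ctrl == 0x0005
 * (bits 0 and 2 set):
 *
 *	lport 0: !(BIT(0) & 0x0005) -> false (WoL not supported)
 *	lport 1: !(BIT(1) & 0x0005) -> true  (WoL supported)
 */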
3544
3545/**
3546 * ice_vsi_recfg_qs - Change the number of queues on a VSI
3547 * @vsi: VSI being changed
3548 * @new_rx: new number of Rx queues
3549 * @new_tx: new number of Tx queues
3550 *
3551 * Only change the number of queues if new_tx or new_rx is non-zero.
3552 *
3553 * Returns 0 on success.
3554 */
3555int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
3556{
3557        struct ice_pf *pf = vsi->back;
3558        int err = 0, timeout = 50;
3559
3560        if (!new_rx && !new_tx)
3561                return -EINVAL;
3562
3563        while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
3564                timeout--;
3565                if (!timeout)
3566                        return -EBUSY;
3567                usleep_range(1000, 2000);
3568        }
3569
3570        if (new_tx)
3571                vsi->req_txq = (u16)new_tx;
3572        if (new_rx)
3573                vsi->req_rxq = (u16)new_rx;
3574
3575        /* set for the next time the netdev is started */
3576        if (!netif_running(vsi->netdev)) {
3577                ice_vsi_rebuild(vsi, false);
3578                dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
3579                goto done;
3580        }
3581
3582        ice_vsi_close(vsi);
3583        ice_vsi_rebuild(vsi, false);
3584        ice_pf_dcb_recfg(pf);
3585        ice_vsi_open(vsi);
3586done:
3587        clear_bit(ICE_CFG_BUSY, pf->state);
3588        return err;
3589}
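/* Illustrative note, not part of the driver: the ICE_CFG_BUSY loop above
 * retries up to 50 times with a 1-2 ms sleep per attempt, so a concurrent
 * configuration can hold off the queue count change for roughly 50-100 ms
 * before -EBUSY is returned.
 */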
3590
3591/**
3592 * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
3593 * @pf: PF to configure
3594 *
3595 * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
3596 * VSI can still Tx/Rx VLAN tagged packets.
3597 */
3598static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
3599{
3600        struct ice_vsi *vsi = ice_get_main_vsi(pf);
3601        struct ice_vsi_ctx *ctxt;
3602        enum ice_status status;
3603        struct ice_hw *hw;
3604
3605        if (!vsi)
3606                return;
3607
3608        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
3609        if (!ctxt)
3610                return;
3611
3612        hw = &pf->hw;
3613        ctxt->info = vsi->info;
3614
3615        ctxt->info.valid_sections =
3616                cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
3617                            ICE_AQ_VSI_PROP_SECURITY_VALID |
3618                            ICE_AQ_VSI_PROP_SW_VALID);
3619
3620        /* disable VLAN anti-spoof */
3621        ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
3622                                  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
3623
3624        /* disable VLAN pruning and keep all other settings */
3625        ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
3626
3627        /* allow all VLANs on Tx and don't strip on Rx */
3628        ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_ALL |
3629                ICE_AQ_VSI_VLAN_EMOD_NOTHING;
3630
3631        status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
3632        if (status) {
3633                dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %s aq_err %s\n",
3634                        ice_stat_str(status),
3635                        ice_aq_str(hw->adminq.sq_last_status));
3636        } else {
3637                vsi->info.sec_flags = ctxt->info.sec_flags;
3638                vsi->info.sw_flags2 = ctxt->info.sw_flags2;
3639                vsi->info.vlan_flags = ctxt->info.vlan_flags;
3640        }
3641
3642        kfree(ctxt);
3643}
3644
3645/**
3646 * ice_log_pkg_init - log result of DDP package load
3647 * @hw: pointer to hardware info
3648 * @status: status of package load
3649 */
3650static void
3651ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
3652{
3653        struct ice_pf *pf = (struct ice_pf *)hw->back;
3654        struct device *dev = ice_pf_to_dev(pf);
3655
3656        switch (*status) {
3657        case ICE_SUCCESS:
3658                /* The package download AdminQ command returned success because
3659                 * this download succeeded or ICE_ERR_AQ_NO_WORK since there is
3660                 * already a package loaded on the device.
3661                 */
3662                if (hw->pkg_ver.major == hw->active_pkg_ver.major &&
3663                    hw->pkg_ver.minor == hw->active_pkg_ver.minor &&
3664                    hw->pkg_ver.update == hw->active_pkg_ver.update &&
3665                    hw->pkg_ver.draft == hw->active_pkg_ver.draft &&
3666                    !memcmp(hw->pkg_name, hw->active_pkg_name,
3667                            sizeof(hw->pkg_name))) {
3668                        if (hw->pkg_dwnld_status == ICE_AQ_RC_EEXIST)
3669                                dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
3670                                         hw->active_pkg_name,
3671                                         hw->active_pkg_ver.major,
3672                                         hw->active_pkg_ver.minor,
3673                                         hw->active_pkg_ver.update,
3674                                         hw->active_pkg_ver.draft);
3675                        else
3676                                dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
3677                                         hw->active_pkg_name,
3678                                         hw->active_pkg_ver.major,
3679                                         hw->active_pkg_ver.minor,
3680                                         hw->active_pkg_ver.update,
3681                                         hw->active_pkg_ver.draft);
3682                } else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
3683                           hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
3684                        dev_err(dev, "The device has a DDP package that is not supported by the driver.  The device has package '%s' version %d.%d.x.x.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
3685                                hw->active_pkg_name,
3686                                hw->active_pkg_ver.major,
3687                                hw->active_pkg_ver.minor,
3688                                ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
3689                        *status = ICE_ERR_NOT_SUPPORTED;
3690                } else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
3691                           hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
3692                        dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package '%s' version %d.%d.%d.%d.  The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
3693                                 hw->active_pkg_name,
3694                                 hw->active_pkg_ver.major,
3695                                 hw->active_pkg_ver.minor,
3696                                 hw->active_pkg_ver.update,
3697                                 hw->active_pkg_ver.draft,
3698                                 hw->pkg_name,
3699                                 hw->pkg_ver.major,
3700                                 hw->pkg_ver.minor,
3701                                 hw->pkg_ver.update,
3702                                 hw->pkg_ver.draft);
3703                } else {
3704                        dev_err(dev, "An unknown error occurred when loading the DDP package, please reboot the system.  If the problem persists, update the NVM.  Entering Safe Mode.\n");
3705                        *status = ICE_ERR_NOT_SUPPORTED;
3706                }
3707                break;
3708        case ICE_ERR_FW_DDP_MISMATCH:
3709                dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
3710                break;
3711        case ICE_ERR_BUF_TOO_SHORT:
3712        case ICE_ERR_CFG:
3713                dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
3714                break;
3715        case ICE_ERR_NOT_SUPPORTED:
3716                /* Package File version not supported */
3717                if (hw->pkg_ver.major > ICE_PKG_SUPP_VER_MAJ ||
3718                    (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
3719                     hw->pkg_ver.minor > ICE_PKG_SUPP_VER_MNR))
3720                        dev_err(dev, "The DDP package file version is higher than the driver supports.  Please use an updated driver.  Entering Safe Mode.\n");
3721                else if (hw->pkg_ver.major < ICE_PKG_SUPP_VER_MAJ ||
3722                         (hw->pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
3723                          hw->pkg_ver.minor < ICE_PKG_SUPP_VER_MNR))
3724                        dev_err(dev, "The DDP package file version is lower than the driver supports.  The driver requires version %d.%d.x.x.  Please use an updated DDP Package file.  Entering Safe Mode.\n",
3725                                ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
3726                break;
3727        case ICE_ERR_AQ_ERROR:
3728                switch (hw->pkg_dwnld_status) {
3729                case ICE_AQ_RC_ENOSEC:
3730                case ICE_AQ_RC_EBADSIG:
3731                        dev_err(dev, "The DDP package could not be loaded because its signature is not valid.  Please use a valid DDP Package.  Entering Safe Mode.\n");
3732                        return;
3733                case ICE_AQ_RC_ESVN:
3734                        dev_err(dev, "The DDP Package could not be loaded because its security revision is too low.  Please use an updated DDP Package.  Entering Safe Mode.\n");
3735                        return;
3736                case ICE_AQ_RC_EBADMAN:
3737                case ICE_AQ_RC_EBADBUF:
3738                        dev_err(dev, "An error occurred on the device while loading the DDP package.  The device will be reset.\n");
3739                        /* poll for reset to complete */
3740                        if (ice_check_reset(hw))
3741                                dev_err(dev, "Error resetting device. Please reload the driver\n");
3742                        return;
3743                default:
3744                        break;
3745                }
3746                fallthrough;
3747        default:
3748                dev_err(dev, "An unknown error (%d) occurred when loading the DDP package.  Entering Safe Mode.\n",
3749                        *status);
3750                break;
3751        }
3752}
3753
3754/**
3755 * ice_load_pkg - load/reload the DDP Package file
3756 * @firmware: firmware structure when firmware requested or NULL for reload
3757 * @pf: pointer to the PF instance
3758 *
3759 * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
3760 * initialize HW tables.
3761 */
3762static void
3763ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
3764{
3765        enum ice_status status = ICE_ERR_PARAM;
3766        struct device *dev = ice_pf_to_dev(pf);
3767        struct ice_hw *hw = &pf->hw;
3768
3769        /* Load DDP Package */
3770        if (firmware && !hw->pkg_copy) {
3771                status = ice_copy_and_init_pkg(hw, firmware->data,
3772                                               firmware->size);
3773                ice_log_pkg_init(hw, &status);
3774        } else if (!firmware && hw->pkg_copy) {
3775                /* Reload package during rebuild after CORER/GLOBR reset */
3776                status = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
3777                ice_log_pkg_init(hw, &status);
3778        } else {
3779                dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
3780        }
3781
3782        if (status) {
3783                /* Safe Mode */
3784                clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
3785                return;
3786        }
3787
3788        /* A successfully downloaded package is the precondition for advanced
3789         * features, hence set the ICE_FLAG_ADV_FEATURES flag
3790         */
3791        set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
3792}
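/* Illustrative note, not part of the driver: the two success paths above
 * correspond to
 *
 *	probe:    ice_load_pkg(firmware, pf) -> ice_copy_and_init_pkg()
 *	rebuild:  ice_load_pkg(NULL, pf)     -> ice_init_pkg(hw->pkg_copy)
 *
 * Any other combination, or a failed package init, leaves
 * ICE_FLAG_ADV_FEATURES cleared and the driver continues in Safe Mode.
 */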
3793
3794/**
3795 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
3796 * @pf: pointer to the PF structure
3797 *
3798 * There is no error returned here because the driver should be able to handle
3799 * 128 Byte cache lines, so we only print a warning in case issues are seen,
3800 * specifically with Tx.
3801 */
3802static void ice_verify_cacheline_size(struct ice_pf *pf)
3803{
3804        if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
3805                dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
3806                         ICE_CACHE_LINE_BYTES);
3807}
3808
3809/**
3810 * ice_send_version - update firmware with driver version
3811 * @pf: PF struct
3812 *
3813 * Returns ICE_SUCCESS on success, else error code
3814 */
3815static enum ice_status ice_send_version(struct ice_pf *pf)
3816{
3817        struct ice_driver_ver dv;
3818
3819        dv.major_ver = 0xff;
3820        dv.minor_ver = 0xff;
3821        dv.build_ver = 0xff;
3822        dv.subbuild_ver = 0;
3823        strscpy((char *)dv.driver_string, UTS_RELEASE,
3824                sizeof(dv.driver_string));
3825        return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
3826}
3827
3828/**
3829 * ice_init_fdir - Initialize flow director VSI and configuration
3830 * @pf: pointer to the PF instance
3831 *
3832 * returns 0 on success, negative on error
3833 */
3834static int ice_init_fdir(struct ice_pf *pf)
3835{
3836        struct device *dev = ice_pf_to_dev(pf);
3837        struct ice_vsi *ctrl_vsi;
3838        int err;
3839
3840        /* Side Band Flow Director needs to have a control VSI.
3841         * Allocate it and store it in the PF.
3842         */
3843        ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
3844        if (!ctrl_vsi) {
3845                dev_dbg(dev, "could not create control VSI\n");
3846                return -ENOMEM;
3847        }
3848
3849        err = ice_vsi_open_ctrl(ctrl_vsi);
3850        if (err) {
3851                dev_dbg(dev, "could not open control VSI\n");
3852                goto err_vsi_open;
3853        }
3854
3855        mutex_init(&pf->hw.fdir_fltr_lock);
3856
3857        err = ice_fdir_create_dflt_rules(pf);
3858        if (err)
3859                goto err_fdir_rule;
3860
3861        return 0;
3862
3863err_fdir_rule:
3864        ice_fdir_release_flows(&pf->hw);
3865        ice_vsi_close(ctrl_vsi);
3866err_vsi_open:
3867        ice_vsi_release(ctrl_vsi);
3868        if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
3869                pf->vsi[pf->ctrl_vsi_idx] = NULL;
3870                pf->ctrl_vsi_idx = ICE_NO_VSI;
3871        }
3872        return err;
3873}
3874
3875/**
3876 * ice_get_opt_fw_name - return optional firmware file name or NULL
3877 * @pf: pointer to the PF instance
3878 */
3879static char *ice_get_opt_fw_name(struct ice_pf *pf)
3880{
3881        /* Optional firmware name is the same as the default, with an additional
3882         * dash followed by an EUI-64 identifier (PCIe Device Serial Number)
3883         */
3884        struct pci_dev *pdev = pf->pdev;
3885        char *opt_fw_filename;
3886        u64 dsn;
3887
3888        /* Determine the name of the optional file using the DSN (two
3889         * dwords following the start of the DSN Capability).
3890         */
3891        dsn = pci_get_dsn(pdev);
3892        if (!dsn)
3893                return NULL;
3894
3895        opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
3896        if (!opt_fw_filename)
3897                return NULL;
3898
3899        snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
3900                 ICE_DDP_PKG_PATH, dsn);
3901
3902        return opt_fw_filename;
3903}
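/* Illustrative example, not part of the driver: for a device whose PCIe
 * Device Serial Number reads 0x0123456789abcdef, the name built above is
 *
 *	"intel/ice/ddp/ice-0123456789abcdef.pkg"
 *
 * which ice_request_fw() below tries before falling back to the default
 * ICE_DDP_PKG_FILE.
 */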
3904
3905/**
3906 * ice_request_fw - request the DDP package file and load it
3907 * @pf: pointer to the PF instance
3908 */
3909static void ice_request_fw(struct ice_pf *pf)
3910{
3911        char *opt_fw_filename = ice_get_opt_fw_name(pf);
3912        const struct firmware *firmware = NULL;
3913        struct device *dev = ice_pf_to_dev(pf);
3914        int err = 0;
3915
3916        /* The optional device-specific DDP package (if present) overrides the
3917         * default DDP package file. The kernel logs a debug message if the file
3918         * doesn't exist and warning messages for other errors.
3919         */
3920        if (opt_fw_filename) {
3921                err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
3922                if (err) {
3923                        kfree(opt_fw_filename);
3924                        goto dflt_pkg_load;
3925                }
3926
3927                /* request for firmware was successful. Download to device */
3928                ice_load_pkg(firmware, pf);
3929                kfree(opt_fw_filename);
3930                release_firmware(firmware);
3931                return;
3932        }
3933
3934dflt_pkg_load:
3935        err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
3936        if (err) {
3937                dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
3938                return;
3939        }
3940
3941        /* request for firmware was successful. Download to device */
3942        ice_load_pkg(firmware, pf);
3943        release_firmware(firmware);
3944}
3945
3946/**
3947 * ice_print_wake_reason - show the wake up cause in the log
3948 * @pf: pointer to the PF struct
3949 */
3950static void ice_print_wake_reason(struct ice_pf *pf)
3951{
3952        u32 wus = pf->wakeup_reason;
3953        const char *wake_str;
3954
3955        /* if no wake event, nothing to print */
3956        if (!wus)
3957                return;
3958
3959        if (wus & PFPM_WUS_LNKC_M)
3960                wake_str = "Link\n";
3961        else if (wus & PFPM_WUS_MAG_M)
3962                wake_str = "Magic Packet\n";
3963        else if (wus & PFPM_WUS_MNG_M)
3964                wake_str = "Management\n";
3965        else if (wus & PFPM_WUS_FW_RST_WK_M)
3966                wake_str = "Firmware Reset\n";
3967        else
3968                wake_str = "Unknown\n";
3969
3970        dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
3971}
3972
3973/**
3974 * ice_register_netdev - register netdev and devlink port
3975 * @pf: pointer to the PF struct
3976 */
3977static int ice_register_netdev(struct ice_pf *pf)
3978{
3979        struct ice_vsi *vsi;
3980        int err = 0;
3981
3982        vsi = ice_get_main_vsi(pf);
3983        if (!vsi || !vsi->netdev)
3984                return -EIO;
3985
3986        err = register_netdev(vsi->netdev);
3987        if (err)
3988                goto err_register_netdev;
3989
3990        set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
3991        netif_carrier_off(vsi->netdev);
3992        netif_tx_stop_all_queues(vsi->netdev);
3993        err = ice_devlink_create_port(vsi);
3994        if (err)
3995                goto err_devlink_create;
3996
3997        devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
3998
3999        return 0;
4000err_devlink_create:
4001        unregister_netdev(vsi->netdev);
4002        clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
4003err_register_netdev:
4004        free_netdev(vsi->netdev);
4005        vsi->netdev = NULL;
4006        clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
4007        return err;
4008}
4009
4010/**
4011 * ice_probe - Device initialization routine
4012 * @pdev: PCI device information struct
4013 * @ent: entry in ice_pci_tbl
4014 *
4015 * Returns 0 on success, negative on failure
4016 */
4017static int
4018ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
4019{
4020        struct device *dev = &pdev->dev;
4021        struct ice_pf *pf;
4022        struct ice_hw *hw;
4023        int i, err;
4024
4025        if (pdev->is_virtfn) {
4026                dev_err(dev, "can't probe a virtual function\n");
4027                return -EINVAL;
4028        }
4029
4030        /* this driver uses devres, see
4031         * Documentation/driver-api/driver-model/devres.rst
4032         */
4033        err = pcim_enable_device(pdev);
4034        if (err)
4035                return err;
4036
4037        err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
4038        if (err) {
4039                dev_err(dev, "BAR0 I/O map error %d\n", err);
4040                return err;
4041        }
4042
4043        pf = ice_allocate_pf(dev);
4044        if (!pf)
4045                return -ENOMEM;
4046
4047        /* set up for high or low DMA */
4048        err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
4049        if (err)
4050                err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
4051        if (err) {
4052                dev_err(dev, "DMA configuration failed: 0x%x\n", err);
4053                return err;
4054        }
4055
4056        pci_enable_pcie_error_reporting(pdev);
4057        pci_set_master(pdev);
4058
4059        pf->pdev = pdev;
4060        pci_set_drvdata(pdev, pf);
4061        set_bit(ICE_DOWN, pf->state);
4062        /* Disable service task until DOWN bit is cleared */
4063        set_bit(ICE_SERVICE_DIS, pf->state);
4064
4065        hw = &pf->hw;
4066        hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
4067        pci_save_state(pdev);
4068
4069        hw->back =