linux/drivers/net/ethernet/intel/igb/igb_main.c
<<
>>
Prefs
   1/*******************************************************************************
   2
   3  Intel(R) Gigabit Ethernet Linux driver
   4  Copyright(c) 2007-2011 Intel Corporation.
   5
   6  This program is free software; you can redistribute it and/or modify it
   7  under the terms and conditions of the GNU General Public License,
   8  version 2, as published by the Free Software Foundation.
   9
  10  This program is distributed in the hope it will be useful, but WITHOUT
  11  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13  more details.
  14
  15  You should have received a copy of the GNU General Public License along with
  16  this program; if not, write to the Free Software Foundation, Inc.,
  17  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  18
  19  The full GNU General Public License is included in this distribution in
  20  the file called "COPYING".
  21
  22  Contact Information:
  23  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  24  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
  25
  26*******************************************************************************/
  27
  28#include <linux/module.h>
  29#include <linux/types.h>
  30#include <linux/init.h>
  31#include <linux/bitops.h>
  32#include <linux/vmalloc.h>
  33#include <linux/pagemap.h>
  34#include <linux/netdevice.h>
  35#include <linux/ipv6.h>
  36#include <linux/slab.h>
  37#include <net/checksum.h>
  38#include <net/ip6_checksum.h>
  39#include <linux/net_tstamp.h>
  40#include <linux/mii.h>
  41#include <linux/ethtool.h>
  42#include <linux/if.h>
  43#include <linux/if_vlan.h>
  44#include <linux/pci.h>
  45#include <linux/pci-aspm.h>
  46#include <linux/delay.h>
  47#include <linux/interrupt.h>
  48#include <linux/ip.h>
  49#include <linux/tcp.h>
  50#include <linux/sctp.h>
  51#include <linux/if_ether.h>
  52#include <linux/aer.h>
  53#include <linux/prefetch.h>
  54#ifdef CONFIG_IGB_DCA
  55#include <linux/dca.h>
  56#endif
  57#include "igb.h"
  58
  59#define MAJ 3
  60#define MIN 2
  61#define BUILD 10
  62#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
  63__stringify(BUILD) "-k"
  64char igb_driver_name[] = "igb";
  65char igb_driver_version[] = DRV_VERSION;
  66static const char igb_driver_string[] =
  67                                "Intel(R) Gigabit Ethernet Network Driver";
  68static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
  69
  70static const struct e1000_info *igb_info_tbl[] = {
  71        [board_82575] = &e1000_82575_info,
  72};
  73
  74static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
  75        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
  76        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
  77        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
  78        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
  79        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
  80        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
  81        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
  82        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
  83        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
  84        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
  85        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
  86        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
  87        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
  88        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
  89        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
  90        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
  91        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
  92        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
  93        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
  94        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
  95        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
  96        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
  97        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
  98        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
  99        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
 100        /* required last entry */
 101        {0, }
 102};
 103
 104MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
 105
 106void igb_reset(struct igb_adapter *);
 107static int igb_setup_all_tx_resources(struct igb_adapter *);
 108static int igb_setup_all_rx_resources(struct igb_adapter *);
 109static void igb_free_all_tx_resources(struct igb_adapter *);
 110static void igb_free_all_rx_resources(struct igb_adapter *);
 111static void igb_setup_mrqc(struct igb_adapter *);
 112static int igb_probe(struct pci_dev *, const struct pci_device_id *);
 113static void __devexit igb_remove(struct pci_dev *pdev);
 114static void igb_init_hw_timer(struct igb_adapter *adapter);
 115static int igb_sw_init(struct igb_adapter *);
 116static int igb_open(struct net_device *);
 117static int igb_close(struct net_device *);
 118static void igb_configure_tx(struct igb_adapter *);
 119static void igb_configure_rx(struct igb_adapter *);
 120static void igb_clean_all_tx_rings(struct igb_adapter *);
 121static void igb_clean_all_rx_rings(struct igb_adapter *);
 122static void igb_clean_tx_ring(struct igb_ring *);
 123static void igb_clean_rx_ring(struct igb_ring *);
 124static void igb_set_rx_mode(struct net_device *);
 125static void igb_update_phy_info(unsigned long);
 126static void igb_watchdog(unsigned long);
 127static void igb_watchdog_task(struct work_struct *);
 128static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
 129static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 130                                                 struct rtnl_link_stats64 *stats);
 131static int igb_change_mtu(struct net_device *, int);
 132static int igb_set_mac(struct net_device *, void *);
 133static void igb_set_uta(struct igb_adapter *adapter);
 134static irqreturn_t igb_intr(int irq, void *);
 135static irqreturn_t igb_intr_msi(int irq, void *);
 136static irqreturn_t igb_msix_other(int irq, void *);
 137static irqreturn_t igb_msix_ring(int irq, void *);
 138#ifdef CONFIG_IGB_DCA
 139static void igb_update_dca(struct igb_q_vector *);
 140static void igb_setup_dca(struct igb_adapter *);
 141#endif /* CONFIG_IGB_DCA */
 142static int igb_poll(struct napi_struct *, int);
 143static bool igb_clean_tx_irq(struct igb_q_vector *);
 144static bool igb_clean_rx_irq(struct igb_q_vector *, int);
 145static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 146static void igb_tx_timeout(struct net_device *);
 147static void igb_reset_task(struct work_struct *);
 148static void igb_vlan_mode(struct net_device *netdev, u32 features);
 149static void igb_vlan_rx_add_vid(struct net_device *, u16);
 150static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 151static void igb_restore_vlan(struct igb_adapter *);
 152static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
 153static void igb_ping_all_vfs(struct igb_adapter *);
 154static void igb_msg_task(struct igb_adapter *);
 155static void igb_vmm_control(struct igb_adapter *);
 156static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 157static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 158static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 159static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 160                               int vf, u16 vlan, u8 qos);
 161static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
 162static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 163                                 struct ifla_vf_info *ivi);
 164static void igb_check_vf_rate_limit(struct igb_adapter *);
 165
 166#ifdef CONFIG_PCI_IOV
 167static int igb_vf_configure(struct igb_adapter *adapter, int vf);
 168static int igb_find_enabled_vfs(struct igb_adapter *adapter);
 169static int igb_check_vf_assignment(struct igb_adapter *adapter);
 170#endif
 171
 172#ifdef CONFIG_PM
 173static int igb_suspend(struct pci_dev *, pm_message_t);
 174static int igb_resume(struct pci_dev *);
 175#endif
 176static void igb_shutdown(struct pci_dev *);
 177#ifdef CONFIG_IGB_DCA
 178static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
 179static struct notifier_block dca_notifier = {
 180        .notifier_call  = igb_notify_dca,
 181        .next           = NULL,
 182        .priority       = 0
 183};
 184#endif
 185#ifdef CONFIG_NET_POLL_CONTROLLER
 186/* for netdump / net console */
 187static void igb_netpoll(struct net_device *);
 188#endif
 189#ifdef CONFIG_PCI_IOV
 190static unsigned int max_vfs = 0;
 191module_param(max_vfs, uint, 0);
 192MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
 193                 "per physical function");
 194#endif /* CONFIG_PCI_IOV */
 195
 196static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
 197                     pci_channel_state_t);
 198static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
 199static void igb_io_resume(struct pci_dev *);
 200
 201static struct pci_error_handlers igb_err_handler = {
 202        .error_detected = igb_io_error_detected,
 203        .slot_reset = igb_io_slot_reset,
 204        .resume = igb_io_resume,
 205};
 206
 207static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
 208
 209static struct pci_driver igb_driver = {
 210        .name     = igb_driver_name,
 211        .id_table = igb_pci_tbl,
 212        .probe    = igb_probe,
 213        .remove   = __devexit_p(igb_remove),
 214#ifdef CONFIG_PM
 215        /* Power Management Hooks */
 216        .suspend  = igb_suspend,
 217        .resume   = igb_resume,
 218#endif
 219        .shutdown = igb_shutdown,
 220        .err_handler = &igb_err_handler
 221};
 222
 223MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 224MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 225MODULE_LICENSE("GPL");
 226MODULE_VERSION(DRV_VERSION);
 227
 228struct igb_reg_info {
 229        u32 ofs;
 230        char *name;
 231};
 232
 233static const struct igb_reg_info igb_reg_info_tbl[] = {
 234
 235        /* General Registers */
 236        {E1000_CTRL, "CTRL"},
 237        {E1000_STATUS, "STATUS"},
 238        {E1000_CTRL_EXT, "CTRL_EXT"},
 239
 240        /* Interrupt Registers */
 241        {E1000_ICR, "ICR"},
 242
 243        /* RX Registers */
 244        {E1000_RCTL, "RCTL"},
 245        {E1000_RDLEN(0), "RDLEN"},
 246        {E1000_RDH(0), "RDH"},
 247        {E1000_RDT(0), "RDT"},
 248        {E1000_RXDCTL(0), "RXDCTL"},
 249        {E1000_RDBAL(0), "RDBAL"},
 250        {E1000_RDBAH(0), "RDBAH"},
 251
 252        /* TX Registers */
 253        {E1000_TCTL, "TCTL"},
 254        {E1000_TDBAL(0), "TDBAL"},
 255        {E1000_TDBAH(0), "TDBAH"},
 256        {E1000_TDLEN(0), "TDLEN"},
 257        {E1000_TDH(0), "TDH"},
 258        {E1000_TDT(0), "TDT"},
 259        {E1000_TXDCTL(0), "TXDCTL"},
 260        {E1000_TDFH, "TDFH"},
 261        {E1000_TDFT, "TDFT"},
 262        {E1000_TDFHS, "TDFHS"},
 263        {E1000_TDFPC, "TDFPC"},
 264
 265        /* List Terminator */
 266        {}
 267};
 268
 269/*
 270 * igb_regdump - register printout routine
 271 */
 272static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
 273{
 274        int n = 0;
 275        char rname[16];
 276        u32 regs[8];
 277
 278        switch (reginfo->ofs) {
 279        case E1000_RDLEN(0):
 280                for (n = 0; n < 4; n++)
 281                        regs[n] = rd32(E1000_RDLEN(n));
 282                break;
 283        case E1000_RDH(0):
 284                for (n = 0; n < 4; n++)
 285                        regs[n] = rd32(E1000_RDH(n));
 286                break;
 287        case E1000_RDT(0):
 288                for (n = 0; n < 4; n++)
 289                        regs[n] = rd32(E1000_RDT(n));
 290                break;
 291        case E1000_RXDCTL(0):
 292                for (n = 0; n < 4; n++)
 293                        regs[n] = rd32(E1000_RXDCTL(n));
 294                break;
 295        case E1000_RDBAL(0):
 296                for (n = 0; n < 4; n++)
 297                        regs[n] = rd32(E1000_RDBAL(n));
 298                break;
 299        case E1000_RDBAH(0):
 300                for (n = 0; n < 4; n++)
 301                        regs[n] = rd32(E1000_RDBAH(n));
 302                break;
 303        case E1000_TDBAL(0):
 304                for (n = 0; n < 4; n++)
 305                        regs[n] = rd32(E1000_RDBAL(n));
 306                break;
 307        case E1000_TDBAH(0):
 308                for (n = 0; n < 4; n++)
 309                        regs[n] = rd32(E1000_TDBAH(n));
 310                break;
 311        case E1000_TDLEN(0):
 312                for (n = 0; n < 4; n++)
 313                        regs[n] = rd32(E1000_TDLEN(n));
 314                break;
 315        case E1000_TDH(0):
 316                for (n = 0; n < 4; n++)
 317                        regs[n] = rd32(E1000_TDH(n));
 318                break;
 319        case E1000_TDT(0):
 320                for (n = 0; n < 4; n++)
 321                        regs[n] = rd32(E1000_TDT(n));
 322                break;
 323        case E1000_TXDCTL(0):
 324                for (n = 0; n < 4; n++)
 325                        regs[n] = rd32(E1000_TXDCTL(n));
 326                break;
 327        default:
 328                printk(KERN_INFO "%-15s %08x\n",
 329                        reginfo->name, rd32(reginfo->ofs));
 330                return;
 331        }
 332
 333        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
 334        printk(KERN_INFO "%-15s ", rname);
 335        for (n = 0; n < 4; n++)
 336                printk(KERN_CONT "%08x ", regs[n]);
 337        printk(KERN_CONT "\n");
 338}
 339
 340/*
 341 * igb_dump - Print registers, tx-rings and rx-rings
 342 */
 343static void igb_dump(struct igb_adapter *adapter)
 344{
 345        struct net_device *netdev = adapter->netdev;
 346        struct e1000_hw *hw = &adapter->hw;
 347        struct igb_reg_info *reginfo;
 348        struct igb_ring *tx_ring;
 349        union e1000_adv_tx_desc *tx_desc;
 350        struct my_u0 { u64 a; u64 b; } *u0;
 351        struct igb_ring *rx_ring;
 352        union e1000_adv_rx_desc *rx_desc;
 353        u32 staterr;
 354        u16 i, n;
 355
 356        if (!netif_msg_hw(adapter))
 357                return;
 358
 359        /* Print netdevice Info */
 360        if (netdev) {
 361                dev_info(&adapter->pdev->dev, "Net device Info\n");
 362                printk(KERN_INFO "Device Name     state            "
 363                        "trans_start      last_rx\n");
 364                printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
 365                netdev->name,
 366                netdev->state,
 367                netdev->trans_start,
 368                netdev->last_rx);
 369        }
 370
 371        /* Print Registers */
 372        dev_info(&adapter->pdev->dev, "Register Dump\n");
 373        printk(KERN_INFO " Register Name   Value\n");
 374        for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
 375             reginfo->name; reginfo++) {
 376                igb_regdump(hw, reginfo);
 377        }
 378
 379        /* Print TX Ring Summary */
 380        if (!netdev || !netif_running(netdev))
 381                goto exit;
 382
 383        dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
 384        printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
 385                " leng ntw timestamp\n");
 386        for (n = 0; n < adapter->num_tx_queues; n++) {
 387                struct igb_tx_buffer *buffer_info;
 388                tx_ring = adapter->tx_ring[n];
 389                buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
 390                printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
 391                           n, tx_ring->next_to_use, tx_ring->next_to_clean,
 392                           (u64)buffer_info->dma,
 393                           buffer_info->length,
 394                           buffer_info->next_to_watch,
 395                           (u64)buffer_info->time_stamp);
 396        }
 397
 398        /* Print TX Rings */
 399        if (!netif_msg_tx_done(adapter))
 400                goto rx_ring_summary;
 401
 402        dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
 403
 404        /* Transmit Descriptor Formats
 405         *
 406         * Advanced Transmit Descriptor
 407         *   +--------------------------------------------------------------+
 408         * 0 |         Buffer Address [63:0]                                |
 409         *   +--------------------------------------------------------------+
 410         * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
 411         *   +--------------------------------------------------------------+
 412         *   63      46 45    40 39 38 36 35 32 31   24             15       0
 413         */
 414
 415        for (n = 0; n < adapter->num_tx_queues; n++) {
 416                tx_ring = adapter->tx_ring[n];
 417                printk(KERN_INFO "------------------------------------\n");
 418                printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
 419                printk(KERN_INFO "------------------------------------\n");
 420                printk(KERN_INFO "T [desc]     [address 63:0  ] "
 421                        "[PlPOCIStDDM Ln] [bi->dma       ] "
 422                        "leng  ntw timestamp        bi->skb\n");
 423
 424                for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
 425                        struct igb_tx_buffer *buffer_info;
 426                        tx_desc = IGB_TX_DESC(tx_ring, i);
 427                        buffer_info = &tx_ring->tx_buffer_info[i];
 428                        u0 = (struct my_u0 *)tx_desc;
 429                        printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
 430                                " %04X  %p %016llX %p", i,
 431                                le64_to_cpu(u0->a),
 432                                le64_to_cpu(u0->b),
 433                                (u64)buffer_info->dma,
 434                                buffer_info->length,
 435                                buffer_info->next_to_watch,
 436                                (u64)buffer_info->time_stamp,
 437                                buffer_info->skb);
 438                        if (i == tx_ring->next_to_use &&
 439                                i == tx_ring->next_to_clean)
 440                                printk(KERN_CONT " NTC/U\n");
 441                        else if (i == tx_ring->next_to_use)
 442                                printk(KERN_CONT " NTU\n");
 443                        else if (i == tx_ring->next_to_clean)
 444                                printk(KERN_CONT " NTC\n");
 445                        else
 446                                printk(KERN_CONT "\n");
 447
 448                        if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
 449                                print_hex_dump(KERN_INFO, "",
 450                                        DUMP_PREFIX_ADDRESS,
 451                                        16, 1, phys_to_virt(buffer_info->dma),
 452                                        buffer_info->length, true);
 453                }
 454        }
 455
 456        /* Print RX Rings Summary */
 457rx_ring_summary:
 458        dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
 459        printk(KERN_INFO "Queue [NTU] [NTC]\n");
 460        for (n = 0; n < adapter->num_rx_queues; n++) {
 461                rx_ring = adapter->rx_ring[n];
 462                printk(KERN_INFO " %5d %5X %5X\n", n,
 463                           rx_ring->next_to_use, rx_ring->next_to_clean);
 464        }
 465
 466        /* Print RX Rings */
 467        if (!netif_msg_rx_status(adapter))
 468                goto exit;
 469
 470        dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
 471
 472        /* Advanced Receive Descriptor (Read) Format
 473         *    63                                           1        0
 474         *    +-----------------------------------------------------+
 475         *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
 476         *    +----------------------------------------------+------+
 477         *  8 |       Header Buffer Address [63:1]           |  DD  |
 478         *    +-----------------------------------------------------+
 479         *
 480         *
 481         * Advanced Receive Descriptor (Write-Back) Format
 482         *
 483         *   63       48 47    32 31  30      21 20 17 16   4 3     0
 484         *   +------------------------------------------------------+
 485         * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
 486         *   | Checksum   Ident  |   |           |    | Type | Type |
 487         *   +------------------------------------------------------+
 488         * 8 | VLAN Tag | Length | Extended Error | Extended Status |
 489         *   +------------------------------------------------------+
 490         *   63       48 47    32 31            20 19               0
 491         */
 492
 493        for (n = 0; n < adapter->num_rx_queues; n++) {
 494                rx_ring = adapter->rx_ring[n];
 495                printk(KERN_INFO "------------------------------------\n");
 496                printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
 497                printk(KERN_INFO "------------------------------------\n");
 498                printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
 499                        "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
 500                        "<-- Adv Rx Read format\n");
 501                printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
 502                        "[vl er S cks ln] ---------------- [bi->skb] "
 503                        "<-- Adv Rx Write-Back format\n");
 504
 505                for (i = 0; i < rx_ring->count; i++) {
 506                        struct igb_rx_buffer *buffer_info;
 507                        buffer_info = &rx_ring->rx_buffer_info[i];
 508                        rx_desc = IGB_RX_DESC(rx_ring, i);
 509                        u0 = (struct my_u0 *)rx_desc;
 510                        staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 511                        if (staterr & E1000_RXD_STAT_DD) {
 512                                /* Descriptor Done */
 513                                printk(KERN_INFO "RWB[0x%03X]     %016llX "
 514                                        "%016llX ---------------- %p", i,
 515                                        le64_to_cpu(u0->a),
 516                                        le64_to_cpu(u0->b),
 517                                        buffer_info->skb);
 518                        } else {
 519                                printk(KERN_INFO "R  [0x%03X]     %016llX "
 520                                        "%016llX %016llX %p", i,
 521                                        le64_to_cpu(u0->a),
 522                                        le64_to_cpu(u0->b),
 523                                        (u64)buffer_info->dma,
 524                                        buffer_info->skb);
 525
 526                                if (netif_msg_pktdata(adapter)) {
 527                                        print_hex_dump(KERN_INFO, "",
 528                                                DUMP_PREFIX_ADDRESS,
 529                                                16, 1,
 530                                                phys_to_virt(buffer_info->dma),
 531                                                IGB_RX_HDR_LEN, true);
 532                                        print_hex_dump(KERN_INFO, "",
 533                                          DUMP_PREFIX_ADDRESS,
 534                                          16, 1,
 535                                          phys_to_virt(
 536                                            buffer_info->page_dma +
 537                                            buffer_info->page_offset),
 538                                          PAGE_SIZE/2, true);
 539                                }
 540                        }
 541
 542                        if (i == rx_ring->next_to_use)
 543                                printk(KERN_CONT " NTU\n");
 544                        else if (i == rx_ring->next_to_clean)
 545                                printk(KERN_CONT " NTC\n");
 546                        else
 547                                printk(KERN_CONT "\n");
 548
 549                }
 550        }
 551
 552exit:
 553        return;
 554}
 555
 556
 557/**
 558 * igb_read_clock - read raw cycle counter (to be used by time counter)
 559 */
 560static cycle_t igb_read_clock(const struct cyclecounter *tc)
 561{
 562        struct igb_adapter *adapter =
 563                container_of(tc, struct igb_adapter, cycles);
 564        struct e1000_hw *hw = &adapter->hw;
 565        u64 stamp = 0;
 566        int shift = 0;
 567
 568        /*
 569         * The timestamp latches on lowest register read. For the 82580
 570         * the lowest register is SYSTIMR instead of SYSTIML.  However we never
 571         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
 572         */
 573        if (hw->mac.type >= e1000_82580) {
 574                stamp = rd32(E1000_SYSTIMR) >> 8;
 575                shift = IGB_82580_TSYNC_SHIFT;
 576        }
 577
 578        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
 579        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
 580        return stamp;
 581}
 582
 583/**
 584 * igb_get_hw_dev - return device
 585 * used by hardware layer to print debugging information
 586 **/
 587struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
 588{
 589        struct igb_adapter *adapter = hw->back;
 590        return adapter->netdev;
 591}
 592
 593/**
 594 * igb_init_module - Driver Registration Routine
 595 *
 596 * igb_init_module is the first routine called when the driver is
 597 * loaded. All it does is register with the PCI subsystem.
 598 **/
 599static int __init igb_init_module(void)
 600{
 601        int ret;
 602        printk(KERN_INFO "%s - version %s\n",
 603               igb_driver_string, igb_driver_version);
 604
 605        printk(KERN_INFO "%s\n", igb_copyright);
 606
 607#ifdef CONFIG_IGB_DCA
 608        dca_register_notify(&dca_notifier);
 609#endif
 610        ret = pci_register_driver(&igb_driver);
 611        return ret;
 612}
 613
 614module_init(igb_init_module);
 615
 616/**
 617 * igb_exit_module - Driver Exit Cleanup Routine
 618 *
 619 * igb_exit_module is called just before the driver is removed
 620 * from memory.
 621 **/
 622static void __exit igb_exit_module(void)
 623{
 624#ifdef CONFIG_IGB_DCA
 625        dca_unregister_notify(&dca_notifier);
 626#endif
 627        pci_unregister_driver(&igb_driver);
 628}
 629
 630module_exit(igb_exit_module);
 631
 632#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
 633/**
 634 * igb_cache_ring_register - Descriptor ring to register mapping
 635 * @adapter: board private structure to initialize
 636 *
 637 * Once we know the feature-set enabled for the device, we'll cache
 638 * the register offset the descriptor ring is assigned to.
 639 **/
 640static void igb_cache_ring_register(struct igb_adapter *adapter)
 641{
 642        int i = 0, j = 0;
 643        u32 rbase_offset = adapter->vfs_allocated_count;
 644
 645        switch (adapter->hw.mac.type) {
 646        case e1000_82576:
 647                /* The queues are allocated for virtualization such that VF 0
 648                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
 649                 * In order to avoid collision we start at the first free queue
 650                 * and continue consuming queues in the same sequence
 651                 */
 652                if (adapter->vfs_allocated_count) {
 653                        for (; i < adapter->rss_queues; i++)
 654                                adapter->rx_ring[i]->reg_idx = rbase_offset +
 655                                                               Q_IDX_82576(i);
 656                }
 657        case e1000_82575:
 658        case e1000_82580:
 659        case e1000_i350:
 660        default:
 661                for (; i < adapter->num_rx_queues; i++)
 662                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
 663                for (; j < adapter->num_tx_queues; j++)
 664                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
 665                break;
 666        }
 667}
 668
 669static void igb_free_queues(struct igb_adapter *adapter)
 670{
 671        int i;
 672
 673        for (i = 0; i < adapter->num_tx_queues; i++) {
 674                kfree(adapter->tx_ring[i]);
 675                adapter->tx_ring[i] = NULL;
 676        }
 677        for (i = 0; i < adapter->num_rx_queues; i++) {
 678                kfree(adapter->rx_ring[i]);
 679                adapter->rx_ring[i] = NULL;
 680        }
 681        adapter->num_rx_queues = 0;
 682        adapter->num_tx_queues = 0;
 683}
 684
 685/**
 686 * igb_alloc_queues - Allocate memory for all rings
 687 * @adapter: board private structure to initialize
 688 *
 689 * We allocate one ring per queue at run-time since we don't know the
 690 * number of queues at compile-time.
 691 **/
 692static int igb_alloc_queues(struct igb_adapter *adapter)
 693{
 694        struct igb_ring *ring;
 695        int i;
 696        int orig_node = adapter->node;
 697
 698        for (i = 0; i < adapter->num_tx_queues; i++) {
 699                if (orig_node == -1) {
 700                        int cur_node = next_online_node(adapter->node);
 701                        if (cur_node == MAX_NUMNODES)
 702                                cur_node = first_online_node;
 703                        adapter->node = cur_node;
 704                }
 705                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
 706                                    adapter->node);
 707                if (!ring)
 708                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 709                if (!ring)
 710                        goto err;
 711                ring->count = adapter->tx_ring_count;
 712                ring->queue_index = i;
 713                ring->dev = &adapter->pdev->dev;
 714                ring->netdev = adapter->netdev;
 715                ring->numa_node = adapter->node;
 716                /* For 82575, context index must be unique per ring. */
 717                if (adapter->hw.mac.type == e1000_82575)
 718                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
 719                adapter->tx_ring[i] = ring;
 720        }
 721        /* Restore the adapter's original node */
 722        adapter->node = orig_node;
 723
 724        for (i = 0; i < adapter->num_rx_queues; i++) {
 725                if (orig_node == -1) {
 726                        int cur_node = next_online_node(adapter->node);
 727                        if (cur_node == MAX_NUMNODES)
 728                                cur_node = first_online_node;
 729                        adapter->node = cur_node;
 730                }
 731                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
 732                                    adapter->node);
 733                if (!ring)
 734                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
 735                if (!ring)
 736                        goto err;
 737                ring->count = adapter->rx_ring_count;
 738                ring->queue_index = i;
 739                ring->dev = &adapter->pdev->dev;
 740                ring->netdev = adapter->netdev;
 741                ring->numa_node = adapter->node;
 742                /* set flag indicating ring supports SCTP checksum offload */
 743                if (adapter->hw.mac.type >= e1000_82576)
 744                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
 745
 746                /* On i350, loopback VLAN packets have the tag byte-swapped. */
 747                if (adapter->hw.mac.type == e1000_i350)
 748                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
 749
 750                adapter->rx_ring[i] = ring;
 751        }
 752        /* Restore the adapter's original node */
 753        adapter->node = orig_node;
 754
 755        igb_cache_ring_register(adapter);
 756
 757        return 0;
 758
 759err:
 760        /* Restore the adapter's original node */
 761        adapter->node = orig_node;
 762        igb_free_queues(adapter);
 763
 764        return -ENOMEM;
 765}
 766
 767/**
 768 *  igb_write_ivar - configure ivar for given MSI-X vector
 769 *  @hw: pointer to the HW structure
 770 *  @msix_vector: vector number we are allocating to a given ring
 771 *  @index: row index of IVAR register to write within IVAR table
 772 *  @offset: column offset of in IVAR, should be multiple of 8
 773 *
 774 *  This function is intended to handle the writing of the IVAR register
 775 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 776 *  each containing an cause allocation for an Rx and Tx ring, and a
 777 *  variable number of rows depending on the number of queues supported.
 778 **/
 779static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
 780                           int index, int offset)
 781{
 782        u32 ivar = array_rd32(E1000_IVAR0, index);
 783
 784        /* clear any bits that are currently set */
 785        ivar &= ~((u32)0xFF << offset);
 786
 787        /* write vector and valid bit */
 788        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
 789
 790        array_wr32(E1000_IVAR0, index, ivar);
 791}
 792
 793#define IGB_N0_QUEUE -1
 794static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
 795{
 796        struct igb_adapter *adapter = q_vector->adapter;
 797        struct e1000_hw *hw = &adapter->hw;
 798        int rx_queue = IGB_N0_QUEUE;
 799        int tx_queue = IGB_N0_QUEUE;
 800        u32 msixbm = 0;
 801
 802        if (q_vector->rx.ring)
 803                rx_queue = q_vector->rx.ring->reg_idx;
 804        if (q_vector->tx.ring)
 805                tx_queue = q_vector->tx.ring->reg_idx;
 806
 807        switch (hw->mac.type) {
 808        case e1000_82575:
 809                /* The 82575 assigns vectors using a bitmask, which matches the
 810                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
 811                   or more queues to a vector, we write the appropriate bits
 812                   into the MSIXBM register for that vector. */
 813                if (rx_queue > IGB_N0_QUEUE)
 814                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
 815                if (tx_queue > IGB_N0_QUEUE)
 816                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
 817                if (!adapter->msix_entries && msix_vector == 0)
 818                        msixbm |= E1000_EIMS_OTHER;
 819                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
 820                q_vector->eims_value = msixbm;
 821                break;
 822        case e1000_82576:
 823                /*
 824                 * 82576 uses a table that essentially consists of 2 columns
 825                 * with 8 rows.  The ordering is column-major so we use the
 826                 * lower 3 bits as the row index, and the 4th bit as the
 827                 * column offset.
 828                 */
 829                if (rx_queue > IGB_N0_QUEUE)
 830                        igb_write_ivar(hw, msix_vector,
 831                                       rx_queue & 0x7,
 832                                       (rx_queue & 0x8) << 1);
 833                if (tx_queue > IGB_N0_QUEUE)
 834                        igb_write_ivar(hw, msix_vector,
 835                                       tx_queue & 0x7,
 836                                       ((tx_queue & 0x8) << 1) + 8);
 837                q_vector->eims_value = 1 << msix_vector;
 838                break;
 839        case e1000_82580:
 840        case e1000_i350:
 841                /*
 842                 * On 82580 and newer adapters the scheme is similar to 82576
 843                 * however instead of ordering column-major we have things
 844                 * ordered row-major.  So we traverse the table by using
 845                 * bit 0 as the column offset, and the remaining bits as the
 846                 * row index.
 847                 */
 848                if (rx_queue > IGB_N0_QUEUE)
 849                        igb_write_ivar(hw, msix_vector,
 850                                       rx_queue >> 1,
 851                                       (rx_queue & 0x1) << 4);
 852                if (tx_queue > IGB_N0_QUEUE)
 853                        igb_write_ivar(hw, msix_vector,
 854                                       tx_queue >> 1,
 855                                       ((tx_queue & 0x1) << 4) + 8);
 856                q_vector->eims_value = 1 << msix_vector;
 857                break;
 858        default:
 859                BUG();
 860                break;
 861        }
 862
 863        /* add q_vector eims value to global eims_enable_mask */
 864        adapter->eims_enable_mask |= q_vector->eims_value;
 865
 866        /* configure q_vector to set itr on first interrupt */
 867        q_vector->set_itr = 1;
 868}
 869
 870/**
 871 * igb_configure_msix - Configure MSI-X hardware
 872 *
 873 * igb_configure_msix sets up the hardware to properly
 874 * generate MSI-X interrupts.
 875 **/
 876static void igb_configure_msix(struct igb_adapter *adapter)
 877{
 878        u32 tmp;
 879        int i, vector = 0;
 880        struct e1000_hw *hw = &adapter->hw;
 881
 882        adapter->eims_enable_mask = 0;
 883
 884        /* set vector for other causes, i.e. link changes */
 885        switch (hw->mac.type) {
 886        case e1000_82575:
 887                tmp = rd32(E1000_CTRL_EXT);
 888                /* enable MSI-X PBA support*/
 889                tmp |= E1000_CTRL_EXT_PBA_CLR;
 890
 891                /* Auto-Mask interrupts upon ICR read. */
 892                tmp |= E1000_CTRL_EXT_EIAME;
 893                tmp |= E1000_CTRL_EXT_IRCA;
 894
 895                wr32(E1000_CTRL_EXT, tmp);
 896
 897                /* enable msix_other interrupt */
 898                array_wr32(E1000_MSIXBM(0), vector++,
 899                                      E1000_EIMS_OTHER);
 900                adapter->eims_other = E1000_EIMS_OTHER;
 901
 902                break;
 903
 904        case e1000_82576:
 905        case e1000_82580:
 906        case e1000_i350:
 907                /* Turn on MSI-X capability first, or our settings
 908                 * won't stick.  And it will take days to debug. */
 909                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
 910                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
 911                                E1000_GPIE_NSICR);
 912
 913                /* enable msix_other interrupt */
 914                adapter->eims_other = 1 << vector;
 915                tmp = (vector++ | E1000_IVAR_VALID) << 8;
 916
 917                wr32(E1000_IVAR_MISC, tmp);
 918                break;
 919        default:
 920                /* do nothing, since nothing else supports MSI-X */
 921                break;
 922        } /* switch (hw->mac.type) */
 923
 924        adapter->eims_enable_mask |= adapter->eims_other;
 925
 926        for (i = 0; i < adapter->num_q_vectors; i++)
 927                igb_assign_vector(adapter->q_vector[i], vector++);
 928
 929        wrfl();
 930}
 931
 932/**
 933 * igb_request_msix - Initialize MSI-X interrupts
 934 *
 935 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 936 * kernel.
 937 **/
 938static int igb_request_msix(struct igb_adapter *adapter)
 939{
 940        struct net_device *netdev = adapter->netdev;
 941        struct e1000_hw *hw = &adapter->hw;
 942        int i, err = 0, vector = 0;
 943
 944        err = request_irq(adapter->msix_entries[vector].vector,
 945                          igb_msix_other, 0, netdev->name, adapter);
 946        if (err)
 947                goto out;
 948        vector++;
 949
 950        for (i = 0; i < adapter->num_q_vectors; i++) {
 951                struct igb_q_vector *q_vector = adapter->q_vector[i];
 952
 953                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
 954
 955                if (q_vector->rx.ring && q_vector->tx.ring)
 956                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
 957                                q_vector->rx.ring->queue_index);
 958                else if (q_vector->tx.ring)
 959                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
 960                                q_vector->tx.ring->queue_index);
 961                else if (q_vector->rx.ring)
 962                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
 963                                q_vector->rx.ring->queue_index);
 964                else
 965                        sprintf(q_vector->name, "%s-unused", netdev->name);
 966
 967                err = request_irq(adapter->msix_entries[vector].vector,
 968                                  igb_msix_ring, 0, q_vector->name,
 969                                  q_vector);
 970                if (err)
 971                        goto out;
 972                vector++;
 973        }
 974
 975        igb_configure_msix(adapter);
 976        return 0;
 977out:
 978        return err;
 979}
 980
 981static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
 982{
 983        if (adapter->msix_entries) {
 984                pci_disable_msix(adapter->pdev);
 985                kfree(adapter->msix_entries);
 986                adapter->msix_entries = NULL;
 987        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
 988                pci_disable_msi(adapter->pdev);
 989        }
 990}
 991
 992/**
 993 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 994 * @adapter: board private structure to initialize
 995 *
 996 * This function frees the memory allocated to the q_vectors.  In addition if
 997 * NAPI is enabled it will delete any references to the NAPI struct prior
 998 * to freeing the q_vector.
 999 **/
1000static void igb_free_q_vectors(struct igb_adapter *adapter)
1001{
1002        int v_idx;
1003
1004        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1005                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1006                adapter->q_vector[v_idx] = NULL;
1007                if (!q_vector)
1008                        continue;
1009                netif_napi_del(&q_vector->napi);
1010                kfree(q_vector);
1011        }
1012        adapter->num_q_vectors = 0;
1013}
1014
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears down, in order, the queues, the q_vectors and the MSI-X/MSI
 * capability, leaving the device with 0 rx queues, 0 tx queues and no
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally hand the MSI-X/MSI capability back */
	igb_reset_interrupt_capability(adapter);
}
1027
1028/**
1029 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1030 *
1031 * Attempt to configure interrupts using the best available
1032 * capabilities of the hardware and kernel.
1033 **/
1034static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1035{
1036        int err;
1037        int numvecs, i;
1038
1039        /* Number of supported queues. */
1040        adapter->num_rx_queues = adapter->rss_queues;
1041        if (adapter->vfs_allocated_count)
1042                adapter->num_tx_queues = 1;
1043        else
1044                adapter->num_tx_queues = adapter->rss_queues;
1045
1046        /* start with one vector for every rx queue */
1047        numvecs = adapter->num_rx_queues;
1048
1049        /* if tx handler is separate add 1 for every tx queue */
1050        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1051                numvecs += adapter->num_tx_queues;
1052
1053        /* store the number of vectors reserved for queues */
1054        adapter->num_q_vectors = numvecs;
1055
1056        /* add 1 vector for link status interrupts */
1057        numvecs++;
1058        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1059                                        GFP_KERNEL);
1060        if (!adapter->msix_entries)
1061                goto msi_only;
1062
1063        for (i = 0; i < numvecs; i++)
1064                adapter->msix_entries[i].entry = i;
1065
1066        err = pci_enable_msix(adapter->pdev,
1067                              adapter->msix_entries,
1068                              numvecs);
1069        if (err == 0)
1070                goto out;
1071
1072        igb_reset_interrupt_capability(adapter);
1073
1074        /* If we can't do MSI-X, try MSI */
1075msi_only:
1076#ifdef CONFIG_PCI_IOV
1077        /* disable SR-IOV for non MSI-X configurations */
1078        if (adapter->vf_data) {
1079                struct e1000_hw *hw = &adapter->hw;
1080                /* disable iov and allow time for transactions to clear */
1081                pci_disable_sriov(adapter->pdev);
1082                msleep(500);
1083
1084                kfree(adapter->vf_data);
1085                adapter->vf_data = NULL;
1086                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1087                wrfl();
1088                msleep(100);
1089                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1090        }
1091#endif
1092        adapter->vfs_allocated_count = 0;
1093        adapter->rss_queues = 1;
1094        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1095        adapter->num_rx_queues = 1;
1096        adapter->num_tx_queues = 1;
1097        adapter->num_q_vectors = 1;
1098        if (!pci_enable_msi(adapter->pdev))
1099                adapter->flags |= IGB_FLAG_HAS_MSI;
1100out:
1101        /* Notify the stack of the (possibly) reduced queue counts. */
1102        netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1103        return netif_set_real_num_rx_queues(adapter->netdev,
1104                                            adapter->num_rx_queues);
1105}
1106
1107/**
1108 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1109 * @adapter: board private structure to initialize
1110 *
1111 * We allocate one q_vector per queue interrupt.  If allocation fails we
1112 * return -ENOMEM.
1113 **/
1114static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1115{
1116        struct igb_q_vector *q_vector;
1117        struct e1000_hw *hw = &adapter->hw;
1118        int v_idx;
1119        int orig_node = adapter->node;
1120
1121        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1122                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1123                                                adapter->num_tx_queues)) &&
1124                    (adapter->num_rx_queues == v_idx))
1125                        adapter->node = orig_node;
1126                if (orig_node == -1) {
1127                        int cur_node = next_online_node(adapter->node);
1128                        if (cur_node == MAX_NUMNODES)
1129                                cur_node = first_online_node;
1130                        adapter->node = cur_node;
1131                }
1132                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1133                                        adapter->node);
1134                if (!q_vector)
1135                        q_vector = kzalloc(sizeof(struct igb_q_vector),
1136                                           GFP_KERNEL);
1137                if (!q_vector)
1138                        goto err_out;
1139                q_vector->adapter = adapter;
1140                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1141                q_vector->itr_val = IGB_START_ITR;
1142                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1143                adapter->q_vector[v_idx] = q_vector;
1144        }
1145        /* Restore the adapter's original node */
1146        adapter->node = orig_node;
1147
1148        return 0;
1149
1150err_out:
1151        /* Restore the adapter's original node */
1152        adapter->node = orig_node;
1153        igb_free_q_vectors(adapter);
1154        return -ENOMEM;
1155}
1156
1157static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1158                                      int ring_idx, int v_idx)
1159{
1160        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1161
1162        q_vector->rx.ring = adapter->rx_ring[ring_idx];
1163        q_vector->rx.ring->q_vector = q_vector;
1164        q_vector->rx.count++;
1165        q_vector->itr_val = adapter->rx_itr_setting;
1166        if (q_vector->itr_val && q_vector->itr_val <= 3)
1167                q_vector->itr_val = IGB_START_ITR;
1168}
1169
1170static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1171                                      int ring_idx, int v_idx)
1172{
1173        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1174
1175        q_vector->tx.ring = adapter->tx_ring[ring_idx];
1176        q_vector->tx.ring->q_vector = q_vector;
1177        q_vector->tx.count++;
1178        q_vector->itr_val = adapter->tx_itr_setting;
1179        q_vector->tx.work_limit = adapter->tx_work_limit;
1180        if (q_vector->itr_val && q_vector->itr_val <= 3)
1181                q_vector->itr_val = IGB_START_ITR;
1182}
1183
1184/**
1185 * igb_map_ring_to_vector - maps allocated queues to vectors
1186 *
1187 * This function maps the recently allocated queues to vectors.
1188 **/
1189static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1190{
1191        int i;
1192        int v_idx = 0;
1193
1194        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1195            (adapter->num_q_vectors < adapter->num_tx_queues))
1196                return -ENOMEM;
1197
1198        if (adapter->num_q_vectors >=
1199            (adapter->num_rx_queues + adapter->num_tx_queues)) {
1200                for (i = 0; i < adapter->num_rx_queues; i++)
1201                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1202                for (i = 0; i < adapter->num_tx_queues; i++)
1203                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1204        } else {
1205                for (i = 0; i < adapter->num_rx_queues; i++) {
1206                        if (i < adapter->num_tx_queues)
1207                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
1208                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209                }
1210                for (; i < adapter->num_tx_queues; i++)
1211                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1212        }
1213        return 0;
1214}
1215
1216/**
1217 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1218 *
1219 * This function initializes the interrupts and allocates all of the queues.
1220 **/
1221static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1222{
1223        struct pci_dev *pdev = adapter->pdev;
1224        int err;
1225
1226        err = igb_set_interrupt_capability(adapter);
1227        if (err)
1228                return err;
1229
1230        err = igb_alloc_q_vectors(adapter);
1231        if (err) {
1232                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1233                goto err_alloc_q_vectors;
1234        }
1235
1236        err = igb_alloc_queues(adapter);
1237        if (err) {
1238                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1239                goto err_alloc_queues;
1240        }
1241
1242        err = igb_map_ring_to_vector(adapter);
1243        if (err) {
1244                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1245                goto err_map_queues;
1246        }
1247
1248
1249        return 0;
1250err_map_queues:
1251        igb_free_queues(adapter);
1252err_alloc_queues:
1253        igb_free_q_vectors(adapter);
1254err_alloc_q_vectors:
1255        igb_reset_interrupt_capability(adapter);
1256        return err;
1257}
1258
1259/**
1260 * igb_request_irq - initialize interrupts
1261 *
1262 * Attempts to configure interrupts using the best available
1263 * capabilities of the hardware and kernel.
1264 **/
1265static int igb_request_irq(struct igb_adapter *adapter)
1266{
1267        struct net_device *netdev = adapter->netdev;
1268        struct pci_dev *pdev = adapter->pdev;
1269        int err = 0;
1270
1271        if (adapter->msix_entries) {
1272                err = igb_request_msix(adapter);
1273                if (!err)
1274                        goto request_done;
1275                /* fall back to MSI */
1276                igb_clear_interrupt_scheme(adapter);
1277                if (!pci_enable_msi(pdev))
1278                        adapter->flags |= IGB_FLAG_HAS_MSI;
1279                igb_free_all_tx_resources(adapter);
1280                igb_free_all_rx_resources(adapter);
1281                adapter->num_tx_queues = 1;
1282                adapter->num_rx_queues = 1;
1283                adapter->num_q_vectors = 1;
1284                err = igb_alloc_q_vectors(adapter);
1285                if (err) {
1286                        dev_err(&pdev->dev,
1287                                "Unable to allocate memory for vectors\n");
1288                        goto request_done;
1289                }
1290                err = igb_alloc_queues(adapter);
1291                if (err) {
1292                        dev_err(&pdev->dev,
1293                                "Unable to allocate memory for queues\n");
1294                        igb_free_q_vectors(adapter);
1295                        goto request_done;
1296                }
1297                igb_setup_all_tx_resources(adapter);
1298                igb_setup_all_rx_resources(adapter);
1299        }
1300
1301        igb_assign_vector(adapter->q_vector[0], 0);
1302
1303        if (adapter->flags & IGB_FLAG_HAS_MSI) {
1304                err = request_irq(pdev->irq, igb_intr_msi, 0,
1305                                  netdev->name, adapter);
1306                if (!err)
1307                        goto request_done;
1308
1309                /* fall back to legacy interrupts */
1310                igb_reset_interrupt_capability(adapter);
1311                adapter->flags &= ~IGB_FLAG_HAS_MSI;
1312        }
1313
1314        err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1315                          netdev->name, adapter);
1316
1317        if (err)
1318                dev_err(&pdev->dev, "Error %d getting interrupt\n",
1319                        err);
1320
1321request_done:
1322        return err;
1323}
1324
1325static void igb_free_irq(struct igb_adapter *adapter)
1326{
1327        if (adapter->msix_entries) {
1328                int vector = 0, i;
1329
1330                free_irq(adapter->msix_entries[vector++].vector, adapter);
1331
1332                for (i = 0; i < adapter->num_q_vectors; i++)
1333                        free_irq(adapter->msix_entries[vector++].vector,
1334                                 adapter->q_vector[i]);
1335        } else {
1336                free_irq(adapter->pdev->irq, adapter);
1337        }
1338}
1339
1340/**
1341 * igb_irq_disable - Mask off interrupt generation on the NIC
1342 * @adapter: board private structure
1343 **/
1344static void igb_irq_disable(struct igb_adapter *adapter)
1345{
1346        struct e1000_hw *hw = &adapter->hw;
1347
1348        /*
1349         * we need to be careful when disabling interrupts.  The VFs are also
1350         * mapped into these registers and so clearing the bits can cause
1351         * issues on the VF drivers so we only need to clear what we set
1352         */
1353        if (adapter->msix_entries) {
1354                u32 regval = rd32(E1000_EIAM);
1355                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1356                wr32(E1000_EIMC, adapter->eims_enable_mask);
1357                regval = rd32(E1000_EIAC);
1358                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1359        }
1360
1361        wr32(E1000_IAM, 0);
1362        wr32(E1000_IMC, ~0);
1363        wrfl();
1364        if (adapter->msix_entries) {
1365                int i;
1366                for (i = 0; i < adapter->num_q_vectors; i++)
1367                        synchronize_irq(adapter->msix_entries[i].vector);
1368        } else {
1369                synchronize_irq(adapter->pdev->irq);
1370        }
1371}
1372
1373/**
1374 * igb_irq_enable - Enable default interrupt generation settings
1375 * @adapter: board private structure
1376 **/
1377static void igb_irq_enable(struct igb_adapter *adapter)
1378{
1379        struct e1000_hw *hw = &adapter->hw;
1380
1381        if (adapter->msix_entries) {
1382                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1383                u32 regval = rd32(E1000_EIAC);
1384                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1385                regval = rd32(E1000_EIAM);
1386                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1387                wr32(E1000_EIMS, adapter->eims_enable_mask);
1388                if (adapter->vfs_allocated_count) {
1389                        wr32(E1000_MBVFIMR, 0xFF);
1390                        ims |= E1000_IMS_VMMB;
1391                }
1392                wr32(E1000_IMS, ims);
1393        } else {
1394                wr32(E1000_IMS, IMS_ENABLE_MASK |
1395                                E1000_IMS_DRSTA);
1396                wr32(E1000_IAM, IMS_ENABLE_MASK |
1397                                E1000_IMS_DRSTA);
1398        }
1399}
1400
1401static void igb_update_mng_vlan(struct igb_adapter *adapter)
1402{
1403        struct e1000_hw *hw = &adapter->hw;
1404        u16 vid = adapter->hw.mng_cookie.vlan_id;
1405        u16 old_vid = adapter->mng_vlan_id;
1406
1407        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1408                /* add VID to filter table */
1409                igb_vfta_set(hw, vid, true);
1410                adapter->mng_vlan_id = vid;
1411        } else {
1412                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1413        }
1414
1415        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1416            (vid != old_vid) &&
1417            !test_bit(old_vid, adapter->active_vlans)) {
1418                /* remove VID from filter table */
1419                igb_vfta_set(hw, old_vid, false);
1420        }
1421}
1422
1423/**
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1426 *
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1430 *
1431 **/
1432static void igb_release_hw_control(struct igb_adapter *adapter)
1433{
1434        struct e1000_hw *hw = &adapter->hw;
1435        u32 ctrl_ext;
1436
1437        /* Let firmware take over control of h/w */
1438        ctrl_ext = rd32(E1000_CTRL_EXT);
1439        wr32(E1000_CTRL_EXT,
1440                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1441}
1442
1443/**
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1446 *
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1450 *
1451 **/
1452static void igb_get_hw_control(struct igb_adapter *adapter)
1453{
1454        struct e1000_hw *hw = &adapter->hw;
1455        u32 ctrl_ext;
1456
1457        /* Let firmware know the driver has taken over */
1458        ctrl_ext = rd32(E1000_CTRL_EXT);
1459        wr32(E1000_CTRL_EXT,
1460                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1461}
1462
1463/**
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1466 **/
1467static void igb_configure(struct igb_adapter *adapter)
1468{
1469        struct net_device *netdev = adapter->netdev;
1470        int i;
1471
1472        igb_get_hw_control(adapter);
1473        igb_set_rx_mode(netdev);
1474
1475        igb_restore_vlan(adapter);
1476
1477        igb_setup_tctl(adapter);
1478        igb_setup_mrqc(adapter);
1479        igb_setup_rctl(adapter);
1480
1481        igb_configure_tx(adapter);
1482        igb_configure_rx(adapter);
1483
1484        igb_rx_fifo_flush_82575(&adapter->hw);
1485
1486        /* call igb_desc_unused which always leaves
1487         * at least 1 descriptor unused to make sure
1488         * next_to_use != next_to_clean */
1489        for (i = 0; i < adapter->num_rx_queues; i++) {
1490                struct igb_ring *ring = adapter->rx_ring[i];
1491                igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1492        }
1493}
1494
1495/**
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1498 **/
1499void igb_power_up_link(struct igb_adapter *adapter)
1500{
1501        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1502                igb_power_up_phy_copper(&adapter->hw);
1503        else
1504                igb_power_up_serdes_link_82575(&adapter->hw);
1505}
1506
1507/**
1508 * igb_power_down_link - Power down the phy/serdes link
1509 * @adapter: address of board private structure
1510 */
1511static void igb_power_down_link(struct igb_adapter *adapter)
1512{
1513        if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514                igb_power_down_phy_copper_82575(&adapter->hw);
1515        else
1516                igb_shutdown_serdes_link_82575(&adapter->hw);
1517}
1518
1519/**
1520 * igb_up - Open the interface and prepare it to handle traffic
1521 * @adapter: board private structure
1522 **/
1523int igb_up(struct igb_adapter *adapter)
1524{
1525        struct e1000_hw *hw = &adapter->hw;
1526        int i;
1527
1528        /* hardware has been reset, we need to reload some things */
1529        igb_configure(adapter);
1530
1531        clear_bit(__IGB_DOWN, &adapter->state);
1532
1533        for (i = 0; i < adapter->num_q_vectors; i++)
1534                napi_enable(&(adapter->q_vector[i]->napi));
1535
1536        if (adapter->msix_entries)
1537                igb_configure_msix(adapter);
1538        else
1539                igb_assign_vector(adapter->q_vector[0], 0);
1540
1541        /* Clear any pending interrupts. */
1542        rd32(E1000_ICR);
1543        igb_irq_enable(adapter);
1544
1545        /* notify VFs that reset has been completed */
1546        if (adapter->vfs_allocated_count) {
1547                u32 reg_data = rd32(E1000_CTRL_EXT);
1548                reg_data |= E1000_CTRL_EXT_PFRSTD;
1549                wr32(E1000_CTRL_EXT, reg_data);
1550        }
1551
1552        netif_tx_start_all_queues(adapter->netdev);
1553
1554        /* start the watchdog. */
1555        hw->mac.get_link_status = 1;
1556        schedule_work(&adapter->watchdog_task);
1557
1558        return 0;
1559}
1560
1561void igb_down(struct igb_adapter *adapter)
1562{
1563        struct net_device *netdev = adapter->netdev;
1564        struct e1000_hw *hw = &adapter->hw;
1565        u32 tctl, rctl;
1566        int i;
1567
1568        /* signal that we're down so the interrupt handler does not
1569         * reschedule our watchdog timer */
1570        set_bit(__IGB_DOWN, &adapter->state);
1571
1572        /* disable receives in the hardware */
1573        rctl = rd32(E1000_RCTL);
1574        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1575        /* flush and sleep below */
1576
1577        netif_tx_stop_all_queues(netdev);
1578
1579        /* disable transmits in the hardware */
1580        tctl = rd32(E1000_TCTL);
1581        tctl &= ~E1000_TCTL_EN;
1582        wr32(E1000_TCTL, tctl);
1583        /* flush both disables and wait for them to finish */
1584        wrfl();
1585        msleep(10);
1586
1587        for (i = 0; i < adapter->num_q_vectors; i++)
1588                napi_disable(&(adapter->q_vector[i]->napi));
1589
1590        igb_irq_disable(adapter);
1591
1592        del_timer_sync(&adapter->watchdog_timer);
1593        del_timer_sync(&adapter->phy_info_timer);
1594
1595        netif_carrier_off(netdev);
1596
1597        /* record the stats before reset*/
1598        spin_lock(&adapter->stats64_lock);
1599        igb_update_stats(adapter, &adapter->stats64);
1600        spin_unlock(&adapter->stats64_lock);
1601
1602        adapter->link_speed = 0;
1603        adapter->link_duplex = 0;
1604
1605        if (!pci_channel_offline(adapter->pdev))
1606                igb_reset(adapter);
1607        igb_clean_all_tx_rings(adapter);
1608        igb_clean_all_rx_rings(adapter);
1609#ifdef CONFIG_IGB_DCA
1610
1611        /* since we reset the hardware DCA settings were cleared */
1612        igb_setup_dca(adapter);
1613#endif
1614}
1615
1616void igb_reinit_locked(struct igb_adapter *adapter)
1617{
1618        WARN_ON(in_interrupt());
1619        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1620                msleep(1);
1621        igb_down(adapter);
1622        igb_up(adapter);
1623        clear_bit(__IGB_RESETTING, &adapter->state);
1624}
1625
1626void igb_reset(struct igb_adapter *adapter)
1627{
1628        struct pci_dev *pdev = adapter->pdev;
1629        struct e1000_hw *hw = &adapter->hw;
1630        struct e1000_mac_info *mac = &hw->mac;
1631        struct e1000_fc_info *fc = &hw->fc;
1632        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1633        u16 hwm;
1634
1635        /* Repartition Pba for greater than 9k mtu
1636         * To take effect CTRL.RST is required.
1637         */
1638        switch (mac->type) {
1639        case e1000_i350:
1640        case e1000_82580:
1641                pba = rd32(E1000_RXPBS);
1642                pba = igb_rxpbs_adjust_82580(pba);
1643                break;
1644        case e1000_82576:
1645                pba = rd32(E1000_RXPBS);
1646                pba &= E1000_RXPBS_SIZE_MASK_82576;
1647                break;
1648        case e1000_82575:
1649        default:
1650                pba = E1000_PBA_34K;
1651                break;
1652        }
1653
1654        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1655            (mac->type < e1000_82576)) {
1656                /* adjust PBA for jumbo frames */
1657                wr32(E1000_PBA, pba);
1658
1659                /* To maintain wire speed transmits, the Tx FIFO should be
1660                 * large enough to accommodate two full transmit packets,
1661                 * rounded up to the next 1KB and expressed in KB.  Likewise,
1662                 * the Rx FIFO should be large enough to accommodate at least
1663                 * one full receive packet and is similarly rounded up and
1664                 * expressed in KB. */
1665                pba = rd32(E1000_PBA);
1666                /* upper 16 bits has Tx packet buffer allocation size in KB */
1667                tx_space = pba >> 16;
1668                /* lower 16 bits has Rx packet buffer allocation size in KB */
1669                pba &= 0xffff;
1670                /* the tx fifo also stores 16 bytes of information about the tx
1671                 * but don't include ethernet FCS because hardware appends it */
1672                min_tx_space = (adapter->max_frame_size +
1673                                sizeof(union e1000_adv_tx_desc) -
1674                                ETH_FCS_LEN) * 2;
1675                min_tx_space = ALIGN(min_tx_space, 1024);
1676                min_tx_space >>= 10;
1677                /* software strips receive CRC, so leave room for it */
1678                min_rx_space = adapter->max_frame_size;
1679                min_rx_space = ALIGN(min_rx_space, 1024);
1680                min_rx_space >>= 10;
1681
1682                /* If current Tx allocation is less than the min Tx FIFO size,
1683                 * and the min Tx FIFO size is less than the current Rx FIFO
1684                 * allocation, take space away from current Rx allocation */
1685                if (tx_space < min_tx_space &&
1686                    ((min_tx_space - tx_space) < pba)) {
1687                        pba = pba - (min_tx_space - tx_space);
1688
1689                        /* if short on rx space, rx wins and must trump tx
1690                         * adjustment */
1691                        if (pba < min_rx_space)
1692                                pba = min_rx_space;
1693                }
1694                wr32(E1000_PBA, pba);
1695        }
1696
1697        /* flow control settings */
1698        /* The high water mark must be low enough to fit one full frame
1699         * (or the size used for early receive) above it in the Rx FIFO.
1700         * Set it to the lower of:
1701         * - 90% of the Rx FIFO size, or
1702         * - the full Rx FIFO size minus one full frame */
1703        hwm = min(((pba << 10) * 9 / 10),
1704                        ((pba << 10) - 2 * adapter->max_frame_size));
1705
1706        fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1707        fc->low_water = fc->high_water - 16;
1708        fc->pause_time = 0xFFFF;
1709        fc->send_xon = 1;
1710        fc->current_mode = fc->requested_mode;
1711
1712        /* disable receive for all VFs and wait one second */
1713        if (adapter->vfs_allocated_count) {
1714                int i;
1715                for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1716                        adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1717
1718                /* ping all the active vfs to let them know we are going down */
1719                igb_ping_all_vfs(adapter);
1720
1721                /* disable transmits and receives */
1722                wr32(E1000_VFRE, 0);
1723                wr32(E1000_VFTE, 0);
1724        }
1725
1726        /* Allow time for pending master requests to run */
1727        hw->mac.ops.reset_hw(hw);
1728        wr32(E1000_WUC, 0);
1729
1730        if (hw->mac.ops.init_hw(hw))
1731                dev_err(&pdev->dev, "Hardware Error\n");
1732
1733        igb_init_dmac(adapter, pba);
1734        if (!netif_running(adapter->netdev))
1735                igb_power_down_link(adapter);
1736
1737        igb_update_mng_vlan(adapter);
1738
1739        /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1740        wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1741
1742        igb_get_phy_info(hw);
1743}
1744
1745static u32 igb_fix_features(struct net_device *netdev, u32 features)
1746{
1747        /*
1748         * Since there is no support for separate rx/tx vlan accel
1749         * enable/disable make sure tx flag is always in same state as rx.
1750         */
1751        if (features & NETIF_F_HW_VLAN_RX)
1752                features |= NETIF_F_HW_VLAN_TX;
1753        else
1754                features &= ~NETIF_F_HW_VLAN_TX;
1755
1756        return features;
1757}
1758
1759static int igb_set_features(struct net_device *netdev, u32 features)
1760{
1761        u32 changed = netdev->features ^ features;
1762
1763        if (changed & NETIF_F_HW_VLAN_RX)
1764                igb_vlan_mode(netdev, features);
1765
1766        return 0;
1767}
1768
1769static const struct net_device_ops igb_netdev_ops = {
1770        .ndo_open               = igb_open,
1771        .ndo_stop               = igb_close,
1772        .ndo_start_xmit         = igb_xmit_frame,
1773        .ndo_get_stats64        = igb_get_stats64,
1774        .ndo_set_rx_mode        = igb_set_rx_mode,
1775        .ndo_set_mac_address    = igb_set_mac,
1776        .ndo_change_mtu         = igb_change_mtu,
1777        .ndo_do_ioctl           = igb_ioctl,
1778        .ndo_tx_timeout         = igb_tx_timeout,
1779        .ndo_validate_addr      = eth_validate_addr,
1780        .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1781        .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1782        .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1783        .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1784        .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1785        .ndo_get_vf_config      = igb_ndo_get_vf_config,
1786#ifdef CONFIG_NET_POLL_CONTROLLER
1787        .ndo_poll_controller    = igb_netpoll,
1788#endif
1789        .ndo_fix_features       = igb_fix_features,
1790        .ndo_set_features       = igb_set_features,
1791};
1792
1793/**
1794 * igb_probe - Device Initialization Routine
1795 * @pdev: PCI device information struct
1796 * @ent: entry in igb_pci_tbl
1797 *
1798 * Returns 0 on success, negative on failure
1799 *
1800 * igb_probe initializes an adapter identified by a pci_dev structure.
1801 * The OS initialization, configuring of the adapter private structure,
1802 * and a hardware reset occur.
1803 **/
1804static int __devinit igb_probe(struct pci_dev *pdev,
1805                               const struct pci_device_id *ent)
1806{
1807        struct net_device *netdev;
1808        struct igb_adapter *adapter;
1809        struct e1000_hw *hw;
1810        u16 eeprom_data = 0;
1811        s32 ret_val;
1812        static int global_quad_port_a; /* global quad port a indication */
1813        const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1814        unsigned long mmio_start, mmio_len;
1815        int err, pci_using_dac;
1816        u16 eeprom_apme_mask = IGB_EEPROM_APME;
1817        u8 part_str[E1000_PBANUM_LENGTH];
1818
1819        /* Catch broken hardware that put the wrong VF device ID in
1820         * the PCIe SR-IOV capability.
1821         */
1822        if (pdev->is_virtfn) {
1823                WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1824                     pci_name(pdev), pdev->vendor, pdev->device);
1825                return -EINVAL;
1826        }
1827
1828        err = pci_enable_device_mem(pdev);
1829        if (err)
1830                return err;
1831
1832        pci_using_dac = 0;
1833        err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1834        if (!err) {
1835                err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1836                if (!err)
1837                        pci_using_dac = 1;
1838        } else {
1839                err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1840                if (err) {
1841                        err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1842                        if (err) {
1843                                dev_err(&pdev->dev, "No usable DMA "
1844                                        "configuration, aborting\n");
1845                                goto err_dma;
1846                        }
1847                }
1848        }
1849
1850        err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1851                                           IORESOURCE_MEM),
1852                                           igb_driver_name);
1853        if (err)
1854                goto err_pci_reg;
1855
1856        pci_enable_pcie_error_reporting(pdev);
1857
1858        pci_set_master(pdev);
1859        pci_save_state(pdev);
1860
1861        err = -ENOMEM;
1862        netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1863                                   IGB_MAX_TX_QUEUES);
1864        if (!netdev)
1865                goto err_alloc_etherdev;
1866
1867        SET_NETDEV_DEV(netdev, &pdev->dev);
1868
1869        pci_set_drvdata(pdev, netdev);
1870        adapter = netdev_priv(netdev);
1871        adapter->netdev = netdev;
1872        adapter->pdev = pdev;
1873        hw = &adapter->hw;
1874        hw->back = adapter;
1875        adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1876
1877        mmio_start = pci_resource_start(pdev, 0);
1878        mmio_len = pci_resource_len(pdev, 0);
1879
1880        err = -EIO;
1881        hw->hw_addr = ioremap(mmio_start, mmio_len);
1882        if (!hw->hw_addr)
1883                goto err_ioremap;
1884
1885        netdev->netdev_ops = &igb_netdev_ops;
1886        igb_set_ethtool_ops(netdev);
1887        netdev->watchdog_timeo = 5 * HZ;
1888
1889        strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1890
1891        netdev->mem_start = mmio_start;
1892        netdev->mem_end = mmio_start + mmio_len;
1893
1894        /* PCI config space info */
1895        hw->vendor_id = pdev->vendor;
1896        hw->device_id = pdev->device;
1897        hw->revision_id = pdev->revision;
1898        hw->subsystem_vendor_id = pdev->subsystem_vendor;
1899        hw->subsystem_device_id = pdev->subsystem_device;
1900
1901        /* Copy the default MAC, PHY and NVM function pointers */
1902        memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1903        memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1904        memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1905        /* Initialize skew-specific constants */
1906        err = ei->get_invariants(hw);
1907        if (err)
1908                goto err_sw_init;
1909
1910        /* setup the private structure */
1911        err = igb_sw_init(adapter);
1912        if (err)
1913                goto err_sw_init;
1914
1915        igb_get_bus_info_pcie(hw);
1916
1917        hw->phy.autoneg_wait_to_complete = false;
1918
1919        /* Copper options */
1920        if (hw->phy.media_type == e1000_media_type_copper) {
1921                hw->phy.mdix = AUTO_ALL_MODES;
1922                hw->phy.disable_polarity_correction = false;
1923                hw->phy.ms_type = e1000_ms_hw_default;
1924        }
1925
1926        if (igb_check_reset_block(hw))
1927                dev_info(&pdev->dev,
1928                        "PHY reset is blocked due to SOL/IDER session.\n");
1929
1930        /*
1931         * features is initialized to 0 in allocation, it might have bits
1932         * set by igb_sw_init so we should use an or instead of an
1933         * assignment.
1934         */
1935        netdev->features |= NETIF_F_SG |
1936                            NETIF_F_IP_CSUM |
1937                            NETIF_F_IPV6_CSUM |
1938                            NETIF_F_TSO |
1939                            NETIF_F_TSO6 |
1940                            NETIF_F_RXHASH |
1941                            NETIF_F_RXCSUM |
1942                            NETIF_F_HW_VLAN_RX |
1943                            NETIF_F_HW_VLAN_TX;
1944
1945        /* copy netdev features into list of user selectable features */
1946        netdev->hw_features |= netdev->features;
1947
1948        /* set this bit last since it cannot be part of hw_features */
1949        netdev->features |= NETIF_F_HW_VLAN_FILTER;
1950
1951        netdev->vlan_features |= NETIF_F_TSO |
1952                                 NETIF_F_TSO6 |
1953                                 NETIF_F_IP_CSUM |
1954                                 NETIF_F_IPV6_CSUM |
1955                                 NETIF_F_SG;
1956
1957        if (pci_using_dac) {
1958                netdev->features |= NETIF_F_HIGHDMA;
1959                netdev->vlan_features |= NETIF_F_HIGHDMA;
1960        }
1961
1962        if (hw->mac.type >= e1000_82576) {
1963                netdev->hw_features |= NETIF_F_SCTP_CSUM;
1964                netdev->features |= NETIF_F_SCTP_CSUM;
1965        }
1966
1967        netdev->priv_flags |= IFF_UNICAST_FLT;
1968
1969        adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1970
1971        /* before reading the NVM, reset the controller to put the device in a
1972         * known good starting state */
1973        hw->mac.ops.reset_hw(hw);
1974
1975        /* make sure the NVM is good */
1976        if (hw->nvm.ops.validate(hw) < 0) {
1977                dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1978                err = -EIO;
1979                goto err_eeprom;
1980        }
1981
1982        /* copy the MAC address out of the NVM */
1983        if (hw->mac.ops.read_mac_addr(hw))
1984                dev_err(&pdev->dev, "NVM Read Error\n");
1985
1986        memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1987        memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1988
1989        if (!is_valid_ether_addr(netdev->perm_addr)) {
1990                dev_err(&pdev->dev, "Invalid MAC Address\n");
1991                err = -EIO;
1992                goto err_eeprom;
1993        }
1994
1995        setup_timer(&adapter->watchdog_timer, igb_watchdog,
1996                    (unsigned long) adapter);
1997        setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1998                    (unsigned long) adapter);
1999
2000        INIT_WORK(&adapter->reset_task, igb_reset_task);
2001        INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2002
2003        /* Initialize link properties that are user-changeable */
2004        adapter->fc_autoneg = true;
2005        hw->mac.autoneg = true;
2006        hw->phy.autoneg_advertised = 0x2f;
2007
2008        hw->fc.requested_mode = e1000_fc_default;
2009        hw->fc.current_mode = e1000_fc_default;
2010
2011        igb_validate_mdi_setting(hw);
2012
2013        /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2014         * enable the ACPI Magic Packet filter
2015         */
2016
2017        if (hw->bus.func == 0)
2018                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2019        else if (hw->mac.type >= e1000_82580)
2020                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2021                                 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2022                                 &eeprom_data);
2023        else if (hw->bus.func == 1)
2024                hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2025
2026        if (eeprom_data & eeprom_apme_mask)
2027                adapter->eeprom_wol |= E1000_WUFC_MAG;
2028
2029        /* now that we have the eeprom settings, apply the special cases where
2030         * the eeprom may be wrong or the board simply won't support wake on
2031         * lan on a particular port */
2032        switch (pdev->device) {
2033        case E1000_DEV_ID_82575GB_QUAD_COPPER:
2034                adapter->eeprom_wol = 0;
2035                break;
2036        case E1000_DEV_ID_82575EB_FIBER_SERDES:
2037        case E1000_DEV_ID_82576_FIBER:
2038        case E1000_DEV_ID_82576_SERDES:
2039                /* Wake events only supported on port A for dual fiber
2040                 * regardless of eeprom setting */
2041                if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2042                        adapter->eeprom_wol = 0;
2043                break;
2044        case E1000_DEV_ID_82576_QUAD_COPPER:
2045        case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2046                /* if quad port adapter, disable WoL on all but port A */
2047                if (global_quad_port_a != 0)
2048                        adapter->eeprom_wol = 0;
2049                else
2050                        adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2051                /* Reset for multiple quad port adapters */
2052                if (++global_quad_port_a == 4)
2053                        global_quad_port_a = 0;
2054                break;
2055        }
2056
2057        /* initialize the wol settings based on the eeprom settings */
2058        adapter->wol = adapter->eeprom_wol;
2059        device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2060
2061        /* reset the hardware with the new settings */
2062        igb_reset(adapter);
2063
2064        /* let the f/w know that the h/w is now under the control of the
2065         * driver. */
2066        igb_get_hw_control(adapter);
2067
2068        strcpy(netdev->name, "eth%d");
2069        err = register_netdev(netdev);
2070        if (err)
2071                goto err_register;
2072
2073        /* carrier off reporting is important to ethtool even BEFORE open */
2074        netif_carrier_off(netdev);
2075
2076#ifdef CONFIG_IGB_DCA
2077        if (dca_add_requester(&pdev->dev) == 0) {
2078                adapter->flags |= IGB_FLAG_DCA_ENABLED;
2079                dev_info(&pdev->dev, "DCA enabled\n");
2080                igb_setup_dca(adapter);
2081        }
2082
2083#endif
2084        /* do hw tstamp init after resetting */
2085        igb_init_hw_timer(adapter);
2086
2087        dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2088        /* print bus type/speed/width info */
2089        dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2090                 netdev->name,
2091                 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2092                  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2093                                                            "unknown"),
2094                 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2095                  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2096                  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2097                   "unknown"),
2098                 netdev->dev_addr);
2099
2100        ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2101        if (ret_val)
2102                strcpy(part_str, "Unknown");
2103        dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2104        dev_info(&pdev->dev,
2105                "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106                adapter->msix_entries ? "MSI-X" :
2107                (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2108                adapter->num_rx_queues, adapter->num_tx_queues);
2109        switch (hw->mac.type) {
2110        case e1000_i350:
2111                igb_set_eee_i350(hw);
2112                break;
2113        default:
2114                break;
2115        }
2116        return 0;
2117
2118err_register:
2119        igb_release_hw_control(adapter);
2120err_eeprom:
2121        if (!igb_check_reset_block(hw))
2122                igb_reset_phy(hw);
2123
2124        if (hw->flash_address)
2125                iounmap(hw->flash_address);
2126err_sw_init:
2127        igb_clear_interrupt_scheme(adapter);
2128        iounmap(hw->hw_addr);
2129err_ioremap:
2130        free_netdev(netdev);
2131err_alloc_etherdev:
2132        pci_release_selected_regions(pdev,
2133                                     pci_select_bars(pdev, IORESOURCE_MEM));
2134err_pci_reg:
2135err_dma:
2136        pci_disable_device(pdev);
2137        return err;
2138}
2139
2140/**
2141 * igb_remove - Device Removal Routine
2142 * @pdev: PCI device information struct
2143 *
2144 * igb_remove is called by the PCI subsystem to alert the driver
2145 * that it should release a PCI device.  The could be caused by a
2146 * Hot-Plug event, or because the driver is going to be removed from
2147 * memory.
2148 **/
2149static void __devexit igb_remove(struct pci_dev *pdev)
2150{
2151        struct net_device *netdev = pci_get_drvdata(pdev);
2152        struct igb_adapter *adapter = netdev_priv(netdev);
2153        struct e1000_hw *hw = &adapter->hw;
2154
2155        /*
2156         * The watchdog timer may be rescheduled, so explicitly
2157         * disable watchdog from being rescheduled.
2158         */
2159        set_bit(__IGB_DOWN, &adapter->state);
2160        del_timer_sync(&adapter->watchdog_timer);
2161        del_timer_sync(&adapter->phy_info_timer);
2162
2163        cancel_work_sync(&adapter->reset_task);
2164        cancel_work_sync(&adapter->watchdog_task);
2165
2166#ifdef CONFIG_IGB_DCA
2167        if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2168                dev_info(&pdev->dev, "DCA disabled\n");
2169                dca_remove_requester(&pdev->dev);
2170                adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2171                wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2172        }
2173#endif
2174
2175        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2176         * would have already happened in close and is redundant. */
2177        igb_release_hw_control(adapter);
2178
2179        unregister_netdev(netdev);
2180
2181        igb_clear_interrupt_scheme(adapter);
2182
2183#ifdef CONFIG_PCI_IOV
2184        /* reclaim resources allocated to VFs */
2185        if (adapter->vf_data) {
2186                /* disable iov and allow time for transactions to clear */
2187                if (!igb_check_vf_assignment(adapter)) {
2188                        pci_disable_sriov(pdev);
2189                        msleep(500);
2190                } else {
2191                        dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2192                }
2193
2194                kfree(adapter->vf_data);
2195                adapter->vf_data = NULL;
2196                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2197                wrfl();
2198                msleep(100);
2199                dev_info(&pdev->dev, "IOV Disabled\n");
2200        }
2201#endif
2202
2203        iounmap(hw->hw_addr);
2204        if (hw->flash_address)
2205                iounmap(hw->flash_address);
2206        pci_release_selected_regions(pdev,
2207                                     pci_select_bars(pdev, IORESOURCE_MEM));
2208
2209        kfree(adapter->shadow_vfta);
2210        free_netdev(netdev);
2211
2212        pci_disable_pcie_error_reporting(pdev);
2213
2214        pci_disable_device(pdev);
2215}
2216
2217/**
2218 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2219 * @adapter: board private structure to initialize
2220 *
2221 * This function initializes the vf specific data storage and then attempts to
2222 * allocate the VFs.  The reason for ordering it this way is because it is much
2223 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2224 * the memory for the VFs.
2225 **/
2226static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2227{
2228#ifdef CONFIG_PCI_IOV
2229        struct pci_dev *pdev = adapter->pdev;
2230        int old_vfs = igb_find_enabled_vfs(adapter);
2231        int i;
2232
2233        if (old_vfs) {
2234                dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2235                         "max_vfs setting of %d\n", old_vfs, max_vfs);
2236                adapter->vfs_allocated_count = old_vfs;
2237        }
2238
2239        if (!adapter->vfs_allocated_count)
2240                return;
2241
2242        adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2243                                sizeof(struct vf_data_storage), GFP_KERNEL);
2244        /* if allocation failed then we do not support SR-IOV */
2245        if (!adapter->vf_data) {
2246                adapter->vfs_allocated_count = 0;
2247                dev_err(&pdev->dev, "Unable to allocate memory for VF "
2248                        "Data Storage\n");
2249                goto out;
2250        }
2251
2252        if (!old_vfs) {
2253                if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2254                        goto err_out;
2255        }
2256        dev_info(&pdev->dev, "%d VFs allocated\n",
2257                 adapter->vfs_allocated_count);
2258        for (i = 0; i < adapter->vfs_allocated_count; i++)
2259                igb_vf_configure(adapter, i);
2260
2261        /* DMA Coalescing is not supported in IOV mode. */
2262        adapter->flags &= ~IGB_FLAG_DMAC;
2263        goto out;
2264err_out:
2265        kfree(adapter->vf_data);
2266        adapter->vf_data = NULL;
2267        adapter->vfs_allocated_count = 0;
2268out:
2269        return;
2270#endif /* CONFIG_PCI_IOV */
2271}
2272
2273/**
2274 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2275 * @adapter: board private structure to initialize
2276 *
2277 * igb_init_hw_timer initializes the function pointer and values for the hw
2278 * timer found in hardware.
2279 **/
2280static void igb_init_hw_timer(struct igb_adapter *adapter)
2281{
2282        struct e1000_hw *hw = &adapter->hw;
2283
2284        switch (hw->mac.type) {
2285        case e1000_i350:
2286        case e1000_82580:
2287                memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2288                adapter->cycles.read = igb_read_clock;
2289                adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2290                adapter->cycles.mult = 1;
2291                /*
2292                 * The 82580 timesync updates the system timer every 8ns by 8ns
2293                 * and the value cannot be shifted.  Instead we need to shift
2294                 * the registers to generate a 64bit timer value.  As a result
2295                 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2296                 * 24 in order to generate a larger value for synchronization.
2297                 */
2298                adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2299                /* disable system timer temporarily by setting bit 31 */
2300                wr32(E1000_TSAUXC, 0x80000000);
2301                wrfl();
2302
2303                /* Set registers so that rollover occurs soon to test this. */
2304                wr32(E1000_SYSTIMR, 0x00000000);
2305                wr32(E1000_SYSTIML, 0x80000000);
2306                wr32(E1000_SYSTIMH, 0x000000FF);
2307                wrfl();
2308
2309                /* enable system timer by clearing bit 31 */
2310                wr32(E1000_TSAUXC, 0x0);
2311                wrfl();
2312
2313                timecounter_init(&adapter->clock,
2314                                 &adapter->cycles,
2315                                 ktime_to_ns(ktime_get_real()));
2316                /*
2317                 * Synchronize our NIC clock against system wall clock. NIC
2318                 * time stamp reading requires ~3us per sample, each sample
2319                 * was pretty stable even under load => only require 10
2320                 * samples for each offset comparison.
2321                 */
2322                memset(&adapter->compare, 0, sizeof(adapter->compare));
2323                adapter->compare.source = &adapter->clock;
2324                adapter->compare.target = ktime_get_real;
2325                adapter->compare.num_samples = 10;
2326                timecompare_update(&adapter->compare, 0);
2327                break;
2328        case e1000_82576:
2329                /*
2330                 * Initialize hardware timer: we keep it running just in case
2331                 * that some program needs it later on.
2332                 */
2333                memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334                adapter->cycles.read = igb_read_clock;
2335                adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336                adapter->cycles.mult = 1;
2337                /**
2338                 * Scale the NIC clock cycle by a large factor so that
2339                 * relatively small clock corrections can be added or
2340                 * subtracted at each clock tick. The drawbacks of a large
2341                 * factor are a) that the clock register overflows more quickly
2342                 * (not such a big deal) and b) that the increment per tick has
2343                 * to fit into 24 bits.  As a result we need to use a shift of
2344                 * 19 so we can fit a value of 16 into the TIMINCA register.
2345                 */
2346                adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2347                wr32(E1000_TIMINCA,
2348                                (1 << E1000_TIMINCA_16NS_SHIFT) |
2349                                (16 << IGB_82576_TSYNC_SHIFT));
2350
2351                /* Set registers so that rollover occurs soon to test this. */
2352                wr32(E1000_SYSTIML, 0x00000000);
2353                wr32(E1000_SYSTIMH, 0xFF800000);
2354                wrfl();
2355
2356                timecounter_init(&adapter->clock,
2357                                 &adapter->cycles,
2358                                 ktime_to_ns(ktime_get_real()));
2359                /*
2360                 * Synchronize our NIC clock against system wall clock. NIC
2361                 * time stamp reading requires ~3us per sample, each sample
2362                 * was pretty stable even under load => only require 10
2363                 * samples for each offset comparison.
2364                 */
2365                memset(&adapter->compare, 0, sizeof(adapter->compare));
2366                adapter->compare.source = &adapter->clock;
2367                adapter->compare.target = ktime_get_real;
2368                adapter->compare.num_samples = 10;
2369                timecompare_update(&adapter->compare, 0);
2370                break;
2371        case e1000_82575:
2372                /* 82575 does not support timesync */
2373        default:
2374                break;
2375        }
2376
2377}
2378
2379/**
2380 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2381 * @adapter: board private structure to initialize
2382 *
2383 * igb_sw_init initializes the Adapter private data structure.
2384 * Fields are initialized based on PCI device information and
2385 * OS network device settings (MTU size).
2386 **/
2387static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388{
2389        struct e1000_hw *hw = &adapter->hw;
2390        struct net_device *netdev = adapter->netdev;
2391        struct pci_dev *pdev = adapter->pdev;
2392
2393        pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394
2395        /* set default ring sizes */
2396        adapter->tx_ring_count = IGB_DEFAULT_TXD;
2397        adapter->rx_ring_count = IGB_DEFAULT_RXD;
2398
2399        /* set default ITR values */
2400        adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2401        adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2402
2403        /* set default work limits */
2404        adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2405
2406        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2407                                  VLAN_HLEN;
2408        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2409
2410        adapter->node = -1;
2411
2412        spin_lock_init(&adapter->stats64_lock);
2413#ifdef CONFIG_PCI_IOV
2414        switch (hw->mac.type) {
2415        case e1000_82576:
2416        case e1000_i350:
2417                if (max_vfs > 7) {
2418                        dev_warn(&pdev->dev,
2419                                 "Maximum of 7 VFs per PF, using max\n");
2420                        adapter->vfs_allocated_count = 7;
2421                } else
2422                        adapter->vfs_allocated_count = max_vfs;
2423                break;
2424        default:
2425                break;
2426        }
2427#endif /* CONFIG_PCI_IOV */
2428        adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2429        /* i350 cannot do RSS and SR-IOV at the same time */
2430        if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2431                adapter->rss_queues = 1;
2432
2433        /*
2434         * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2435         * then we should combine the queues into a queue pair in order to
2436         * conserve interrupts due to limited supply
2437         */
2438        if ((adapter->rss_queues > 4) ||
2439            ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2440                adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2441
2442        /* Setup and initialize a copy of the hw vlan table array */
2443        adapter->shadow_vfta = kzalloc(sizeof(u32) *
2444                                E1000_VLAN_FILTER_TBL_SIZE,
2445                                GFP_ATOMIC);
2446
2447        /* This call may decrease the number of queues */
2448        if (igb_init_interrupt_scheme(adapter)) {
2449                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2450                return -ENOMEM;
2451        }
2452
2453        igb_probe_vfs(adapter);
2454
2455        /* Explicitly disable IRQ since the NIC can be in any state. */
2456        igb_irq_disable(adapter);
2457
2458        if (hw->mac.type == e1000_i350)
2459                adapter->flags &= ~IGB_FLAG_DMAC;
2460
2461        set_bit(__IGB_DOWN, &adapter->state);
2462        return 0;
2463}
2464
2465/**
2466 * igb_open - Called when a network interface is made active
2467 * @netdev: network interface device structure
2468 *
2469 * Returns 0 on success, negative value on failure
2470 *
2471 * The open entry point is called when a network interface is made
2472 * active by the system (IFF_UP).  At this point all resources needed
2473 * for transmit and receive operations are allocated, the interrupt
2474 * handler is registered with the OS, the watchdog timer is started,
2475 * and the stack is notified that the interface is ready.
2476 **/
2477static int igb_open(struct net_device *netdev)
2478{
2479        struct igb_adapter *adapter = netdev_priv(netdev);
2480        struct e1000_hw *hw = &adapter->hw;
2481        int err;
2482        int i;
2483
2484        /* disallow open during test */
2485        if (test_bit(__IGB_TESTING, &adapter->state))
2486                return -EBUSY;
2487
2488        netif_carrier_off(netdev);
2489
2490        /* allocate transmit descriptors */
2491        err = igb_setup_all_tx_resources(adapter);
2492        if (err)
2493                goto err_setup_tx;
2494
2495        /* allocate receive descriptors */
2496        err = igb_setup_all_rx_resources(adapter);
2497        if (err)
2498                goto err_setup_rx;
2499
2500        igb_power_up_link(adapter);
2501
2502        /* before we allocate an interrupt, we must be ready to handle it.
2503         * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2504         * as soon as we call pci_request_irq, so we have to setup our
2505         * clean_rx handler before we do so.  */
2506        igb_configure(adapter);
2507
2508        err = igb_request_irq(adapter);
2509        if (err)
2510                goto err_req_irq;
2511
2512        /* From here on the code is the same as igb_up() */
2513        clear_bit(__IGB_DOWN, &adapter->state);
2514
2515        for (i = 0; i < adapter->num_q_vectors; i++)
2516                napi_enable(&(adapter->q_vector[i]->napi));
2517
2518        /* Clear any pending interrupts. */
2519        rd32(E1000_ICR);
2520
2521        igb_irq_enable(adapter);
2522
2523        /* notify VFs that reset has been completed */
2524        if (adapter->vfs_allocated_count) {
2525                u32 reg_data = rd32(E1000_CTRL_EXT);
2526                reg_data |= E1000_CTRL_EXT_PFRSTD;
2527                wr32(E1000_CTRL_EXT, reg_data);
2528        }
2529
2530        netif_tx_start_all_queues(netdev);
2531
2532        /* start the watchdog. */
2533        hw->mac.get_link_status = 1;
2534        schedule_work(&adapter->watchdog_task);
2535
2536        return 0;
2537
2538err_req_irq:
2539        igb_release_hw_control(adapter);
2540        igb_power_down_link(adapter);
2541        igb_free_all_rx_resources(adapter);
2542err_setup_rx:
2543        igb_free_all_tx_resources(adapter);
2544err_setup_tx:
2545        igb_reset(adapter);
2546
2547        return err;
2548}
2549
2550/**
2551 * igb_close - Disables a network interface
2552 * @netdev: network interface device structure
2553 *
2554 * Returns 0, this is not allowed to fail
2555 *
2556 * The close entry point is called when an interface is de-activated
2557 * by the OS.  The hardware is still under the driver's control, but
2558 * needs to be disabled.  A global MAC reset is issued to stop the
2559 * hardware, and all transmit and receive resources are freed.
2560 **/
2561static int igb_close(struct net_device *netdev)
2562{
2563        struct igb_adapter *adapter = netdev_priv(netdev);
2564
2565        WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2566        igb_down(adapter);
2567
2568        igb_free_irq(adapter);
2569
2570        igb_free_all_tx_resources(adapter);
2571        igb_free_all_rx_resources(adapter);
2572
2573        return 0;
2574}
2575
2576/**
2577 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2578 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2579 *
2580 * Return 0 on success, negative on failure
2581 **/
2582int igb_setup_tx_resources(struct igb_ring *tx_ring)
2583{
2584        struct device *dev = tx_ring->dev;
2585        int orig_node = dev_to_node(dev);
2586        int size;
2587
2588        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2589        tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2590        if (!tx_ring->tx_buffer_info)
2591                tx_ring->tx_buffer_info = vzalloc(size);
2592        if (!tx_ring->tx_buffer_info)
2593                goto err;
2594
2595        /* round up to nearest 4K */
2596        tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2597        tx_ring->size = ALIGN(tx_ring->size, 4096);
2598
2599        set_dev_node(dev, tx_ring->numa_node);
2600        tx_ring->desc = dma_alloc_coherent(dev,
2601                                           tx_ring->size,
2602                                           &tx_ring->dma,
2603                                           GFP_KERNEL);
2604        set_dev_node(dev, orig_node);
2605        if (!tx_ring->desc)
2606                tx_ring->desc = dma_alloc_coherent(dev,
2607                                                   tx_ring->size,
2608                                                   &tx_ring->dma,
2609                                                   GFP_KERNEL);
2610
2611        if (!tx_ring->desc)
2612                goto err;
2613
2614        tx_ring->next_to_use = 0;
2615        tx_ring->next_to_clean = 0;
2616
2617        return 0;
2618
2619err:
2620        vfree(tx_ring->tx_buffer_info);
2621        dev_err(dev,
2622                "Unable to allocate memory for the transmit descriptor ring\n");
2623        return -ENOMEM;
2624}
2625
2626/**
2627 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2628 *                                (Descriptors) for all queues
2629 * @adapter: board private structure
2630 *
2631 * Return 0 on success, negative on failure
2632 **/
2633static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2634{
2635        struct pci_dev *pdev = adapter->pdev;
2636        int i, err = 0;
2637
2638        for (i = 0; i < adapter->num_tx_queues; i++) {
2639                err = igb_setup_tx_resources(adapter->tx_ring[i]);
2640                if (err) {
2641                        dev_err(&pdev->dev,
2642                                "Allocation for Tx Queue %u failed\n", i);
2643                        for (i--; i >= 0; i--)
2644                                igb_free_tx_resources(adapter->tx_ring[i]);
2645                        break;
2646                }
2647        }
2648
2649        return err;
2650}
2651
2652/**
2653 * igb_setup_tctl - configure the transmit control registers
2654 * @adapter: Board private structure
2655 **/
2656void igb_setup_tctl(struct igb_adapter *adapter)
2657{
2658        struct e1000_hw *hw = &adapter->hw;
2659        u32 tctl;
2660
2661        /* disable queue 0 which is enabled by default on 82575 and 82576 */
2662        wr32(E1000_TXDCTL(0), 0);
2663
2664        /* Program the Transmit Control Register */
2665        tctl = rd32(E1000_TCTL);
2666        tctl &= ~E1000_TCTL_CT;
2667        tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2668                (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2669
2670        igb_config_collision_dist(hw);
2671
2672        /* Enable transmits */
2673        tctl |= E1000_TCTL_EN;
2674
2675        wr32(E1000_TCTL, tctl);
2676}
2677
2678/**
2679 * igb_configure_tx_ring - Configure transmit ring after Reset
2680 * @adapter: board private structure
2681 * @ring: tx ring to configure
2682 *
2683 * Configure a transmit ring after a reset.
2684 **/
2685void igb_configure_tx_ring(struct igb_adapter *adapter,
2686                           struct igb_ring *ring)
2687{
2688        struct e1000_hw *hw = &adapter->hw;
2689        u32 txdctl = 0;
2690        u64 tdba = ring->dma;
2691        int reg_idx = ring->reg_idx;
2692
2693        /* disable the queue */
2694        wr32(E1000_TXDCTL(reg_idx), 0);
2695        wrfl();
2696        mdelay(10);
2697
2698        wr32(E1000_TDLEN(reg_idx),
2699                        ring->count * sizeof(union e1000_adv_tx_desc));
2700        wr32(E1000_TDBAL(reg_idx),
2701                        tdba & 0x00000000ffffffffULL);
2702        wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2703
2704        ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2705        wr32(E1000_TDH(reg_idx), 0);
2706        writel(0, ring->tail);
2707
2708        txdctl |= IGB_TX_PTHRESH;
2709        txdctl |= IGB_TX_HTHRESH << 8;
2710        txdctl |= IGB_TX_WTHRESH << 16;
2711
2712        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2713        wr32(E1000_TXDCTL(reg_idx), txdctl);
2714}
2715
2716/**
2717 * igb_configure_tx - Configure transmit Unit after Reset
2718 * @adapter: board private structure
2719 *
2720 * Configure the Tx unit of the MAC after a reset.
2721 **/
2722static void igb_configure_tx(struct igb_adapter *adapter)
2723{
2724        int i;
2725
2726        for (i = 0; i < adapter->num_tx_queues; i++)
2727                igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2728}
2729
2730/**
2731 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2732 * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2733 *
2734 * Returns 0 on success, negative on failure
2735 **/
2736int igb_setup_rx_resources(struct igb_ring *rx_ring)
2737{
2738        struct device *dev = rx_ring->dev;
2739        int orig_node = dev_to_node(dev);
2740        int size, desc_len;
2741
2742        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2743        rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2744        if (!rx_ring->rx_buffer_info)
2745                rx_ring->rx_buffer_info = vzalloc(size);
2746        if (!rx_ring->rx_buffer_info)
2747                goto err;
2748
2749        desc_len = sizeof(union e1000_adv_rx_desc);
2750
2751        /* Round up to nearest 4K */
2752        rx_ring->size = rx_ring->count * desc_len;
2753        rx_ring->size = ALIGN(rx_ring->size, 4096);
2754
2755        set_dev_node(dev, rx_ring->numa_node);
2756        rx_ring->desc = dma_alloc_coherent(dev,
2757                                           rx_ring->size,
2758                                           &rx_ring->dma,
2759                                           GFP_KERNEL);
2760        set_dev_node(dev, orig_node);
2761        if (!rx_ring->desc)
2762                rx_ring->desc = dma_alloc_coherent(dev,
2763                                                   rx_ring->size,
2764                                                   &rx_ring->dma,
2765                                                   GFP_KERNEL);
2766
2767        if (!rx_ring->desc)
2768                goto err;
2769
2770        rx_ring->next_to_clean = 0;
2771        rx_ring->next_to_use = 0;
2772
2773        return 0;
2774
2775err:
2776        vfree(rx_ring->rx_buffer_info);
2777        rx_ring->rx_buffer_info = NULL;
2778        dev_err(dev, "Unable to allocate memory for the receive descriptor"
2779                " ring\n");
2780        return -ENOMEM;
2781}
2782
2783/**
2784 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2785 *                                (Descriptors) for all queues
2786 * @adapter: board private structure
2787 *
2788 * Return 0 on success, negative on failure
2789 **/
2790static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2791{
2792        struct pci_dev *pdev = adapter->pdev;
2793        int i, err = 0;
2794
2795        for (i = 0; i < adapter->num_rx_queues; i++) {
2796                err = igb_setup_rx_resources(adapter->rx_ring[i]);
2797                if (err) {
2798                        dev_err(&pdev->dev,
2799                                "Allocation for Rx Queue %u failed\n", i);
2800                        for (i--; i >= 0; i--)
2801                                igb_free_rx_resources(adapter->rx_ring[i]);
2802                        break;
2803                }
2804        }
2805
2806        return err;
2807}
2808
2809/**
2810 * igb_setup_mrqc - configure the multiple receive queue control registers
2811 * @adapter: Board private structure
2812 **/
2813static void igb_setup_mrqc(struct igb_adapter *adapter)
2814{
2815        struct e1000_hw *hw = &adapter->hw;
2816        u32 mrqc, rxcsum;
2817        u32 j, num_rx_queues, shift = 0, shift2 = 0;
2818        union e1000_reta {
2819                u32 dword;
2820                u8  bytes[4];
2821        } reta;
2822        static const u8 rsshash[40] = {
2823                0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2824                0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2825                0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2826                0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2827
2828        /* Fill out hash function seeds */
2829        for (j = 0; j < 10; j++) {
2830                u32 rsskey = rsshash[(j * 4)];
2831                rsskey |= rsshash[(j * 4) + 1] << 8;
2832                rsskey |= rsshash[(j * 4) + 2] << 16;
2833                rsskey |= rsshash[(j * 4) + 3] << 24;
2834                array_wr32(E1000_RSSRK(0), j, rsskey);
2835        }
2836
2837        num_rx_queues = adapter->rss_queues;
2838
2839        if (adapter->vfs_allocated_count) {
2840                /* 82575 and 82576 supports 2 RSS queues for VMDq */
2841                switch (hw->mac.type) {
2842                case e1000_i350:
2843                case e1000_82580:
2844                        num_rx_queues = 1;
2845                        shift = 0;
2846                        break;
2847                case e1000_82576:
2848                        shift = 3;
2849                        num_rx_queues = 2;
2850                        break;
2851                case e1000_82575:
2852                        shift = 2;
2853                        shift2 = 6;
2854                default:
2855                        break;
2856                }
2857        } else {
2858                if (hw->mac.type == e1000_82575)
2859                        shift = 6;
2860        }
2861
2862        for (j = 0; j < (32 * 4); j++) {
2863                reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2864                if (shift2)
2865                        reta.bytes[j & 3] |= num_rx_queues << shift2;
2866                if ((j & 3) == 3)
2867                        wr32(E1000_RETA(j >> 2), reta.dword);
2868        }
2869
2870        /*
2871         * Disable raw packet checksumming so that RSS hash is placed in
2872         * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2873         * offloads as they are enabled by default
2874         */
2875        rxcsum = rd32(E1000_RXCSUM);
2876        rxcsum |= E1000_RXCSUM_PCSD;
2877
2878        if (adapter->hw.mac.type >= e1000_82576)
2879                /* Enable Receive Checksum Offload for SCTP */
2880                rxcsum |= E1000_RXCSUM_CRCOFL;
2881
2882        /* Don't need to set TUOFL or IPOFL, they default to 1 */
2883        wr32(E1000_RXCSUM, rxcsum);
2884
2885        /* If VMDq is enabled then we set the appropriate mode for that, else
2886         * we default to RSS so that an RSS hash is calculated per packet even
2887         * if we are only using one queue */
2888        if (adapter->vfs_allocated_count) {
2889                if (hw->mac.type > e1000_82575) {
2890                        /* Set the default pool for the PF's first queue */
2891                        u32 vtctl = rd32(E1000_VT_CTL);
2892                        vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2893                                   E1000_VT_CTL_DISABLE_DEF_POOL);
2894                        vtctl |= adapter->vfs_allocated_count <<
2895                                E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2896                        wr32(E1000_VT_CTL, vtctl);
2897                }
2898                if (adapter->rss_queues > 1)
2899                        mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2900                else
2901                        mrqc = E1000_MRQC_ENABLE_VMDQ;
2902        } else {
2903                mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2904        }
2905        igb_vmm_control(adapter);
2906
2907        /*
2908         * Generate RSS hash based on TCP port numbers and/or
2909         * IPv4/v6 src and dst addresses since UDP cannot be
2910         * hashed reliably due to IP fragmentation
2911         */
2912        mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2913                E1000_MRQC_RSS_FIELD_IPV4_TCP |
2914                E1000_MRQC_RSS_FIELD_IPV6 |
2915                E1000_MRQC_RSS_FIELD_IPV6_TCP |
2916                E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2917
2918        wr32(E1000_MRQC, mrqc);
2919}
2920
2921/**
2922 * igb_setup_rctl - configure the receive control registers
2923 * @adapter: Board private structure
2924 **/
2925void igb_setup_rctl(struct igb_adapter *adapter)
2926{
2927        struct e1000_hw *hw = &adapter->hw;
2928        u32 rctl;
2929
2930        rctl = rd32(E1000_RCTL);
2931
2932        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2933        rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2934
2935        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2936                (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2937
2938        /*
2939         * enable stripping of CRC. It's unlikely this will break BMC
2940         * redirection as it did with e1000. Newer features require
2941         * that the HW strips the CRC.
2942         */
2943        rctl |= E1000_RCTL_SECRC;
2944
2945        /* disable store bad packets and clear size bits. */
2946        rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2947
2948        /* enable LPE to prevent packets larger than max_frame_size */
2949        rctl |= E1000_RCTL_LPE;
2950
2951        /* disable queue 0 to prevent tail write w/o re-config */
2952        wr32(E1000_RXDCTL(0), 0);
2953
2954        /* Attention!!!  For SR-IOV PF driver operations you must enable
2955         * queue drop for all VF and PF queues to prevent head of line blocking
2956         * if an un-trusted VF does not provide descriptors to hardware.
2957         */
2958        if (adapter->vfs_allocated_count) {
2959                /* set all queue drop enable bits */
2960                wr32(E1000_QDE, ALL_QUEUES);
2961        }
2962
2963        wr32(E1000_RCTL, rctl);
2964}
2965
2966static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2967                                   int vfn)
2968{
2969        struct e1000_hw *hw = &adapter->hw;
2970        u32 vmolr;
2971
2972        /* if it isn't the PF check to see if VFs are enabled and
2973         * increase the size to support vlan tags */
2974        if (vfn < adapter->vfs_allocated_count &&
2975            adapter->vf_data[vfn].vlans_enabled)
2976                size += VLAN_TAG_SIZE;
2977
2978        vmolr = rd32(E1000_VMOLR(vfn));
2979        vmolr &= ~E1000_VMOLR_RLPML_MASK;
2980        vmolr |= size | E1000_VMOLR_LPE;
2981        wr32(E1000_VMOLR(vfn), vmolr);
2982
2983        return 0;
2984}
2985
2986/**
2987 * igb_rlpml_set - set maximum receive packet size
2988 * @adapter: board private structure
2989 *
2990 * Configure maximum receivable packet size.
2991 **/
2992static void igb_rlpml_set(struct igb_adapter *adapter)
2993{
2994        u32 max_frame_size = adapter->max_frame_size;
2995        struct e1000_hw *hw = &adapter->hw;
2996        u16 pf_id = adapter->vfs_allocated_count;
2997
2998        if (pf_id) {
2999                igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3000                /*
3001                 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3002                 * to our max jumbo frame size, in case we need to enable
3003                 * jumbo frames on one of the rings later.
3004                 * This will not pass over-length frames into the default
3005                 * queue because it's gated by the VMOLR.RLPML.
3006                 */
3007                max_frame_size = MAX_JUMBO_FRAME_SIZE;
3008        }
3009
3010        wr32(E1000_RLPML, max_frame_size);
3011}
3012
3013static inline void igb_set_vmolr(struct igb_adapter *adapter,
3014                                 int vfn, bool aupe)
3015{
3016        struct e1000_hw *hw = &adapter->hw;
3017        u32 vmolr;
3018
3019        /*
3020         * This register exists only on 82576 and newer so if we are older then
3021         * we should exit and do nothing
3022         */
3023        if (hw->mac.type < e1000_82576)
3024                return;
3025
3026        vmolr = rd32(E1000_VMOLR(vfn));
3027        vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3028        if (aupe)
3029                vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3030        else
3031                vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3032
3033        /* clear all bits that might not be set */
3034        vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3035
3036        if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3037                vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3038        /*
3039         * for VMDq only allow the VFs and pool 0 to accept broadcast and
3040         * multicast packets
3041         */
3042        if (vfn <= adapter->vfs_allocated_count)
3043                vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3044
3045        wr32(E1000_VMOLR(vfn), vmolr);
3046}
3047
3048/**
3049 * igb_configure_rx_ring - Configure a receive ring after Reset
3050 * @adapter: board private structure
3051 * @ring: receive ring to be configured
3052 *
3053 * Configure the Rx unit of the MAC after a reset.
3054 **/
3055void igb_configure_rx_ring(struct igb_adapter *adapter,
3056                           struct igb_ring *ring)
3057{
3058        struct e1000_hw *hw = &adapter->hw;
3059        u64 rdba = ring->dma;
3060        int reg_idx = ring->reg_idx;
3061        u32 srrctl = 0, rxdctl = 0;
3062
3063        /* disable the queue */
3064        wr32(E1000_RXDCTL(reg_idx), 0);
3065
3066        /* Set DMA base address registers */
3067        wr32(E1000_RDBAL(reg_idx),
3068             rdba & 0x00000000ffffffffULL);
3069        wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3070        wr32(E1000_RDLEN(reg_idx),
3071                       ring->count * sizeof(union e1000_adv_rx_desc));
3072
3073        /* initialize head and tail */
3074        ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3075        wr32(E1000_RDH(reg_idx), 0);
3076        writel(0, ring->tail);
3077
3078        /* set descriptor configuration */
3079        srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3080#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3081        srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3082#else
3083        srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3084#endif
3085        srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3086        if (hw->mac.type >= e1000_82580)
3087                srrctl |= E1000_SRRCTL_TIMESTAMP;
3088        /* Only set Drop Enable if we are supporting multiple queues */
3089        if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3090                srrctl |= E1000_SRRCTL_DROP_EN;
3091
3092        wr32(E1000_SRRCTL(reg_idx), srrctl);
3093
3094        /* set filtering for VMDQ pools */
3095        igb_set_vmolr(adapter, reg_idx & 0x7, true);
3096
3097        rxdctl |= IGB_RX_PTHRESH;
3098        rxdctl |= IGB_RX_HTHRESH << 8;
3099        rxdctl |= IGB_RX_WTHRESH << 16;
3100
3101        /* enable receive descriptor fetching */
3102        rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3103        wr32(E1000_RXDCTL(reg_idx), rxdctl);
3104}
3105
3106/**
3107 * igb_configure_rx - Configure receive Unit after Reset
3108 * @adapter: board private structure
3109 *
3110 * Configure the Rx unit of the MAC after a reset.
3111 **/
3112static void igb_configure_rx(struct igb_adapter *adapter)
3113{
3114        int i;
3115
3116        /* set UTA to appropriate mode */
3117        igb_set_uta(adapter);
3118
3119        /* set the correct pool for the PF default MAC address in entry 0 */
3120        igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3121                         adapter->vfs_allocated_count);
3122
3123        /* Setup the HW Rx Head and Tail Descriptor Pointers and
3124         * the Base and Length of the Rx Descriptor Ring */
3125        for (i = 0; i < adapter->num_rx_queues; i++)
3126                igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3127}
3128
3129/**
3130 * igb_free_tx_resources - Free Tx Resources per Queue
3131 * @tx_ring: Tx descriptor ring for a specific queue
3132 *
3133 * Free all transmit software resources
3134 **/
3135void igb_free_tx_resources(struct igb_ring *tx_ring)
3136{
3137        igb_clean_tx_ring(tx_ring);
3138
3139        vfree(tx_ring->tx_buffer_info);
3140        tx_ring->tx_buffer_info = NULL;
3141
3142        /* if not set, then don't free */
3143        if (!tx_ring->desc)
3144                return;
3145
3146        dma_free_coherent(tx_ring->dev, tx_ring->size,
3147                          tx_ring->desc, tx_ring->dma);
3148
3149        tx_ring->desc = NULL;
3150}
3151
3152/**
3153 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3154 * @adapter: board private structure
3155 *
3156 * Free all transmit software resources
3157 **/
3158static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3159{
3160        int i;
3161
3162        for (i = 0; i < adapter->num_tx_queues; i++)
3163                igb_free_tx_resources(adapter->tx_ring[i]);
3164}
3165
3166void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3167                                    struct igb_tx_buffer *tx_buffer)
3168{
3169        if (tx_buffer->skb) {
3170                dev_kfree_skb_any(tx_buffer->skb);
3171                if (tx_buffer->dma)
3172                        dma_unmap_single(ring->dev,
3173                                         tx_buffer->dma,
3174                                         tx_buffer->length,
3175                                         DMA_TO_DEVICE);
3176        } else if (tx_buffer->dma) {
3177                dma_unmap_page(ring->dev,
3178                               tx_buffer->dma,
3179                               tx_buffer->length,
3180                               DMA_TO_DEVICE);
3181        }
3182        tx_buffer->next_to_watch = NULL;
3183        tx_buffer->skb = NULL;
3184        tx_buffer->dma = 0;
3185        /* buffer_info must be completely set up in the transmit path */
3186}
3187
3188/**
3189 * igb_clean_tx_ring - Free Tx Buffers
3190 * @tx_ring: ring to be cleaned
3191 **/
3192static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3193{
3194        struct igb_tx_buffer *buffer_info;
3195        unsigned long size;
3196        u16 i;
3197
3198        if (!tx_ring->tx_buffer_info)
3199                return;
3200        /* Free all the Tx ring sk_buffs */
3201
3202        for (i = 0; i < tx_ring->count; i++) {
3203                buffer_info = &tx_ring->tx_buffer_info[i];
3204                igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3205        }
3206
3207        size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3208        memset(tx_ring->tx_buffer_info, 0, size);
3209
3210        /* Zero out the descriptor ring */
3211        memset(tx_ring->desc, 0, tx_ring->size);
3212
3213        tx_ring->next_to_use = 0;
3214        tx_ring->next_to_clean = 0;
3215}
3216
3217/**
3218 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3219 * @adapter: board private structure
3220 **/
3221static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3222{
3223        int i;
3224
3225        for (i = 0; i < adapter->num_tx_queues; i++)
3226                igb_clean_tx_ring(adapter->tx_ring[i]);
3227}
3228
3229/**
3230 * igb_free_rx_resources - Free Rx Resources
3231 * @rx_ring: ring to clean the resources from
3232 *
3233 * Free all receive software resources
3234 **/
3235void igb_free_rx_resources(struct igb_ring *rx_ring)
3236{
3237        igb_clean_rx_ring(rx_ring);
3238
3239        vfree(rx_ring->rx_buffer_info);
3240        rx_ring->rx_buffer_info = NULL;
3241
3242        /* if not set, then don't free */
3243        if (!rx_ring->desc)
3244                return;
3245
3246        dma_free_coherent(rx_ring->dev, rx_ring->size,
3247                          rx_ring->desc, rx_ring->dma);
3248
3249        rx_ring->desc = NULL;
3250}
3251
3252/**
3253 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3254 * @adapter: board private structure
3255 *
3256 * Free all receive software resources
3257 **/
3258static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3259{
3260        int i;
3261
3262        for (i = 0; i < adapter->num_rx_queues; i++)
3263                igb_free_rx_resources(adapter->rx_ring[i]);
3264}
3265
3266/**
3267 * igb_clean_rx_ring - Free Rx Buffers per Queue
3268 * @rx_ring: ring to free buffers from
3269 **/
3270static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3271{
3272        unsigned long size;
3273        u16 i;
3274
3275        if (!rx_ring->rx_buffer_info)
3276                return;
3277
3278        /* Free all the Rx ring sk_buffs */
3279        for (i = 0; i < rx_ring->count; i++) {
3280                struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3281                if (buffer_info->dma) {
3282                        dma_unmap_single(rx_ring->dev,
3283                                         buffer_info->dma,
3284                                         IGB_RX_HDR_LEN,
3285                                         DMA_FROM_DEVICE);
3286                        buffer_info->dma = 0;
3287                }
3288
3289                if (buffer_info->skb) {
3290                        dev_kfree_skb(buffer_info->skb);
3291                        buffer_info->skb = NULL;
3292                }
3293                if (buffer_info->page_dma) {
3294                        dma_unmap_page(rx_ring->dev,
3295                                       buffer_info->page_dma,
3296                                       PAGE_SIZE / 2,
3297                                       DMA_FROM_DEVICE);
3298                        buffer_info->page_dma = 0;
3299                }
3300                if (buffer_info->page) {
3301                        put_page(buffer_info->page);
3302                        buffer_info->page = NULL;
3303                        buffer_info->page_offset = 0;
3304                }
3305        }
3306
3307        size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3308        memset(rx_ring->rx_buffer_info, 0, size);
3309
3310        /* Zero out the descriptor ring */
3311        memset(rx_ring->desc, 0, rx_ring->size);
3312
3313        rx_ring->next_to_clean = 0;
3314        rx_ring->next_to_use = 0;
3315}
3316
3317/**
3318 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3319 * @adapter: board private structure
3320 **/
3321static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3322{
3323        int i;
3324
3325        for (i = 0; i < adapter->num_rx_queues; i++)
3326                igb_clean_rx_ring(adapter->rx_ring[i]);
3327}
3328
3329/**
3330 * igb_set_mac - Change the Ethernet Address of the NIC
3331 * @netdev: network interface device structure
3332 * @p: pointer to an address structure
3333 *
3334 * Returns 0 on success, negative on failure
3335 **/
3336static int igb_set_mac(struct net_device *netdev, void *p)
3337{
3338        struct igb_adapter *adapter = netdev_priv(netdev);
3339        struct e1000_hw *hw = &adapter->hw;
3340        struct sockaddr *addr = p;
3341
3342        if (!is_valid_ether_addr(addr->sa_data))
3343                return -EADDRNOTAVAIL;
3344
3345        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3346        memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3347
3348        /* set the correct pool for the new PF MAC address in entry 0 */
3349        igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3350                         adapter->vfs_allocated_count);
3351
3352        return 0;
3353}
3354
3355/**
3356 * igb_write_mc_addr_list - write multicast addresses to MTA
3357 * @netdev: network interface device structure
3358 *
3359 * Writes multicast address list to the MTA hash table.
3360 * Returns: -ENOMEM on failure
3361 *                0 on no addresses written
3362 *                X on writing X addresses to MTA
3363 **/
3364static int igb_write_mc_addr_list(struct net_device *netdev)
3365{
3366        struct igb_adapter *adapter = netdev_priv(netdev);
3367        struct e1000_hw *hw = &adapter->hw;
3368        struct netdev_hw_addr *ha;
3369        u8  *mta_list;
3370        int i;
3371
3372        if (netdev_mc_empty(netdev)) {
3373                /* nothing to program, so clear mc list */
3374                igb_update_mc_addr_list(hw, NULL, 0);
3375                igb_restore_vf_multicasts(adapter);
3376                return 0;
3377        }
3378
3379        mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3380        if (!mta_list)
3381                return -ENOMEM;
3382
3383        /* The shared function expects a packed array of only addresses. */
3384        i = 0;
3385        netdev_for_each_mc_addr(ha, netdev)
3386                memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3387
3388        igb_update_mc_addr_list(hw, mta_list, i);
3389        kfree(mta_list);
3390
3391        return netdev_mc_count(netdev);
3392}
3393
3394/**
3395 * igb_write_uc_addr_list - write unicast addresses to RAR table
3396 * @netdev: network interface device structure
3397 *
3398 * Writes unicast address list to the RAR table.
3399 * Returns: -ENOMEM on failure/insufficient address space
3400 *                0 on no addresses written
3401 *                X on writing X addresses to the RAR table
3402 **/
3403static int igb_write_uc_addr_list(struct net_device *netdev)
3404{
3405        struct igb_adapter *adapter = netdev_priv(netdev);
3406        struct e1000_hw *hw = &adapter->hw;
3407        unsigned int vfn = adapter->vfs_allocated_count;
3408        unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3409        int count = 0;
3410
3411        /* return ENOMEM indicating insufficient memory for addresses */
3412        if (netdev_uc_count(netdev) > rar_entries)
3413                return -ENOMEM;
3414
3415        if (!netdev_uc_empty(netdev) && rar_entries) {
3416                struct netdev_hw_addr *ha;
3417
3418                netdev_for_each_uc_addr(ha, netdev) {
3419                        if (!rar_entries)
3420                                break;
3421                        igb_rar_set_qsel(adapter, ha->addr,
3422                                         rar_entries--,
3423                                         vfn);
3424                        count++;
3425                }
3426        }
3427        /* write the addresses in reverse order to avoid write combining */
3428        for (; rar_entries > 0 ; rar_entries--) {
3429                wr32(E1000_RAH(rar_entries), 0);
3430                wr32(E1000_RAL(rar_entries), 0);
3431        }
3432        wrfl();
3433
3434        return count;
3435}
3436
3437/**
3438 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3439 * @netdev: network interface device structure
3440 *
3441 * The set_rx_mode entry point is called whenever the unicast or multicast
3442 * address lists or the network interface flags are updated.  This routine is
3443 * responsible for configuring the hardware for proper unicast, multicast,
3444 * promiscuous mode, and all-multi behavior.
3445 **/
3446static void igb_set_rx_mode(struct net_device *netdev)
3447{
3448        struct igb_adapter *adapter = netdev_priv(netdev);
3449        struct e1000_hw *hw = &adapter->hw;
3450        unsigned int vfn = adapter->vfs_allocated_count;
3451        u32 rctl, vmolr = 0;
3452        int count;
3453
3454        /* Check for Promiscuous and All Multicast modes */
3455        rctl = rd32(E1000_RCTL);
3456
3457        /* clear the effected bits */
3458        rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3459
3460        if (netdev->flags & IFF_PROMISC) {
3461                rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3462                vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3463        } else {
3464                if (netdev->flags & IFF_ALLMULTI) {
3465                        rctl |= E1000_RCTL_MPE;
3466                        vmolr |= E1000_VMOLR_MPME;
3467                } else {
3468                        /*
3469                         * Write addresses to the MTA, if the attempt fails
3470                         * then we should just turn on promiscuous mode so
3471                         * that we can at least receive multicast traffic
3472                         */
3473                        count = igb_write_mc_addr_list(netdev);
3474                        if (count < 0) {
3475                                rctl |= E1000_RCTL_MPE;
3476                                vmolr |= E1000_VMOLR_MPME;
3477                        } else if (count) {
3478                                vmolr |= E1000_VMOLR_ROMPE;
3479                        }
3480                }
3481                /*
3482                 * Write addresses to available RAR registers, if there is not
3483                 * sufficient space to store all the addresses then enable
3484                 * unicast promiscuous mode
3485                 */
3486                count = igb_write_uc_addr_list(netdev);
3487                if (count < 0) {
3488                        rctl |= E1000_RCTL_UPE;
3489                        vmolr |= E1000_VMOLR_ROPE;
3490                }
3491                rctl |= E1000_RCTL_VFE;
3492        }
3493        wr32(E1000_RCTL, rctl);
3494
3495        /*
3496         * In order to support SR-IOV and eventually VMDq it is necessary to set
3497         * the VMOLR to enable the appropriate modes.  Without this workaround
3498         * we will have issues with VLAN tag stripping not being done for frames
3499         * that are only arriving because we are the default pool
3500         */
3501        if (hw->mac.type < e1000_82576)
3502                return;
3503
3504        vmolr |= rd32(E1000_VMOLR(vfn)) &
3505                 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3506        wr32(E1000_VMOLR(vfn), vmolr);
3507        igb_restore_vf_multicasts(adapter);
3508}
3509
3510static void igb_check_wvbr(struct igb_adapter *adapter)
3511{
3512        struct e1000_hw *hw = &adapter->hw;
3513        u32 wvbr = 0;
3514
3515        switch (hw->mac.type) {
3516        case e1000_82576:
3517        case e1000_i350:
3518                if (!(wvbr = rd32(E1000_WVBR)))
3519                        return;
3520                break;
3521        default:
3522                break;
3523        }
3524
3525        adapter->wvbr |= wvbr;
3526}
3527
3528#define IGB_STAGGERED_QUEUE_OFFSET 8
3529
3530static void igb_spoof_check(struct igb_adapter *adapter)
3531{
3532        int j;
3533
3534        if (!adapter->wvbr)
3535                return;
3536
3537        for(j = 0; j < adapter->vfs_allocated_count; j++) {
3538                if (adapter->wvbr & (1 << j) ||
3539                    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3540                        dev_warn(&adapter->pdev->dev,
3541                                "Spoof event(s) detected on VF %d\n", j);
3542                        adapter->wvbr &=
3543                                ~((1 << j) |
3544                                  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3545                }
3546        }
3547}
3548
3549/* Need to wait a few seconds after link up to get diagnostic information from
3550 * the phy */
3551static void igb_update_phy_info(unsigned long data)
3552{
3553        struct igb_adapter *adapter = (struct igb_adapter *) data;
3554        igb_get_phy_info(&adapter->hw);
3555}
3556
3557/**
3558 * igb_has_link - check shared code for link and determine up/down
3559 * @adapter: pointer to driver private info
3560 **/
3561bool igb_has_link(struct igb_adapter *adapter)
3562{
3563        struct e1000_hw *hw = &adapter->hw;
3564        bool link_active = false;
3565        s32 ret_val = 0;
3566
3567        /* get_link_status is set on LSC (link status) interrupt or
3568         * rx sequence error interrupt.  get_link_status will stay
3569         * false until the e1000_check_for_link establishes link
3570         * for copper adapters ONLY
3571         */
3572        switch (hw->phy.media_type) {
3573        case e1000_media_type_copper:
3574                if (hw->mac.get_link_status) {
3575                        ret_val = hw->mac.ops.check_for_link(hw);
3576                        link_active = !hw->mac.get_link_status;
3577                } else {
3578                        link_active = true;
3579                }
3580                break;
3581        case e1000_media_type_internal_serdes:
3582                ret_val = hw->mac.ops.check_for_link(hw);
3583                link_active = hw->mac.serdes_has_link;
3584                break;
3585        default:
3586        case e1000_media_type_unknown:
3587                break;
3588        }
3589
3590        return link_active;
3591}
3592
3593static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3594{
3595        bool ret = false;
3596        u32 ctrl_ext, thstat;
3597
3598        /* check for thermal sensor event on i350, copper only */
3599        if (hw->mac.type == e1000_i350) {
3600                thstat = rd32(E1000_THSTAT);
3601                ctrl_ext = rd32(E1000_CTRL_EXT);
3602
3603                if ((hw->phy.media_type == e1000_media_type_copper) &&
3604                    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3605                        ret = !!(thstat & event);
3606                }
3607        }
3608
3609        return ret;
3610}
3611
3612/**
3613 * igb_watchdog - Timer Call-back
3614 * @data: pointer to adapter cast into an unsigned long
3615 **/
3616static void igb_watchdog(unsigned long data)
3617{
3618        struct igb_adapter *adapter = (struct igb_adapter *)data;
3619        /* Do the rest outside of interrupt context */
3620        schedule_work(&adapter->watchdog_task);
3621}
3622
3623static void igb_watchdog_task(struct work_struct *work)
3624{
3625        struct igb_adapter *adapter = container_of(work,
3626                                                   struct igb_adapter,
3627                                                   watchdog_task);
3628        struct e1000_hw *hw = &adapter->hw;
3629        struct net_device *netdev = adapter->netdev;
3630        u32 link;
3631        int i;
3632
3633        link = igb_has_link(adapter);
3634        if (link) {
3635                if (!netif_carrier_ok(netdev)) {
3636                        u32 ctrl;
3637                        hw->mac.ops.get_speed_and_duplex(hw,
3638                                                         &adapter->link_speed,
3639                                                         &adapter->link_duplex);
3640
3641                        ctrl = rd32(E1000_CTRL);
3642                        /* Links status message must follow this format */
3643                        printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3644                                 "Flow Control: %s\n",
3645                               netdev->name,
3646                               adapter->link_speed,
3647                               adapter->link_duplex == FULL_DUPLEX ?
3648                                 "Full Duplex" : "Half Duplex",
3649                               ((ctrl & E1000_CTRL_TFCE) &&
3650                                (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3651                               ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3652                               ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3653
3654                        /* check for thermal sensor event */
3655                        if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3656                                printk(KERN_INFO "igb: %s The network adapter "
3657                                                 "link speed was downshifted "
3658                                                 "because it overheated.\n",
3659                                                 netdev->name);
3660                        }
3661
3662                        /* adjust timeout factor according to speed/duplex */
3663                        adapter->tx_timeout_factor = 1;
3664                        switch (adapter->link_speed) {
3665                        case SPEED_10:
3666                                adapter->tx_timeout_factor = 14;
3667                                break;
3668                        case SPEED_100:
3669                                /* maybe add some timeout factor ? */
3670                                break;
3671                        }
3672
3673                        netif_carrier_on(netdev);
3674
3675                        igb_ping_all_vfs(adapter);
3676                        igb_check_vf_rate_limit(adapter);
3677
3678                        /* link state has changed, schedule phy info update */
3679                        if (!test_bit(__IGB_DOWN, &adapter->state))
3680                                mod_timer(&adapter->phy_info_timer,
3681                                          round_jiffies(jiffies + 2 * HZ));
3682                }
3683        } else {
3684                if (netif_carrier_ok(netdev)) {
3685                        adapter->link_speed = 0;
3686                        adapter->link_duplex = 0;
3687
3688                        /* check for thermal sensor event */
3689                        if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3690                                printk(KERN_ERR "igb: %s The network adapter "
3691                                                "was stopped because it "
3692                                                "overheated.\n",
3693                                                netdev->name);
3694                        }
3695
3696                        /* Links status message must follow this format */
3697                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
3698                               netdev->name);
3699                        netif_carrier_off(netdev);
3700
3701                        igb_ping_all_vfs(adapter);
3702
3703                        /* link state has changed, schedule phy info update */
3704                        if (!test_bit(__IGB_DOWN, &adapter->state))
3705                                mod_timer(&adapter->phy_info_timer,
3706                                          round_jiffies(jiffies + 2 * HZ));
3707                }
3708        }
3709
3710        spin_lock(&adapter->stats64_lock);
3711        igb_update_stats(adapter, &adapter->stats64);
3712        spin_unlock(&adapter->stats64_lock);
3713
3714        for (i = 0; i < adapter->num_tx_queues; i++) {
3715                struct igb_ring *tx_ring = adapter->tx_ring[i];
3716                if (!netif_carrier_ok(netdev)) {
3717                        /* We've lost link, so the controller stops DMA,
3718                         * but we've got queued Tx work that's never going
3719                         * to get done, so reset controller to flush Tx.
3720                         * (Do the reset outside of interrupt context). */
3721                        if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3722                                adapter->tx_timeout_count++;
3723                                schedule_work(&adapter->reset_task);
3724                                /* return immediately since reset is imminent */
3725                                return;
3726                        }
3727                }
3728
3729                /* Force detection of hung controller every watchdog period */
3730                set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3731        }
3732
3733        /* Cause software interrupt to ensure rx ring is cleaned */
3734        if (adapter->msix_entries) {
3735                u32 eics = 0;
3736                for (i = 0; i < adapter->num_q_vectors; i++)
3737                        eics |= adapter->q_vector[i]->eims_value;
3738                wr32(E1000_EICS, eics);
3739        } else {
3740                wr32(E1000_ICS, E1000_ICS_RXDMT0);
3741        }
3742
3743        igb_spoof_check(adapter);
3744
3745        /* Reset the timer */
3746        if (!test_bit(__IGB_DOWN, &adapter->state))
3747                mod_timer(&adapter->watchdog_timer,
3748                          round_jiffies(jiffies + 2 * HZ));
3749}
3750
/*
 * Latency classes used by the dynamic interrupt moderation code;
 * igb_set_itr() maps them to IGB_70K_ITR, IGB_20K_ITR and IGB_4K_ITR
 * respectively.
 */
enum latency_range {
	lowest_latency = 0,
	low_latency,		/* 1 */
	bulk_latency,		/* 2 */
	latency_invalid = 255,
};
3757
3758/**
3759 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3760 *
3761 *      Stores a new ITR value based on strictly on packet size.  This
3762 *      algorithm is less sophisticated than that used in igb_update_itr,
3763 *      due to the difficulty of synchronizing statistics across multiple
3764 *      receive rings.  The divisors and thresholds used by this function
3765 *      were determined based on theoretical maximum wire speed and testing
3766 *      data, in order to minimize response time while increasing bulk
3767 *      throughput.
3768 *      This functionality is controlled by the InterruptThrottleRate module
3769 *      parameter (see igb_param.c)
3770 *      NOTE:  This function is called only when operating in a multiqueue
3771 *             receive environment.
3772 * @q_vector: pointer to q_vector
3773 **/
3774static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3775{
3776        int new_val = q_vector->itr_val;
3777        int avg_wire_size = 0;
3778        struct igb_adapter *adapter = q_vector->adapter;
3779        unsigned int packets;
3780
3781        /* For non-gigabit speeds, just fix the interrupt rate at 4000
3782         * ints/sec - ITR timer value of 120 ticks.
3783         */
3784        if (adapter->link_speed != SPEED_1000) {
3785                new_val = IGB_4K_ITR;
3786                goto set_itr_val;
3787        }
3788
3789        packets = q_vector->rx.total_packets;
3790        if (packets)
3791                avg_wire_size = q_vector->rx.total_bytes / packets;
3792
3793        packets = q_vector->tx.total_packets;
3794        if (packets)
3795                avg_wire_size = max_t(u32, avg_wire_size,
3796                                      q_vector->tx.total_bytes / packets);
3797
3798        /* if avg_wire_size isn't set no work was done */
3799        if (!avg_wire_size)
3800                goto clear_counts;
3801
3802        /* Add 24 bytes to size to account for CRC, preamble, and gap */
3803        avg_wire_size += 24;
3804
3805        /* Don't starve jumbo frames */
3806        avg_wire_size = min(avg_wire_size, 3000);
3807
3808        /* Give a little boost to mid-size frames */
3809        if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3810                new_val = avg_wire_size / 3;
3811        else
3812                new_val = avg_wire_size / 2;
3813
3814        /* conservative mode (itr 3) eliminates the lowest_latency setting */
3815        if (new_val < IGB_20K_ITR &&
3816            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3817             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3818                new_val = IGB_20K_ITR;
3819
3820set_itr_val:
3821        if (new_val != q_vector->itr_val) {
3822                q_vector->itr_val = new_val;
3823                q_vector->set_itr = 1;
3824        }
3825clear_counts:
3826        q_vector->rx.total_bytes = 0;
3827        q_vector->rx.total_packets = 0;
3828        q_vector->tx.total_bytes = 0;
3829        q_vector->tx.total_packets = 0;
3830}
3831
3832/**
3833 * igb_update_itr - update the dynamic ITR value based on statistics
3834 *      Stores a new ITR value based on packets and byte
3835 *      counts during the last interrupt.  The advantage of per interrupt
3836 *      computation is faster updates and more accurate ITR for the current
3837 *      traffic pattern.  Constants in this function were computed
3838 *      based on theoretical maximum wire speed and thresholds were set based
3839 *      on testing data as well as attempting to minimize response time
3840 *      while increasing bulk throughput.
3841 *      this functionality is controlled by the InterruptThrottleRate module
3842 *      parameter (see igb_param.c)
3843 *      NOTE:  These calculations are only valid when operating in a single-
3844 *             queue environment.
3845 * @q_vector: pointer to q_vector
3846 * @ring_container: ring info to update the itr for
3847 **/
3848static void igb_update_itr(struct igb_q_vector *q_vector,
3849                           struct igb_ring_container *ring_container)
3850{
3851        unsigned int packets = ring_container->total_packets;
3852        unsigned int bytes = ring_container->total_bytes;
3853        u8 itrval = ring_container->itr;
3854
3855        /* no packets, exit with status unchanged */
3856        if (packets == 0)
3857                return;
3858
3859        switch (itrval) {
3860        case lowest_latency:
3861                /* handle TSO and jumbo frames */
3862                if (bytes/packets > 8000)
3863                        itrval = bulk_latency;
3864                else if ((packets < 5) && (bytes > 512))
3865                        itrval = low_latency;
3866                break;
3867        case low_latency:  /* 50 usec aka 20000 ints/s */
3868                if (bytes > 10000) {
3869                        /* this if handles the TSO accounting */
3870                        if (bytes/packets > 8000) {
3871                                itrval = bulk_latency;
3872                        } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3873                                itrval = bulk_latency;
3874                        } else if ((packets > 35)) {
3875                                itrval = lowest_latency;
3876                        }
3877                } else if (bytes/packets > 2000) {
3878                        itrval = bulk_latency;
3879                } else if (packets <= 2 && bytes < 512) {
3880                        itrval = lowest_latency;
3881                }
3882                break;
3883        case bulk_latency: /* 250 usec aka 4000 ints/s */
3884                if (bytes > 25000) {
3885                        if (packets > 35)
3886                                itrval = low_latency;
3887                } else if (bytes < 1500) {
3888                        itrval = low_latency;
3889                }
3890                break;
3891        }
3892
3893        /* clear work counters since we have the values we need */
3894        ring_container->total_bytes = 0;
3895        ring_container->total_packets = 0;
3896
3897        /* write updated itr to ring container */
3898        ring_container->itr = itrval;
3899}
3900
3901static void igb_set_itr(struct igb_q_vector *q_vector)
3902{
3903        struct igb_adapter *adapter = q_vector->adapter;
3904        u32 new_itr = q_vector->itr_val;
3905        u8 current_itr = 0;
3906
3907        /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3908        if (adapter->link_speed != SPEED_1000) {
3909                current_itr = 0;
3910                new_itr = IGB_4K_ITR;
3911                goto set_itr_now;
3912        }
3913
3914        igb_update_itr(q_vector, &q_vector->tx);
3915        igb_update_itr(q_vector, &q_vector->rx);
3916
3917        current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3918
3919        /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920        if (current_itr == lowest_latency &&
3921            ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3922             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3923                current_itr = low_latency;
3924
3925        switch (current_itr) {
3926        /* counts and packets in update_itr are dependent on these numbers */
3927        case lowest_latency:
3928                new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3929                break;
3930        case low_latency:
3931                new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3932                break;
3933        case bulk_latency:
3934                new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
3935                break;
3936        default:
3937                break;
3938        }
3939
3940set_itr_now:
3941        if (new_itr != q_vector->itr_val) {
3942                /* this attempts to bias the interrupt rate towards Bulk
3943                 * by adding intermediate steps when interrupt rate is
3944                 * increasing */
3945                new_itr = new_itr > q_vector->itr_val ?
3946                             max((new_itr * q_vector->itr_val) /
3947                                 (new_itr + (q_vector->itr_val >> 2)),
3948                                 new_itr) :
3949                             new_itr;
3950                /* Don't write the value here; it resets the adapter's
3951                 * internal timer, and causes us to delay far longer than
3952                 * we should between interrupts.  Instead, we write the ITR
3953                 * value at the beginning of the next interrupt so the timing
3954                 * ends up being correct.
3955                 */
3956                q_vector->itr_val = new_itr;
3957                q_vector->set_itr = 1;
3958        }
3959}
3960
3961void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3962                     u32 type_tucmd, u32 mss_l4len_idx)
3963{
3964        struct e1000_adv_tx_context_desc *context_desc;
3965        u16 i = tx_ring->next_to_use;
3966
3967        context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3968
3969        i++;
3970        tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3971
3972        /* set bits to identify this as an advanced context descriptor */
3973        type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3974
3975        /* For 82575, context index must be unique per ring. */
3976        if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3977                mss_l4len_idx |= tx_ring->reg_idx << 4;
3978
3979        context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
3980        context_desc->seqnum_seed       = 0;
3981        context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
3982        context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
3983}
3984
3985static int igb_tso(struct igb_ring *tx_ring,
3986                   struct igb_tx_buffer *first,
3987                   u8 *hdr_len)
3988{
3989        struct sk_buff *skb = first->skb;
3990        u32 vlan_macip_lens, type_tucmd;
3991        u32 mss_l4len_idx, l4len;
3992
3993        if (!skb_is_gso(skb))
3994                return 0;
3995
3996        if (skb_header_cloned(skb)) {
3997                int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3998                if (err)
3999                        return err;
4000        }
4001
4002        /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4003        type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4004
4005        if (first->protocol == __constant_htons(ETH_P_IP)) {
4006                struct iphdr *iph = ip_hdr(skb);
4007                iph->tot_len = 0;
4008                iph->check = 0;
4009                tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4010                                                         iph->daddr, 0,
4011                                                         IPPROTO_TCP,
4012                                                         0);
4013                type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4014                first->tx_flags |= IGB_TX_FLAGS_TSO |
4015                                   IGB_TX_FLAGS_CSUM |
4016                                   IGB_TX_FLAGS_IPV4;
4017        } else if (skb_is_gso_v6(skb)) {
4018                ipv6_hdr(skb)->payload_len = 0;
4019                tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4020                                                       &ipv6_hdr(skb)->daddr,
4021                                                       0, IPPROTO_TCP, 0);
4022                first->tx_flags |= IGB_TX_FLAGS_TSO |
4023                                   IGB_TX_FLAGS_CSUM;
4024        }
4025
4026        /* compute header lengths */
4027        l4len = tcp_hdrlen(skb);
4028        *hdr_len = skb_transport_offset(skb) + l4len;
4029
4030        /* update gso size and bytecount with header size */
4031        first->gso_segs = skb_shinfo(skb)->gso_segs;
4032        first->bytecount += (first->gso_segs - 1) * *hdr_len;
4033
4034        /* MSS L4LEN IDX */
4035        mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4036        mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4037
4038        /* VLAN MACLEN IPLEN */
4039        vlan_macip_lens = skb_network_header_len(skb);
4040        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4041        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4042
4043        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4044
4045        return 1;
4046}
4047
4048static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4049{
4050        struct sk_buff *skb = first->skb;
4051        u32 vlan_macip_lens = 0;
4052        u32 mss_l4len_idx = 0;
4053        u32 type_tucmd = 0;
4054
4055        if (skb->ip_summed != CHECKSUM_PARTIAL) {
4056                if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4057                        return;
4058        } else {
4059                u8 l4_hdr = 0;
4060                switch (first->protocol) {
4061                case __constant_htons(ETH_P_IP):
4062                        vlan_macip_lens |= skb_network_header_len(skb);
4063                        type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4064                        l4_hdr = ip_hdr(skb)->protocol;
4065                        break;
4066                case __constant_htons(ETH_P_IPV6):
4067                        vlan_macip_lens |= skb_network_header_len(skb);
4068                        l4_hdr = ipv6_hdr(skb)->nexthdr;
4069                        break;
4070                default:
4071                        if (unlikely(net_ratelimit())) {
4072                                dev_warn(tx_ring->dev,
4073                                 "partial checksum but proto=%x!\n",
4074                                 first->protocol);
4075                        }
4076                        break;
4077                }
4078
4079                switch (l4_hdr) {
4080                case IPPROTO_TCP:
4081                        type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4082                        mss_l4len_idx = tcp_hdrlen(skb) <<
4083                                        E1000_ADVTXD_L4LEN_SHIFT;
4084                        break;
4085                case IPPROTO_SCTP:
4086                        type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4087                        mss_l4len_idx = sizeof(struct sctphdr) <<
4088                                        E1000_ADVTXD_L4LEN_SHIFT;
4089                        break;
4090                case IPPROTO_UDP:
4091                        mss_l4len_idx = sizeof(struct udphdr) <<
4092                                        E1000_ADVTXD_L4LEN_SHIFT;
4093                        break;
4094                default:
4095                        if (unlikely(net_ratelimit())) {
4096                                dev_warn(tx_ring->dev,
4097                                 "partial checksum but l4 proto=%x!\n",
4098                                 l4_hdr);
4099                        }
4100                        break;
4101                }
4102
4103                /* update TX checksum flag */
4104                first->tx_flags |= IGB_TX_FLAGS_CSUM;
4105        }
4106
4107        vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4108        vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4109
4110        igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4111}
4112
4113static __le32 igb_tx_cmd_type(u32 tx_flags)
4114{
4115        /* set type for advanced descriptor with frame checksum insertion */
4116        __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4117                                      E1000_ADVTXD_DCMD_IFCS |
4118                                      E1000_ADVTXD_DCMD_DEXT);
4119
4120        /* set HW vlan bit if vlan is present */
4121        if (tx_flags & IGB_TX_FLAGS_VLAN)
4122                cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4123
4124        /* set timestamp bit if present */
4125        if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4126                cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4127
4128        /* set segmentation bits for TSO */
4129        if (tx_flags & IGB_TX_FLAGS_TSO)
4130                cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4131
4132        return cmd_type;
4133}
4134
4135static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4136                                 union e1000_adv_tx_desc *tx_desc,
4137                                 u32 tx_flags, unsigned int paylen)
4138{
4139        u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4140
4141        /* 82575 requires a unique index per ring if any offload is enabled */
4142        if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4143            test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4144                olinfo_status |= tx_ring->reg_idx << 4;
4145
4146        /* insert L4 checksum */
4147        if (tx_flags & IGB_TX_FLAGS_CSUM) {
4148                olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4149
4150                /* insert IPv4 checksum */
4151                if (tx_flags & IGB_TX_FLAGS_IPV4)
4152                        olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4153        }
4154
4155        tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4156}
4157
4158/*
4159 * The largest size we can write to the descriptor is 65535.  In order to
4160 * maintain a power of two alignment we have to limit ourselves to 32K.
4161 */
4162#define IGB_MAX_TXD_PWR 15
4163#define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
4164
4165static void igb_tx_map(struct igb_ring *tx_ring,
4166                       struct igb_tx_buffer *first,
4167                       const u8 hdr_len)
4168{
4169        struct sk_buff *skb = first->skb;
4170        struct igb_tx_buffer *tx_buffer_info;
4171        union e1000_adv_tx_desc *tx_desc;
4172        dma_addr_t dma;
4173        struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4174        unsigned int data_len = skb->data_len;
4175        unsigned int size = skb_headlen(skb);
4176        unsigned int paylen = skb->len - hdr_len;
4177        __le32 cmd_type;
4178        u32 tx_flags = first->tx_flags;
4179        u16 i = tx_ring->next_to_use;
4180
4181        tx_desc = IGB_TX_DESC(tx_ring, i);
4182
4183        igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4184        cmd_type = igb_tx_cmd_type(tx_flags);
4185
4186        dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4187        if (dma_mapping_error(tx_ring->dev, dma))
4188                goto dma_error;
4189
4190        /* record length, and DMA address */
4191        first->length = size;
4192        first->dma = dma;
4193        tx_desc->read.buffer_addr = cpu_to_le64(dma);
4194
4195        for (;;) {
4196                while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4197                        tx_desc->read.cmd_type_len =
4198                                cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4199
4200                        i++;
4201                        tx_desc++;
4202                        if (i == tx_ring->count) {
4203                                tx_desc = IGB_TX_DESC(tx_ring, 0);
4204                                i = 0;
4205                        }
4206
4207                        dma += IGB_MAX_DATA_PER_TXD;
4208                        size -= IGB_MAX_DATA_PER_TXD;
4209
4210                        tx_desc->read.olinfo_status = 0;
4211                        tx_desc->read.buffer_addr = cpu_to_le64(dma);
4212                }
4213
4214                if (likely(!data_len))
4215                        break;
4216
4217                tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4218
4219                i++;
4220                tx_desc++;
4221                if (i == tx_ring->count) {
4222                        tx_desc = IGB_TX_DESC(tx_ring, 0);
4223                        i = 0;
4224                }
4225
4226                size = skb_frag_size(frag);
4227                data_len -= size;
4228
4229                dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4230                                   size, DMA_TO_DEVICE);
4231                if (dma_mapping_error(tx_ring->dev, dma))
4232                        goto dma_error;
4233
4234                tx_buffer_info = &tx_ring->tx_buffer_info[i];
4235                tx_buffer_info->length = size;
4236                tx_buffer_info->dma = dma;
4237
4238                tx_desc->read.olinfo_status = 0;
4239                tx_desc->read.buffer_addr = cpu_to_le64(dma);
4240
4241                frag++;
4242        }
4243
4244        /* write last descriptor with RS and EOP bits */
4245        cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4246        tx_desc->read.cmd_type_len = cmd_type;
4247
4248        /* set the timestamp */
4249        first->time_stamp = jiffies;
4250
4251        /*
4252         * Force memory writes to complete before letting h/w know there
4253         * are new descriptors to fetch.  (Only applicable for weak-ordered
4254         * memory model archs, such as IA-64).
4255         *
4256         * We also need this memory barrier to make certain all of the
4257         * status bits have been updated before next_to_watch is written.
4258         */
4259        wmb();
4260
4261        /* set next_to_watch value indicating a packet is present */
4262        first->next_to_watch = tx_desc;
4263
4264        i++;
4265        if (i == tx_ring->count)
4266                i = 0;
4267
4268        tx_ring->next_to_use = i;
4269
4270        writel(i, tx_ring->tail);
4271
4272        /* we need this if more than one processor can write to our tail
4273         * at a time, it syncronizes IO on IA64/Altix systems */
4274        mmiowb();
4275
4276        return;
4277
4278dma_error:
4279        dev_err(tx_ring->dev, "TX DMA map failed\n");
4280
4281        /* clear dma mappings for failed tx_buffer_info map */
4282        for (;;) {
4283                tx_buffer_info = &tx_ring->tx_buffer_info[i];
4284                igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4285                if (tx_buffer_info == first)
4286                        break;
4287                if (i == 0)
4288                        i = tx_ring->count;
4289                i--;
4290        }
4291
4292        tx_ring->next_to_use = i;
4293}
4294
4295static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4296{
4297        struct net_device *netdev = tx_ring->netdev;
4298
4299        netif_stop_subqueue(netdev, tx_ring->queue_index);
4300
4301        /* Herbert's original patch had:
4302         *  smp_mb__after_netif_stop_queue();
4303         * but since that doesn't exist yet, just open code it. */
4304        smp_mb();
4305
4306        /* We need to check again in a case another CPU has just
4307         * made room available. */
4308        if (igb_desc_unused(tx_ring) < size)
4309                return -EBUSY;
4310
4311        /* A reprieve! */
4312        netif_wake_subqueue(netdev, tx_ring->queue_index);
4313
4314        u64_stats_update_begin(&tx_ring->tx_syncp2);
4315        tx_ring->tx_stats.restart_queue2++;
4316        u64_stats_update_end(&tx_ring->tx_syncp2);
4317
4318        return 0;
4319}
4320
4321static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4322{
4323        if (igb_desc_unused(tx_ring) >= size)
4324                return 0;
4325        return __igb_maybe_stop_tx(tx_ring, size);
4326}
4327
4328netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4329                                struct igb_ring *tx_ring)
4330{
4331        struct igb_tx_buffer *first;
4332        int tso;
4333        u32 tx_flags = 0;
4334        __be16 protocol = vlan_get_protocol(skb);
4335        u8 hdr_len = 0;
4336
4337        /* need: 1 descriptor per page,
4338         *       + 2 desc gap to keep tail from touching head,
4339         *       + 1 desc for skb->data,
4340         *       + 1 desc for context descriptor,
4341         * otherwise try next time */
4342        if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4343                /* this is a hard error */
4344                return NETDEV_TX_BUSY;
4345        }
4346
4347        /* record the location of the first descriptor for this packet */
4348        first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4349        first->skb = skb;
4350        first->bytecount = skb->len;
4351        first->gso_segs = 1;
4352
4353        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4354                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4355                tx_flags |= IGB_TX_FLAGS_TSTAMP;
4356        }
4357
4358        if (vlan_tx_tag_present(skb)) {
4359                tx_flags |= IGB_TX_FLAGS_VLAN;
4360                tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4361        }
4362
4363        /* record initial flags and protocol */
4364        first->tx_flags = tx_flags;
4365        first->protocol = protocol;
4366
4367        tso = igb_tso(tx_ring, first, &hdr_len);
4368        if (tso < 0)
4369                goto out_drop;
4370        else if (!tso)
4371                igb_tx_csum(tx_ring, first);
4372
4373        igb_tx_map(tx_ring, first, hdr_len);
4374
4375        /* Make sure there is space in the ring for the next send. */
4376        igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4377
4378        return NETDEV_TX_OK;
4379
4380out_drop:
4381        igb_unmap_and_free_tx_resource(tx_ring, first);
4382
4383        return NETDEV_TX_OK;
4384}
4385
4386static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4387                                                    struct sk_buff *skb)
4388{
4389        unsigned int r_idx = skb->queue_mapping;
4390
4391        if (r_idx >= adapter->num_tx_queues)
4392                r_idx = r_idx % adapter->num_tx_queues;
4393
4394        return adapter->tx_ring[r_idx];
4395}
4396
4397static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4398                                  struct net_device *netdev)
4399{
4400        struct igb_adapter *adapter = netdev_priv(netdev);
4401
4402        if (test_bit(__IGB_DOWN, &adapter->state)) {
4403                dev_kfree_skb_any(skb);
4404                return NETDEV_TX_OK;
4405        }
4406
4407        if (skb->len <= 0) {
4408                dev_kfree_skb_any(skb);
4409                return NETDEV_TX_OK;
4410        }
4411
4412        /*
4413         * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4414         * in order to meet this minimum size requirement.
4415         */
4416        if (skb->len < 17) {
4417                if (skb_padto(skb, 17))
4418                        return NETDEV_TX_OK;
4419                skb->len = 17;
4420        }
4421
4422        return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4423}
4424
4425/**
4426 * igb_tx_timeout - Respond to a Tx Hang
4427 * @netdev: network interface device structure
4428 **/
4429static void igb_tx_timeout(struct net_device *netdev)
4430{
4431        struct igb_adapter *adapter = netdev_priv(netdev);
4432        struct e1000_hw *hw = &adapter->hw;
4433
4434        /* Do the reset outside of interrupt context */
4435        adapter->tx_timeout_count++;
4436
4437        if (hw->mac.type >= e1000_82580)
4438                hw->dev_spec._82575.global_device_reset = true;
4439
4440        schedule_work(&adapter->reset_task);
4441        wr32(E1000_EICS,
4442             (adapter->eims_enable_mask & ~adapter->eims_other));
4443}
4444
4445static void igb_reset_task(struct work_struct *work)
4446{
4447        struct igb_adapter *adapter;
4448        adapter = container_of(work, struct igb_adapter, reset_task);
4449
4450        igb_dump(adapter);
4451        netdev_err(adapter->netdev, "Reset adapter\n");
4452        igb_reinit_locked(adapter);
4453}
4454
4455/**
4456 * igb_get_stats64 - Get System Network Statistics
4457 * @netdev: network interface device structure
4458 * @stats: rtnl_link_stats64 pointer
4459 *
4460 **/
4461static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4462                                                 struct rtnl_link_stats64 *stats)
4463{
4464        struct igb_adapter *adapter = netdev_priv(netdev);
4465
4466        spin_lock(&adapter->stats64_lock);
4467        igb_update_stats(adapter, &adapter->stats64);
4468        memcpy(stats, &adapter->stats64, sizeof(*stats));
4469        spin_unlock(&adapter->stats64_lock);
4470
4471        return stats;
4472}
4473
4474/**
4475 * igb_change_mtu - Change the Maximum Transfer Unit
4476 * @netdev: network interface device structure
4477 * @new_mtu: new value for maximum frame size
4478 *
4479 * Returns 0 on success, negative on failure
4480 **/
4481static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4482{
4483        struct igb_adapter *adapter = netdev_priv(netdev);
4484        struct pci_dev *pdev = adapter->pdev;
4485        int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4486
4487        if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4488                dev_err(&pdev->dev, "Invalid MTU setting\n");
4489                return -EINVAL;
4490        }
4491
4492#define MAX_STD_JUMBO_FRAME_SIZE 9238
4493        if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4494                dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4495                return -EINVAL;
4496        }
4497
4498        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4499                msleep(1);
4500
4501        /* igb_down has a dependency on max_frame_size */
4502        adapter->max_frame_size = max_frame;
4503
4504        if (netif_running(netdev))
4505                igb_down(adapter);
4506
4507        dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4508                 netdev->mtu, new_mtu);
4509        netdev->mtu = new_mtu;
4510
4511        if (netif_running(netdev))
4512                igb_up(adapter);
4513        else
4514                igb_reset(adapter);
4515
4516        clear_bit(__IGB_RESETTING, &adapter->state);
4517
4518        return 0;
4519}
4520
4521/**
4522 * igb_update_stats - Update the board statistics counters
4523 * @adapter: board private structure
4524 **/
4525
4526void igb_update_stats(struct igb_adapter *adapter,
4527                      struct rtnl_link_stats64 *net_stats)
4528{
4529        struct e1000_hw *hw = &adapter->hw;
4530        struct pci_dev *pdev = adapter->pdev;
4531        u32 reg, mpc;
4532        u16 phy_tmp;
4533        int i;
4534        u64 bytes, packets;
4535        unsigned int start;
4536        u64 _bytes, _packets;
4537
4538#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4539
4540        /*
4541         * Prevent stats update while adapter is being reset, or if the pci
4542         * connection is down.
4543         */
4544        if (adapter->link_speed == 0)
4545                return;
4546        if (pci_channel_offline(pdev))
4547                return;
4548
4549        bytes = 0;
4550        packets = 0;
4551        for (i = 0; i < adapter->num_rx_queues; i++) {
4552                u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4553                struct igb_ring *ring = adapter->rx_ring[i];
4554
4555                ring->rx_stats.drops += rqdpc_tmp;
4556                net_stats->rx_fifo_errors += rqdpc_tmp;
4557
4558                do {
4559                        start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4560                        _bytes = ring->rx_stats.bytes;
4561                        _packets = ring->rx_stats.packets;
4562                } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4563                bytes += _bytes;
4564                packets += _packets;
4565        }
4566
4567        net_stats->rx_bytes = bytes;
4568        net_stats->rx_packets = packets;
4569
4570        bytes = 0;
4571        packets = 0;
4572        for (i = 0; i < adapter->num_tx_queues; i++) {
4573                struct igb_ring *ring = adapter->tx_ring[i];
4574                do {
4575                        start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4576                        _bytes = ring->tx_stats.bytes;
4577                        _packets = ring->tx_stats.packets;
4578                } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4579                bytes += _bytes;
4580                packets += _packets;
4581        }
4582        net_stats->tx_bytes = bytes;
4583        net_stats->tx_packets = packets;
4584
4585        /* read stats registers */
4586        adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4587        adapter->stats.gprc += rd32(E1000_GPRC);
4588        adapter->stats.gorc += rd32(E1000_GORCL);
4589        rd32(E1000_GORCH); /* clear GORCL */
4590        adapter->stats.bprc += rd32(E1000_BPRC);
4591        adapter->stats.mprc += rd32(E1000_MPRC);
4592        adapter->stats.roc += rd32(E1000_ROC);
4593
4594        adapter->stats.prc64 += rd32(E1000_PRC64);
4595        adapter->stats.prc127 += rd32(E1000_PRC127);
4596        adapter->stats.prc255 += rd32(E1000_PRC255);
4597        adapter->stats.prc511 += rd32(E1000_PRC511);
4598        adapter->stats.prc1023 += rd32(E1000_PRC1023);
4599        adapter->stats.prc1522 += rd32(E1000_PRC1522);
4600        adapter->stats.symerrs += rd32(E1000_SYMERRS);
4601        adapter->stats.sec += rd32(E1000_SEC);
4602
4603        mpc = rd32(E1000_MPC);
4604        adapter->stats.mpc += mpc;
4605        net_stats->rx_fifo_errors += mpc;
4606        adapter->stats.scc += rd32(E1000_SCC);
4607        adapter->stats.ecol += rd32(E1000_ECOL);
4608        adapter->stats.mcc += rd32(E1000_MCC);
4609        adapter->stats.latecol += rd32(E1000_LATECOL);
4610        adapter->stats.dc += rd32(E1000_DC);
4611        adapter->stats.rlec += rd32(E1000_RLEC);
4612        adapter->stats.xonrxc += rd32(E1000_XONRXC);
4613        adapter->stats.xontxc += rd32(E1000_XONTXC);
4614        adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4615        adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4616        adapter->stats.fcruc += rd32(E1000_FCRUC);
4617        adapter->stats.gptc += rd32(E1000_GPTC);
4618        adapter->stats.gotc += rd32(E1000_GOTCL);
4619        rd32(E1000_GOTCH); /* clear GOTCL */
4620        adapter->stats.rnbc += rd32(E1000_RNBC);
4621        adapter->stats.ruc += rd32(E1000_RUC);
4622        adapter->stats.rfc += rd32(E1000_RFC);
4623        adapter->stats.rjc += rd32(E1000_RJC);
4624        adapter->stats.tor += rd32(E1000_TORH);
4625        adapter->stats.tot += rd32(E1000_TOTH);
4626        adapter->stats.tpr += rd32(E1000_TPR);
4627
4628        adapter->stats.ptc64 += rd32(E1000_PTC64);
4629        adapter->stats.ptc127 += rd32(E1000_PTC127);
4630        adapter->stats.ptc255 += rd32(E1000_PTC255);
4631        adapter->stats.ptc511 += rd32(E1000_PTC511);
4632        adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4633        adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4634
4635        adapter->stats.mptc += rd32(E1000_MPTC);
4636        adapter->stats.bptc += rd32(E1000_BPTC);
4637
4638        adapter->stats.tpt += rd32(E1000_TPT);
4639        adapter->stats.colc += rd32(E1000_COLC);
4640
4641        adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4642        /* read internal phy specific stats */
4643        reg = rd32(E1000_CTRL_EXT);
4644        if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4645                adapter->stats.rxerrc += rd32(E1000_RXERRC);
4646                adapter->stats.tncrs += rd32(E1000_TNCRS);
4647        }
4648
4649        adapter->stats.tsctc += rd32(E1000_TSCTC);
4650        adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4651
4652        adapter->stats.iac += rd32(E1000_IAC);
4653        adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4654        adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4655        adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4656        adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4657        adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4658        adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4659        adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4660        adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4661
4662        /* Fill out the OS statistics structure */
4663        net_stats->multicast = adapter->stats.mprc;
4664        net_stats->collisions = adapter->stats.colc;
4665
4666        /* Rx Errors */
4667
4668        /* RLEC on some newer hardware can be incorrect so build
4669         * our own version based on RUC and ROC */
4670        net_stats->rx_errors = adapter->stats.rxerrc +
4671                adapter->stats.crcerrs + adapter->stats.algnerrc +
4672                adapter->stats.ruc + adapter->stats.roc +
4673                adapter->stats.cexterr;
4674        net_stats->rx_length_errors = adapter->stats.ruc +
4675                                      adapter->stats.roc;
4676        net_stats->rx_crc_errors = adapter->stats.crcerrs;
4677        net_stats->rx_frame_errors = adapter->stats.algnerrc;
4678        net_stats->rx_missed_errors = adapter->stats.mpc;
4679
4680        /* Tx Errors */
4681        net_stats->tx_errors = adapter->stats.ecol +
4682                               adapter->stats.latecol;
4683        net_stats->tx_aborted_errors = adapter->stats.ecol;
4684        net_stats->tx_window_errors = adapter->stats.latecol;
4685        net_stats->tx_carrier_errors = adapter->stats.tncrs;
4686
4687        /* Tx Dropped needs to be maintained elsewhere */
4688
4689        /* Phy Stats */
4690        if (hw->phy.media_type == e1000_media_type_copper) {
4691                if ((adapter->link_speed == SPEED_1000) &&
4692                   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4693                        phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4694                        adapter->phy_stats.idle_errors += phy_tmp;
4695                }
4696        }
4697
4698        /* Management Stats */
4699        adapter->stats.mgptc += rd32(E1000_MGTPTC);
4700        adapter->stats.mgprc += rd32(E1000_MGTPRC);
4701        adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4702
4703        /* OS2BMC Stats */
4704        reg = rd32(E1000_MANC);
4705        if (reg & E1000_MANC_EN_BMC2OS) {
4706                adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4707                adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4708                adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4709                adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4710        }
4711}
4712
4713static irqreturn_t igb_msix_other(int irq, void *data)
4714{
4715        struct igb_adapter *adapter = data;
4716        struct e1000_hw *hw = &adapter->hw;
4717        u32 icr = rd32(E1000_ICR);
4718        /* reading ICR causes bit 31 of EICR to be cleared */
4719
4720        if (icr & E1000_ICR_DRSTA)
4721                schedule_work(&adapter->reset_task);
4722
4723        if (icr & E1000_ICR_DOUTSYNC) {
4724                /* HW is reporting DMA is out of sync */
4725                adapter->stats.doosync++;
4726                /* The DMA Out of Sync is also indication of a spoof event
4727                 * in IOV mode. Check the Wrong VM Behavior register to
4728                 * see if it is really a spoof event. */
4729                igb_check_wvbr(adapter);
4730        }
4731
4732        /* Check for a mailbox event */
4733        if (icr & E1000_ICR_VMMB)
4734                igb_msg_task(adapter);
4735
4736        if (icr & E1000_ICR_LSC) {
4737                hw->mac.get_link_status = 1;
4738                /* guard against interrupt when we're going down */
4739                if (!test_bit(__IGB_DOWN, &adapter->state))
4740                        mod_timer(&adapter->watchdog_timer, jiffies + 1);
4741        }
4742
4743        wr32(E1000_EIMS, adapter->eims_other);
4744
4745        return IRQ_HANDLED;
4746}
4747
4748static void igb_write_itr(struct igb_q_vector *q_vector)
4749{
4750        struct igb_adapter *adapter = q_vector->adapter;
4751        u32</