linux/drivers/net/virtio_net.c
   1/* A network driver using virtio.
   2 *
   3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; either version 2 of the License, or
   8 * (at your option) any later version.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18 */
  19//#define DEBUG
  20#include <linux/netdevice.h>
  21#include <linux/etherdevice.h>
  22#include <linux/ethtool.h>
  23#include <linux/module.h>
  24#include <linux/virtio.h>
  25#include <linux/virtio_net.h>
  26#include <linux/scatterlist.h>
  27#include <linux/if_vlan.h>
  28#include <linux/slab.h>
  29#include <linux/cpu.h>
  30
  31static int napi_weight = 128;
  32module_param(napi_weight, int, 0444);
  33
  34static bool csum = true, gso = true;
  35module_param(csum, bool, 0444);
  36module_param(gso, bool, 0444);
  37
  38/* FIXME: MTU in config. */
  39#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
  40#define GOOD_COPY_LEN   128
  41
  42#define VIRTNET_SEND_COMMAND_SG_MAX    2
  43#define VIRTNET_DRIVER_VERSION "1.0.0"
  44
  45struct virtnet_stats {
  46        struct u64_stats_sync tx_syncp;
  47        struct u64_stats_sync rx_syncp;
  48        u64 tx_bytes;
  49        u64 tx_packets;
  50
  51        u64 rx_bytes;
  52        u64 rx_packets;
  53};
  54
  55/* Internal representation of a send virtqueue */
  56struct send_queue {
  57        /* Virtqueue associated with this send_queue */
  58        struct virtqueue *vq;
  59
  60        /* TX: fragments + linear part + virtio header */
  61        struct scatterlist sg[MAX_SKB_FRAGS + 2];
  62
  63        /* Name of the send queue: output.$index */
  64        char name[40];
  65};
  66
  67/* Internal representation of a receive virtqueue */
  68struct receive_queue {
  69        /* Virtqueue associated with this receive_queue */
  70        struct virtqueue *vq;
  71
  72        struct napi_struct napi;
  73
  74        /* Number of input buffers, and max we've ever had. */
  75        unsigned int num, max;
  76
  77        /* Chain pages by the private ptr. */
  78        struct page *pages;
  79
  80        /* RX: fragments + linear part + virtio header */
  81        struct scatterlist sg[MAX_SKB_FRAGS + 2];
  82
  83        /* Name of this receive queue: input.$index */
  84        char name[40];
  85};
  86
  87struct virtnet_info {
  88        struct virtio_device *vdev;
  89        struct virtqueue *cvq;
  90        struct net_device *dev;
  91        struct send_queue *sq;
  92        struct receive_queue *rq;
  93        unsigned int status;
  94
  95        /* Max # of queue pairs supported by the device */
  96        u16 max_queue_pairs;
  97
  98        /* # of queue pairs currently used by the driver */
  99        u16 curr_queue_pairs;
 100
 101        /* I like... big packets and I cannot lie! */
 102        bool big_packets;
 103
 104        /* Host will merge rx buffers for big packets (shake it! shake it!) */
 105        bool mergeable_rx_bufs;
 106
 107        /* Has control virtqueue */
 108        bool has_cvq;
 109
 110        /* enable config space updates */
 111        bool config_enable;
 112
 113        /* Active statistics */
 114        struct virtnet_stats __percpu *stats;
 115
 116        /* Work struct for refilling if we run low on memory. */
 117        struct delayed_work refill;
 118
 119        /* Work struct for config space updates */
 120        struct work_struct config_work;
 121
 122        /* Lock for config space updates */
 123        struct mutex config_lock;
 124
 125        /* Is the affinity hint set for virtqueues? */
 126        bool affinity_hint_set;
 127
 128        /* Per-cpu variable to show the mapping from CPU to virtqueue */
 129        int __percpu *vq_index;
 130
 131        /* CPU hot plug notifier */
 132        struct notifier_block nb;
 133};
 134
 135struct skb_vnet_hdr {
 136        union {
 137                struct virtio_net_hdr hdr;
 138                struct virtio_net_hdr_mrg_rxbuf mhdr;
 139        };
 140};
 141
 142struct padded_vnet_hdr {
 143        struct virtio_net_hdr hdr;
 144        /*
 145         * virtio_net_hdr should be in a separate sg buffer because of a
 146         * QEMU bug, and data sg buffer shares same page with this header sg.
 147         * This padding makes next sg 16 byte aligned after virtio_net_hdr.
 148         */
 149        char padding[6];
 150};
 151
 152/* Converting between virtqueue no. and kernel tx/rx queue no.
 153 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 154 */
 155static int vq2txq(struct virtqueue *vq)
 156{
 157        return (virtqueue_get_queue_index(vq) - 1) / 2;
 158}
 159
 160static int txq2vq(int txq)
 161{
 162        return txq * 2 + 1;
 163}
 164
 165static int vq2rxq(struct virtqueue *vq)
 166{
 167        return virtqueue_get_queue_index(vq) / 2;
 168}
 169
 170static int rxq2vq(int rxq)
 171{
 172        return rxq * 2;
 173}
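/*
 * For example, with two queue pairs the virtqueue indices are
 * 0:rx0 1:tx0 2:rx1 3:tx1 4:cvq, so rxq2vq(1) == 2, txq2vq(1) == 3,
 * and vq2txq() of the virtqueue with index 3 returns 1.
 */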
 174
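/*
 * The per-skb virtio header is stashed in the skb control buffer (skb->cb)
 * rather than allocated separately; the cb area is large enough for
 * virtio_net_hdr_mrg_rxbuf.
 */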
 175static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
 176{
 177        return (struct skb_vnet_hdr *)skb->cb;
 178}
 179
 180/*
 181 * private is used to chain pages for big packets; put the whole
 182 * most recently used list at the beginning for reuse.
 183 */
 184static void give_pages(struct receive_queue *rq, struct page *page)
 185{
 186        struct page *end;
 187
 188        /* Find end of list, sew whole thing into vi->rq.pages. */
 189        for (end = page; end->private; end = (struct page *)end->private);
 190        end->private = (unsigned long)rq->pages;
 191        rq->pages = page;
 192}
 193
 194static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 195{
 196        struct page *p = rq->pages;
 197
 198        if (p) {
 199                rq->pages = (struct page *)p->private;
 200                /* clear private here, it is used to chain pages */
 201                p->private = 0;
 202        } else
 203                p = alloc_page(gfp_mask);
 204        return p;
 205}
 206
 207static void skb_xmit_done(struct virtqueue *vq)
 208{
 209        struct virtnet_info *vi = vq->vdev->priv;
 210
 211        /* Suppress further interrupts. */
 212        virtqueue_disable_cb(vq);
 213
 214        /* We were probably waiting for more output buffers. */
 215        netif_wake_subqueue(vi->dev, vq2txq(vq));
 216}
 217
 218static void set_skb_frag(struct sk_buff *skb, struct page *page,
 219                         unsigned int offset, unsigned int *len)
 220{
 221        int size = min((unsigned)PAGE_SIZE - offset, *len);
 222        int i = skb_shinfo(skb)->nr_frags;
 223
 224        __skb_fill_page_desc(skb, i, page, offset, size);
 225
 226        skb->data_len += size;
 227        skb->len += size;
 228        skb->truesize += PAGE_SIZE;
 229        skb_shinfo(skb)->nr_frags++;
 230        skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 231        *len -= size;
 232}
 233
 234/* Called from bottom half context */
 235static struct sk_buff *page_to_skb(struct receive_queue *rq,
 236                                   struct page *page, unsigned int len)
 237{
 238        struct virtnet_info *vi = rq->vq->vdev->priv;
 239        struct sk_buff *skb;
 240        struct skb_vnet_hdr *hdr;
 241        unsigned int copy, hdr_len, offset;
 242        char *p;
 243
 244        p = page_address(page);
 245
 246        /* copy small packet so we can reuse these pages for small data */
 247        skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
 248        if (unlikely(!skb))
 249                return NULL;
 250
 251        hdr = skb_vnet_hdr(skb);
 252
 253        if (vi->mergeable_rx_bufs) {
 254                hdr_len = sizeof hdr->mhdr;
 255                offset = hdr_len;
 256        } else {
 257                hdr_len = sizeof hdr->hdr;
 258                offset = sizeof(struct padded_vnet_hdr);
 259        }
 260
 261        memcpy(hdr, p, hdr_len);
 262
 263        len -= hdr_len;
 264        p += offset;
 265
 266        copy = len;
 267        if (copy > skb_tailroom(skb))
 268                copy = skb_tailroom(skb);
 269        memcpy(skb_put(skb, copy), p, copy);
 270
 271        len -= copy;
 272        offset += copy;
 273
 274        /*
 275         * Verify that we can indeed put this data into a skb.
 276         * This is here to handle cases when the device erroneously
 277         * tries to receive more than is possible. This is usually
 278         * the case of a broken device.
 279         */
 280        if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
 281                net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
 282                dev_kfree_skb(skb);
 283                return NULL;
 284        }
 285
 286        while (len) {
 287                set_skb_frag(skb, page, offset, &len);
 288                page = (struct page *)page->private;
 289                offset = 0;
 290        }
 291
 292        if (page)
 293                give_pages(rq, page);
 294
 295        return skb;
 296}
 297
 298static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
 299{
 300        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 301        struct page *page;
 302        int num_buf, i, len;
 303
 304        num_buf = hdr->mhdr.num_buffers;
 305        while (--num_buf) {
 306                i = skb_shinfo(skb)->nr_frags;
 307                if (i >= MAX_SKB_FRAGS) {
 308                        pr_debug("%s: packet too long\n", skb->dev->name);
 309                        skb->dev->stats.rx_length_errors++;
 310                        return -EINVAL;
 311                }
 312                page = virtqueue_get_buf(rq->vq, &len);
 313                if (!page) {
 314                        pr_debug("%s: rx error: %d buffers missing\n",
 315                                 skb->dev->name, hdr->mhdr.num_buffers);
 316                        skb->dev->stats.rx_length_errors++;
 317                        return -EINVAL;
 318                }
 319
 320                if (len > PAGE_SIZE)
 321                        len = PAGE_SIZE;
 322
 323                set_skb_frag(skb, page, 0, &len);
 324
 325                --rq->num;
 326        }
 327        return 0;
 328}
 329
 330static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 331{
 332        struct virtnet_info *vi = rq->vq->vdev->priv;
 333        struct net_device *dev = vi->dev;
 334        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 335        struct sk_buff *skb;
 336        struct page *page;
 337        struct skb_vnet_hdr *hdr;
 338
 339        if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
 340                pr_debug("%s: short packet %i\n", dev->name, len);
 341                dev->stats.rx_length_errors++;
 342                if (vi->mergeable_rx_bufs || vi->big_packets)
 343                        give_pages(rq, buf);
 344                else
 345                        dev_kfree_skb(buf);
 346                return;
 347        }
 348
 349        if (!vi->mergeable_rx_bufs && !vi->big_packets) {
 350                skb = buf;
 351                len -= sizeof(struct virtio_net_hdr);
 352                skb_trim(skb, len);
 353        } else {
 354                page = buf;
 355                skb = page_to_skb(rq, page, len);
 356                if (unlikely(!skb)) {
 357                        dev->stats.rx_dropped++;
 358                        give_pages(rq, page);
 359                        return;
 360                }
 361                if (vi->mergeable_rx_bufs)
 362                        if (receive_mergeable(rq, skb)) {
 363                                dev_kfree_skb(skb);
 364                                return;
 365                        }
 366        }
 367
 368        hdr = skb_vnet_hdr(skb);
 369
 370        u64_stats_update_begin(&stats->rx_syncp);
 371        stats->rx_bytes += skb->len;
 372        stats->rx_packets++;
 373        u64_stats_update_end(&stats->rx_syncp);
 374
 375        if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 376                pr_debug("Needs csum!\n");
 377                if (!skb_partial_csum_set(skb,
 378                                          hdr->hdr.csum_start,
 379                                          hdr->hdr.csum_offset))
 380                        goto frame_err;
 381        } else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
 382                skb->ip_summed = CHECKSUM_UNNECESSARY;
 383        }
 384
 385        skb->protocol = eth_type_trans(skb, dev);
 386        pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
 387                 ntohs(skb->protocol), skb->len, skb->pkt_type);
 388
 389        if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 390                pr_debug("GSO!\n");
 391                switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 392                case VIRTIO_NET_HDR_GSO_TCPV4:
 393                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 394                        break;
 395                case VIRTIO_NET_HDR_GSO_UDP:
 396                        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 397                        break;
 398                case VIRTIO_NET_HDR_GSO_TCPV6:
 399                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 400                        break;
 401                default:
 402                        net_warn_ratelimited("%s: bad gso type %u.\n",
 403                                             dev->name, hdr->hdr.gso_type);
 404                        goto frame_err;
 405                }
 406
 407                if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 408                        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 409
 410                skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 411                if (skb_shinfo(skb)->gso_size == 0) {
 412                        net_warn_ratelimited("%s: zero gso size.\n", dev->name);
 413                        goto frame_err;
 414                }
 415
 416                /* Header must be checked, and gso_segs computed. */
 417                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 418                skb_shinfo(skb)->gso_segs = 0;
 419        }
 420
 421        netif_receive_skb(skb);
 422        return;
 423
 424frame_err:
 425        dev->stats.rx_frame_errors++;
 426        dev_kfree_skb(skb);
 427}
 428
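/*
 * Receive buffers are posted in one of three forms, chosen at probe time:
 * small (one MAX_PACKET_LEN skb per descriptor, header in sg[0]), big (a
 * chain of pages covering MAX_SKB_FRAGS + 2 sg entries, linked through
 * page->private), or mergeable (a single page per descriptor; the host
 * spreads a large packet over several buffers and reports the count in the
 * num_buffers field of the merged rx header).
 */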
 429static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 430{
 431        struct virtnet_info *vi = rq->vq->vdev->priv;
 432        struct sk_buff *skb;
 433        struct skb_vnet_hdr *hdr;
 434        int err;
 435
 436        skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
 437        if (unlikely(!skb))
 438                return -ENOMEM;
 439
 440        skb_put(skb, MAX_PACKET_LEN);
 441
 442        hdr = skb_vnet_hdr(skb);
 443        sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
 444
 445        skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 446
 447        err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
 448        if (err < 0)
 449                dev_kfree_skb(skb);
 450
 451        return err;
 452}
 453
 454static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 455{
 456        struct page *first, *list = NULL;
 457        char *p;
 458        int i, err, offset;
 459
 460        /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 461        for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
 462                first = get_a_page(rq, gfp);
 463                if (!first) {
 464                        if (list)
 465                                give_pages(rq, list);
 466                        return -ENOMEM;
 467                }
 468                sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
 469
 470                /* chain new page in list head to match sg */
 471                first->private = (unsigned long)list;
 472                list = first;
 473        }
 474
 475        first = get_a_page(rq, gfp);
 476        if (!first) {
 477                give_pages(rq, list);
 478                return -ENOMEM;
 479        }
 480        p = page_address(first);
 481
 482        /* rq->sg[0], rq->sg[1] share the same page */
 483        /* a separate rq->sg[0] for virtio_net_hdr only, due to a QEMU bug */
 484        sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
 485
 486        /* rq->sg[1] for data packet, from offset */
 487        offset = sizeof(struct padded_vnet_hdr);
 488        sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 489
 490        /* chain first in list head */
 491        first->private = (unsigned long)list;
 492        err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
 493                                first, gfp);
 494        if (err < 0)
 495                give_pages(rq, first);
 496
 497        return err;
 498}
 499
 500static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 501{
 502        struct page *page;
 503        int err;
 504
 505        page = get_a_page(rq, gfp);
 506        if (!page)
 507                return -ENOMEM;
 508
 509        sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
 510
 511        err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
 512        if (err < 0)
 513                give_pages(rq, page);
 514
 515        return err;
 516}
 517
 518/*
 519 * Returns false if we couldn't fill entirely (OOM).
 520 *
 521 * Normally run in the receive path, but can also be run from ndo_open
 522 * before we're receiving packets, or from refill_work which is
 523 * careful to disable receiving (using napi_disable).
 524 */
 525static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 526{
 527        struct virtnet_info *vi = rq->vq->vdev->priv;
 528        int err;
 529        bool oom;
 530
 531        do {
 532                if (vi->mergeable_rx_bufs)
 533                        err = add_recvbuf_mergeable(rq, gfp);
 534                else if (vi->big_packets)
 535                        err = add_recvbuf_big(rq, gfp);
 536                else
 537                        err = add_recvbuf_small(rq, gfp);
 538
 539                oom = err == -ENOMEM;
 540                if (err)
 541                        break;
 542                ++rq->num;
 543        } while (rq->vq->num_free);
 544        if (unlikely(rq->num > rq->max))
 545                rq->max = rq->num;
 546        virtqueue_kick(rq->vq);
 547        return !oom;
 548}
 549
 550static void skb_recv_done(struct virtqueue *rvq)
 551{
 552        struct virtnet_info *vi = rvq->vdev->priv;
 553        struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
 554
 555        /* Schedule NAPI; suppress further interrupts if successful. */
 556        if (napi_schedule_prep(&rq->napi)) {
 557                virtqueue_disable_cb(rvq);
 558                __napi_schedule(&rq->napi);
 559        }
 560}
 561
 562static void virtnet_napi_enable(struct receive_queue *rq)
 563{
 564        napi_enable(&rq->napi);
 565
 566        /* If all buffers were filled by the other side before we napi_enabled,
 567         * we won't get another interrupt, so process any outstanding packets
 568         * now.  virtnet_poll wants to re-enable the queue, so we disable here.
 569         * We synchronize against interrupts via NAPI_STATE_SCHED. */
 570        if (napi_schedule_prep(&rq->napi)) {
 571                virtqueue_disable_cb(rq->vq);
 572                local_bh_disable();
 573                __napi_schedule(&rq->napi);
 574                local_bh_enable();
 575        }
 576}
 577
 578static void refill_work(struct work_struct *work)
 579{
 580        struct virtnet_info *vi =
 581                container_of(work, struct virtnet_info, refill.work);
 582        bool still_empty;
 583        int i;
 584
 585        for (i = 0; i < vi->max_queue_pairs; i++) {
 586                struct receive_queue *rq = &vi->rq[i];
 587
 588                napi_disable(&rq->napi);
 589                still_empty = !try_fill_recv(rq, GFP_KERNEL);
 590                virtnet_napi_enable(rq);
 591
 592                /* In theory, this can happen: if we don't get any buffers in
 593                 * we will *never* try to fill again.
 594                 */
 595                if (still_empty)
 596                        schedule_delayed_work(&vi->refill, HZ/2);
 597        }
 598}
 599
 600static int virtnet_poll(struct napi_struct *napi, int budget)
 601{
 602        struct receive_queue *rq =
 603                container_of(napi, struct receive_queue, napi);
 604        struct virtnet_info *vi = rq->vq->vdev->priv;
 605        void *buf;
 606        unsigned int len, received = 0;
 607
 608again:
 609        while (received < budget &&
 610               (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
 611                receive_buf(rq, buf, len);
 612                --rq->num;
 613                received++;
 614        }
 615
 616        if (rq->num < rq->max / 2) {
 617                if (!try_fill_recv(rq, GFP_ATOMIC))
 618                        schedule_delayed_work(&vi->refill, 0);
 619        }
 620
 621        /* Out of packets? */
 622        if (received < budget) {
 623                napi_complete(napi);
 624                if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
 625                    napi_schedule_prep(napi)) {
 626                        virtqueue_disable_cb(rq->vq);
 627                        __napi_schedule(napi);
 628                        goto again;
 629                }
 630        }
 631
 632        return received;
 633}
 634
 635static int virtnet_open(struct net_device *dev)
 636{
 637        struct virtnet_info *vi = netdev_priv(dev);
 638        int i;
 639
 640        for (i = 0; i < vi->max_queue_pairs; i++) {
 641                /* Make sure we have some buffers: if we are OOM, use the workqueue. */
 642                if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 643                        schedule_delayed_work(&vi->refill, 0);
 644                virtnet_napi_enable(&vi->rq[i]);
 645        }
 646
 647        return 0;
 648}
 649
 650static void free_old_xmit_skbs(struct send_queue *sq)
 651{
 652        struct sk_buff *skb;
 653        unsigned int len;
 654        struct virtnet_info *vi = sq->vq->vdev->priv;
 655        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 656
 657        while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 658                pr_debug("Sent skb %p\n", skb);
 659
 660                u64_stats_update_begin(&stats->tx_syncp);
 661                stats->tx_bytes += skb->len;
 662                stats->tx_packets++;
 663                u64_stats_update_end(&stats->tx_syncp);
 664
 665                dev_kfree_skb_any(skb);
 666        }
 667}
 668
 669static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 670{
 671        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 672        const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 673        struct virtnet_info *vi = sq->vq->vdev->priv;
 674        unsigned num_sg;
 675
 676        pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 677
 678        if (skb->ip_summed == CHECKSUM_PARTIAL) {
 679                hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 680                hdr->hdr.csum_start = skb_checksum_start_offset(skb);
 681                hdr->hdr.csum_offset = skb->csum_offset;
 682        } else {
 683                hdr->hdr.flags = 0;
 684                hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
 685        }
 686
 687        if (skb_is_gso(skb)) {
 688                hdr->hdr.hdr_len = skb_headlen(skb);
 689                hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
 690                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
 691                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 692                else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 693                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 694                else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
 695                        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
 696                else
 697                        BUG();
 698                if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
 699                        hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 700        } else {
 701                hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
 702                hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 703        }
 704
 705        hdr->mhdr.num_buffers = 0;
 706
 707        /* Encode metadata header at front. */
 708        if (vi->mergeable_rx_bufs)
 709                sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
 710        else
 711                sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
 712
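        /* sq->sg[0] holds the virtio header; sq->sg[1..] hold the skb linear
         * data and page fragments.  All num_sg entries are "out" buffers. */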
 713        num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
 714        return virtqueue_add_buf(sq->vq, sq->sg, num_sg,
 715                                 0, skb, GFP_ATOMIC);
 716}
 717
 718static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 719{
 720        struct virtnet_info *vi = netdev_priv(dev);
 721        int qnum = skb_get_queue_mapping(skb);
 722        struct send_queue *sq = &vi->sq[qnum];
 723        int err;
 724
 725        /* Free up any pending old buffers before queueing new ones. */
 726        free_old_xmit_skbs(sq);
 727
 728        /* Try to transmit */
 729        err = xmit_skb(sq, skb);
 730
 731        /* This should not happen! */
 732        if (unlikely(err)) {
 733                dev->stats.tx_fifo_errors++;
 734                if (net_ratelimit())
 735                        dev_warn(&dev->dev,
 736                                 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
 737                dev->stats.tx_dropped++;
 738                kfree_skb(skb);
 739                return NETDEV_TX_OK;
 740        }
 741        virtqueue_kick(sq->vq);
 742
 743        /* Don't wait up for transmitted skbs to be freed. */
 744        skb_orphan(skb);
 745        nf_reset(skb);
 746
 747        /* Apparently nice girls don't return TX_BUSY; stop the queue
 748         * before it gets out of hand.  Naturally, this wastes entries. */
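        /* 2+MAX_SKB_FRAGS is the worst case for the next packet: one entry
         * for the virtio header, one for the linear part and one per page
         * fragment. */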
 749        if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
 750                netif_stop_subqueue(dev, qnum);
 751                if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 752                        /* More just got used, free them then recheck. */
 753                        free_old_xmit_skbs(sq);
 754                        if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
 755                                netif_start_subqueue(dev, qnum);
 756                                virtqueue_disable_cb(sq->vq);
 757                        }
 758                }
 759        }
 760
 761        return NETDEV_TX_OK;
 762}
 763
 764/*
 765 * Send command via the control virtqueue and check status.  Commands
 766 * supported by the hypervisor, as indicated by feature bits, should
 767 * never fail unless improperly formatted.
 768 */
 769static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 770                                 struct scatterlist *data, int out, int in)
 771{
 772        struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
 773        struct virtio_net_ctrl_hdr ctrl;
 774        virtio_net_ctrl_ack status = ~0;
 775        unsigned int tmp;
 776        int i;
 777
 778        /* Caller should know better */
 779        BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
 780                (out + in > VIRTNET_SEND_COMMAND_SG_MAX));
 781
 782        out++; /* Add header */
 783        in++; /* Add return status */
 784
 785        ctrl.class = class;
 786        ctrl.cmd = cmd;
 787
 788        sg_init_table(sg, out + in);
 789
 790        sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
 791        for_each_sg(data, s, out + in - 2, i)
 792                sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 793        sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 794
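        /* sg layout: [0] = command header (out), then the caller's data
         * entries (out followed by in), last = status byte (in). */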
 795        BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
 796
 797        virtqueue_kick(vi->cvq);
 798
 799        /* Spin for a response; the kick causes an ioport write, trapping
 800         * into the hypervisor, so the request should be handled immediately.
 801         */
 802        while (!virtqueue_get_buf(vi->cvq, &tmp))
 803                cpu_relax();
 804
 805        return status == VIRTIO_NET_OK;
 806}
 807
 808static int virtnet_set_mac_address(struct net_device *dev, void *p)
 809{
 810        struct virtnet_info *vi = netdev_priv(dev);
 811        struct virtio_device *vdev = vi->vdev;
 812        int ret;
 813        struct sockaddr *addr = p;
 814        struct scatterlist sg;
 815
 816        ret = eth_prepare_mac_addr_change(dev, p);
 817        if (ret)
 818                return ret;
 819
 820        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
 821                sg_init_one(&sg, addr->sa_data, dev->addr_len);
 822                if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 823                                          VIRTIO_NET_CTRL_MAC_ADDR_SET,
 824                                          &sg, 1, 0)) {
 825                        dev_warn(&vdev->dev,
 826                                 "Failed to set mac address by vq command.\n");
 827                        return -EINVAL;
 828                }
 829        } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
 830                vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
 831                                  addr->sa_data, dev->addr_len);
 832        }
 833
 834        eth_commit_mac_addr_change(dev, p);
 835
 836        return 0;
 837}
 838
 839static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 840                                               struct rtnl_link_stats64 *tot)
 841{
 842        struct virtnet_info *vi = netdev_priv(dev);
 843        int cpu;
 844        unsigned int start;
 845
 846        for_each_possible_cpu(cpu) {
 847                struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
 848                u64 tpackets, tbytes, rpackets, rbytes;
 849
 850                do {
 851                        start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
 852                        tpackets = stats->tx_packets;
 853                        tbytes   = stats->tx_bytes;
 854                } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
 855
 856                do {
 857                        start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
 858                        rpackets = stats->rx_packets;
 859                        rbytes   = stats->rx_bytes;
 860                } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
 861
 862                tot->rx_packets += rpackets;
 863                tot->tx_packets += tpackets;
 864                tot->rx_bytes   += rbytes;
 865                tot->tx_bytes   += tbytes;
 866        }
 867
 868        tot->tx_dropped = dev->stats.tx_dropped;
 869        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 870        tot->rx_dropped = dev->stats.rx_dropped;
 871        tot->rx_length_errors = dev->stats.rx_length_errors;
 872        tot->rx_frame_errors = dev->stats.rx_frame_errors;
 873
 874        return tot;
 875}
 876
 877#ifdef CONFIG_NET_POLL_CONTROLLER
 878static void virtnet_netpoll(struct net_device *dev)
 879{
 880        struct virtnet_info *vi = netdev_priv(dev);
 881        int i;
 882
 883        for (i = 0; i < vi->curr_queue_pairs; i++)
 884                napi_schedule(&vi->rq[i].napi);
 885}
 886#endif
 887
 888static void virtnet_ack_link_announce(struct virtnet_info *vi)
 889{
 890        rtnl_lock();
 891        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
 892                                  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL,
 893                                  0, 0))
 894                dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
 895        rtnl_unlock();
 896}
 897
 898static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 899{
 900        struct scatterlist sg;
 901        struct virtio_net_ctrl_mq s;
 902        struct net_device *dev = vi->dev;
 903
 904        if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
 905                return 0;
 906
 907        s.virtqueue_pairs = queue_pairs;
 908        sg_init_one(&sg, &s, sizeof(s));
 909
 910        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
 911                                  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)) {
 912                dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
 913                         queue_pairs);
 914                return -EINVAL;
 915        } else
 916                vi->curr_queue_pairs = queue_pairs;
 917
 918        return 0;
 919}
 920
 921static int virtnet_close(struct net_device *dev)
 922{
 923        struct virtnet_info *vi = netdev_priv(dev);
 924        int i;
 925
 926        /* Make sure refill_work doesn't re-enable napi! */
 927        cancel_delayed_work_sync(&vi->refill);
 928
 929        for (i = 0; i < vi->max_queue_pairs; i++)
 930                napi_disable(&vi->rq[i].napi);
 931
 932        return 0;
 933}
 934
 935static void virtnet_set_rx_mode(struct net_device *dev)
 936{
 937        struct virtnet_info *vi = netdev_priv(dev);
 938        struct scatterlist sg[2];
 939        u8 promisc, allmulti;
 940        struct virtio_net_ctrl_mac *mac_data;
 941        struct netdev_hw_addr *ha;
 942        int uc_count;
 943        int mc_count;
 944        void *buf;
 945        int i;
 946
 947        /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
 948        if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 949                return;
 950
 951        promisc = ((dev->flags & IFF_PROMISC) != 0);
 952        allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 953
 954        sg_init_one(sg, &promisc, sizeof(promisc));
 955
 956        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 957                                  VIRTIO_NET_CTRL_RX_PROMISC,
 958                                  sg, 1, 0))
 959                dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
 960                         promisc ? "en" : "dis");
 961
 962        sg_init_one(sg, &allmulti, sizeof(allmulti));
 963
 964        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 965                                  VIRTIO_NET_CTRL_RX_ALLMULTI,
 966                                  sg, 1, 0))
 967                dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
 968                         allmulti ? "en" : "dis");
 969
 970        uc_count = netdev_uc_count(dev);
 971        mc_count = netdev_mc_count(dev);
 972        /* MAC filter - use one buffer for both lists */
 973        buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
 974                      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
 975        mac_data = buf;
 976        if (!buf)
 977                return;
 978
 979        sg_init_table(sg, 2);
 980
 981        /* Store the unicast list and count in the front of the buffer */
 982        mac_data->entries = uc_count;
 983        i = 0;
 984        netdev_for_each_uc_addr(ha, dev)
 985                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 986
 987        sg_set_buf(&sg[0], mac_data,
 988                   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
 989
 990        /* multicast list and count fill the end */
 991        mac_data = (void *)&mac_data->macs[uc_count][0];
 992
 993        mac_data->entries = mc_count;
 994        i = 0;
 995        netdev_for_each_mc_addr(ha, dev)
 996                memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 997
 998        sg_set_buf(&sg[1], mac_data,
 999                   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
1000
1001        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
1002                                  VIRTIO_NET_CTRL_MAC_TABLE_SET,
1003                                  sg, 2, 0))
1004                dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
1005
1006        kfree(buf);
1007}
1008
1009static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
1010{
1011        struct virtnet_info *vi = netdev_priv(dev);
1012        struct scatterlist sg;
1013
1014        sg_init_one(&sg, &vid, sizeof(vid));
1015
1016        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1017                                  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
1018                dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
1019        return 0;
1020}
1021
1022static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
1023{
1024        struct virtnet_info *vi = netdev_priv(dev);
1025        struct scatterlist sg;
1026
1027        sg_init_one(&sg, &vid, sizeof(vid));
1028
1029        if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
1030                                  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
1031                dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
1032        return 0;
1033}
1034
1035static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
1036{
1037        int i;
1038        int cpu;
1039
1040        if (vi->affinity_hint_set) {
1041                for (i = 0; i < vi->max_queue_pairs; i++) {
1042                        virtqueue_set_affinity(vi->rq[i].vq, -1);
1043                        virtqueue_set_affinity(vi->sq[i].vq, -1);
1044                }
1045
1046                vi->affinity_hint_set = false;
1047        }
1048
1049        i = 0;
1050        for_each_online_cpu(cpu) {
1051                if (cpu == hcpu) {
1052                        *per_cpu_ptr(vi->vq_index, cpu) = -1;
1053                } else {
1054                        *per_cpu_ptr(vi->vq_index, cpu) =
1055                                ++i % vi->curr_queue_pairs;
1056                }
1057        }
1058}
1059
1060static void virtnet_set_affinity(struct virtnet_info *vi)
1061{
1062        int i;
1063        int cpu;
1064
1065        /* In multiqueue mode, when the number of CPUs equals the number of
1066         * queue pairs, we make each queue pair private to one CPU by
1067         * setting the affinity hint, which eliminates the contention.
1068         */
1069        if (vi->curr_queue_pairs == 1 ||
1070            vi->max_queue_pairs != num_online_cpus()) {
1071                virtnet_clean_affinity(vi, -1);
1072                return;
1073        }
1074
1075        i = 0;
1076        for_each_online_cpu(cpu) {
1077                virtqueue_set_affinity(vi->rq[i].vq, cpu);
1078                virtqueue_set_affinity(vi->sq[i].vq, cpu);
1079                *per_cpu_ptr(vi->vq_index, cpu) = i;
1080                i++;
1081        }
1082
1083        vi->affinity_hint_set = true;
1084}
1085
1086static int virtnet_cpu_callback(struct notifier_block *nfb,
1087                                unsigned long action, void *hcpu)
1088{
1089        struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
1090
1091        switch (action & ~CPU_TASKS_FROZEN) {
1092        case CPU_ONLINE:
1093        case CPU_DOWN_FAILED:
1094        case CPU_DEAD:
1095                virtnet_set_affinity(vi);
1096                break;
1097        case CPU_DOWN_PREPARE:
1098                virtnet_clean_affinity(vi, (long)hcpu);
1099                break;
1100        default:
1101                break;
1102        }
1103        return NOTIFY_OK;
1104}
1105
1106static void virtnet_get_ringparam(struct net_device *dev,
1107                                struct ethtool_ringparam *ring)
1108{
1109        struct virtnet_info *vi = netdev_priv(dev);
1110
1111        ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1112        ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
1113        ring->rx_pending = ring->rx_max_pending;
1114        ring->tx_pending = ring->tx_max_pending;
1115}
1116
1117
1118static void virtnet_get_drvinfo(struct net_device *dev,
1119                                struct ethtool_drvinfo *info)
1120{
1121        struct virtnet_info *vi = netdev_priv(dev);
1122        struct virtio_device *vdev = vi->vdev;
1123
1124        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1125        strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1126        strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1127
1128}
1129
1130/* TODO: Eliminate out-of-order (OOO) packets during switching */
1131static int virtnet_set_channels(struct net_device *dev,
1132                                struct ethtool_channels *channels)
1133{
1134        struct virtnet_info *vi = netdev_priv(dev);
1135        u16 queue_pairs = channels->combined_count;
1136        int err;
1137
1138        /* We don't support separate rx/tx channels.
1139         * We don't allow setting 'other' channels.
1140         */
1141        if (channels->rx_count || channels->tx_count || channels->other_count)
1142                return -EINVAL;
1143
1144        if (queue_pairs > vi->max_queue_pairs)
1145                return -EINVAL;
1146
1147        get_online_cpus();
1148        err = virtnet_set_queues(vi, queue_pairs);
1149        if (!err) {
1150                netif_set_real_num_tx_queues(dev, queue_pairs);
1151                netif_set_real_num_rx_queues(dev, queue_pairs);
1152
1153                virtnet_set_affinity(vi);
1154        }
1155        put_online_cpus();
1156
1157        return err;
1158}
1159
1160static void virtnet_get_channels(struct net_device *dev,
1161                                 struct ethtool_channels *channels)
1162{
1163        struct virtnet_info *vi = netdev_priv(dev);
1164
1165        channels->combined_count = vi->curr_queue_pairs;
1166        channels->max_combined = vi->max_queue_pairs;
1167        channels->max_other = 0;
1168        channels->rx_count = 0;
1169        channels->tx_count = 0;
1170        channels->other_count = 0;
1171}
1172
1173static const struct ethtool_ops virtnet_ethtool_ops = {
1174        .get_drvinfo = virtnet_get_drvinfo,
1175        .get_link = ethtool_op_get_link,
1176        .get_ringparam = virtnet_get_ringparam,
1177        .set_channels = virtnet_set_channels,
1178        .get_channels = virtnet_get_channels,
1179};
1180
1181#define MIN_MTU 68
1182#define MAX_MTU 65535
1183
1184static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
1185{
1186        if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
1187                return -EINVAL;
1188        dev->mtu = new_mtu;
1189        return 0;
1190}
1191
1192/* To avoid contending for a lock held by a vcpu that would exit to the host,
1193 * select the txq based on the processor id.
1194 */
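/*
 * vq_index is the per-CPU hint programmed by virtnet_set_affinity(); it is
 * -1 when no hint is set for this CPU, in which case txq 0 is used.
 */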
1195static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
1196{
1197        int txq;
1198        struct virtnet_info *vi = netdev_priv(dev);
1199
1200        if (skb_rx_queue_recorded(skb)) {
1201                txq = skb_get_rx_queue(skb);
1202        } else {
1203                txq = *__this_cpu_ptr(vi->vq_index);
1204                if (txq == -1)
1205                        txq = 0;
1206        }
1207
1208        while (unlikely(txq >= dev->real_num_tx_queues))
1209                txq -= dev->real_num_tx_queues;
1210
1211        return txq;
1212}
1213
1214static const struct net_device_ops virtnet_netdev = {
1215        .ndo_open            = virtnet_open,
1216        .ndo_stop            = virtnet_close,
1217        .ndo_start_xmit      = start_xmit,
1218        .ndo_validate_addr   = eth_validate_addr,
1219        .ndo_set_mac_address = virtnet_set_mac_address,
1220        .ndo_set_rx_mode     = virtnet_set_rx_mode,
1221        .ndo_change_mtu      = virtnet_change_mtu,
1222        .ndo_get_stats64     = virtnet_stats,
1223        .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
1224        .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
1225        .ndo_select_queue     = virtnet_select_queue,
1226#ifdef CONFIG_NET_POLL_CONTROLLER
1227        .ndo_poll_controller = virtnet_netpoll,
1228#endif
1229};
1230
1231static void virtnet_config_changed_work(struct work_struct *work)
1232{
1233        struct virtnet_info *vi =
1234                container_of(work, struct virtnet_info, config_work);
1235        u16 v;
1236
1237        mutex_lock(&vi->config_lock);
1238        if (!vi->config_enable)
1239                goto done;
1240
1241        if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
1242                              offsetof(struct virtio_net_config, status),
1243                              &v) < 0)
1244                goto done;
1245
1246        if (v & VIRTIO_NET_S_ANNOUNCE) {
1247                netdev_notify_peers(vi->dev);
1248                virtnet_ack_link_announce(vi);
1249        }
1250
1251        /* Ignore unknown (future) status bits */
1252        v &= VIRTIO_NET_S_LINK_UP;
1253
1254        if (vi->status == v)
1255                goto done;
1256
1257        vi->status = v;
1258
1259        if (vi->status & VIRTIO_NET_S_LINK_UP) {
1260                netif_carrier_on(vi->dev);
1261                netif_tx_wake_all_queues(vi->dev);
1262        } else {
1263                netif_carrier_off(vi->dev);
1264                netif_tx_stop_all_queues(vi->dev);
1265        }
1266done:
1267        mutex_unlock(&vi->config_lock);
1268}
1269
1270static void virtnet_config_changed(struct virtio_device *vdev)
1271{
1272        struct virtnet_info *vi = vdev->priv;
1273
1274        schedule_work(&vi->config_work);
1275}
1276
1277static void virtnet_free_queues(struct virtnet_info *vi)
1278{
1279        kfree(vi->rq);
1280        kfree(vi->sq);
1281}
1282
1283static void free_receive_bufs(struct virtnet_info *vi)
1284{
1285        int i;
1286
1287        for (i = 0; i < vi->max_queue_pairs; i++) {
1288                while (vi->rq[i].pages)
1289                        __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
1290        }
1291}
1292
1293static void free_unused_bufs(struct virtnet_info *vi)
1294{
1295        void *buf;
1296        int i;
1297
1298        for (i = 0; i < vi->max_queue_pairs; i++) {
1299                struct virtqueue *vq = vi->sq[i].vq;
1300                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
1301                        dev_kfree_skb(buf);
1302        }
1303
1304        for (i = 0; i < vi->max_queue_pairs; i++) {
1305                struct virtqueue *vq = vi->rq[i].vq;
1306
1307                while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
1308                        if (vi->mergeable_rx_bufs || vi->big_packets)
1309                                give_pages(&vi->rq[i], buf);
1310                        else
1311                                dev_kfree_skb(buf);
1312                        --vi->rq[i].num;
1313                }
1314                BUG_ON(vi->rq[i].num != 0);
1315        }
1316}
1317
1318static void virtnet_del_vqs(struct virtnet_info *vi)
1319{
1320        struct virtio_device *vdev = vi->vdev;
1321
1322        virtnet_clean_affinity(vi, -1);
1323
1324        vdev->config->del_vqs(vdev);
1325
1326        virtnet_free_queues(vi);
1327}
1328
1329static int virtnet_find_vqs(struct virtnet_info *vi)
1330{
1331        vq_callback_t **callbacks;
1332        struct virtqueue **vqs;
1333        int ret = -ENOMEM;
1334        int i, total_vqs;
1335        const char **names;
1336
1337        /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
1338         * possibly N-1 more RX/TX queue pairs used in multiqueue mode, followed
1339         * by a possible control vq.
1340         */
1341        total_vqs = vi->max_queue_pairs * 2 +
1342                    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
1343
1344        /* Allocate space for find_vqs parameters */
1345        vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
1346        if (!vqs)
1347                goto err_vq;
1348        callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
1349        if (!callbacks)
1350                goto err_callback;
1351        names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
1352        if (!names)
1353                goto err_names;
1354
1355        /* Parameters for control virtqueue, if any */
1356        if (vi->has_cvq) {
1357                callbacks[total_vqs - 1] = NULL;
1358                names[total_vqs - 1] = "control";
1359        }
1360
1361        /* Allocate/initialize parameters for send/receive virtqueues */
1362        for (i = 0; i < vi->max_queue_pairs; i++) {
1363                callbacks[rxq2vq(i)] = skb_recv_done;
1364                callbacks[txq2vq(i)] = skb_xmit_done;
1365                sprintf(vi->rq[i].name, "input.%d", i);
1366                sprintf(vi->sq[i].name, "output.%d", i);
1367                names[rxq2vq(i)] = vi->rq[i].name;
1368                names[txq2vq(i)] = vi->sq[i].name;
1369        }
1370
1371        ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
1372                                         names);
1373        if (ret)
1374                goto err_find;
1375
1376        if (vi->has_cvq) {
1377                vi->cvq = vqs[total_vqs - 1];
1378                if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
1379                        vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
1380        }
1381
1382        for (i = 0; i < vi->max_queue_pairs; i++) {
1383                vi->rq[i].vq = vqs[rxq2vq(i)];
1384                vi->sq[i].vq = vqs[txq2vq(i)];
1385        }
1386
1387        kfree(names);
1388        kfree(callbacks);
1389        kfree(vqs);
1390
1391        return 0;
1392
1393err_find:
1394        kfree(names);
1395err_names:
1396        kfree(callbacks);
1397err_callback:
1398        kfree(vqs);
1399err_vq:
1400        return ret;
1401}
1402
1403static int virtnet_alloc_queues(struct virtnet_info *vi)
1404{
1405        int i;
1406
1407        vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
1408        if (!vi->sq)
1409                goto err_sq;
1410        vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
1411        if (!vi->rq)
1412                goto err_rq;
1413
1414        INIT_DELAYED_WORK(&vi->refill, refill_work);
1415        for (i = 0; i < vi->max_queue_pairs; i++) {
1416                vi->rq[i].pages = NULL;
1417                netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
1418                               napi_weight);
1419
1420                sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
1421                sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
1422        }
1423
1424        return 0;
1425
1426err_rq:
1427        kfree(vi->sq);
1428err_sq:
1429        return -ENOMEM;
1430}
1431
1432static int init_vqs(struct virtnet_info *vi)
1433{
1434        int ret;
1435
1436        /* Allocate send & receive queues */
1437        ret = virtnet_alloc_queues(vi);
1438        if (ret)
1439                goto err;
1440
1441        ret = virtnet_find_vqs(vi);
1442        if (ret)
1443                goto err_free;
1444
1445        get_online_cpus();
1446        virtnet_set_affinity(vi);
1447        put_online_cpus();
1448
1449        return 0;
1450
1451err_free:
1452        virtnet_free_queues(vi);
1453err:
1454        return ret;
1455}
1456
1457static int virtnet_probe(struct virtio_device *vdev)
1458{
1459        int i, err;
1460        struct net_device *dev;
1461        struct virtnet_info *vi;
1462        u16 max_queue_pairs;
1463
1464        /* Find if host supports multiqueue virtio_net device */
1465        err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
1466                                offsetof(struct virtio_net_config,
1467                                max_virtqueue_pairs), &max_queue_pairs);
1468
1469        /* We need at least 2 queues */
1470        if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1471            max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1472            !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1473                max_queue_pairs = 1;
1474
1475        /* Allocate ourselves a network device with room for our info */
1476        dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
1477        if (!dev)
1478                return -ENOMEM;
1479
1480        /* Set up network device as normal. */
1481        dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
1482        dev->netdev_ops = &virtnet_netdev;
1483        dev->features = NETIF_F_HIGHDMA;
1484
1485        SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
1486        SET_NETDEV_DEV(dev, &vdev->dev);
1487
1488        /* Do we support "hardware" checksums? */
1489        if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
1490                /* This opens up the world of extra features. */
1491                dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1492                if (csum)
1493                        dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
1494
1495                if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
1496                        dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
1497                                | NETIF_F_TSO_ECN | NETIF_F_TSO6;
1498                }
1499                /* Individual feature bits: what can host handle? */
1500                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
1501                        dev->hw_features |= NETIF_F_TSO;
1502                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
1503                        dev->hw_features |= NETIF_F_TSO6;
1504                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
1505                        dev->hw_features |= NETIF_F_TSO_ECN;
1506                if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
1507                        dev->hw_features |= NETIF_F_UFO;
1508
1509                if (gso)
1510                        dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
1511                /* (!csum && gso) case will be fixed by register_netdev() */
1512        }
1513
1514        /* Configuration may specify what MAC to use.  Otherwise random. */
1515        if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
1516                                  offsetof(struct virtio_net_config, mac),
1517                                  dev->dev_addr, dev->addr_len) < 0)
1518                eth_hw_addr_random(dev);
1519
1520        /* Set up our device-specific information */
1521        vi = netdev_priv(dev);
1522        vi->dev = dev;
1523        vi->vdev = vdev;
1524        vdev->priv = vi;
1525        vi->stats = alloc_percpu(struct virtnet_stats);
1526        err = -ENOMEM;
1527        if (vi->stats == NULL)
1528                goto free;
1529
1530        vi->vq_index = alloc_percpu(int);
1531        if (vi->vq_index == NULL)
1532                goto free_stats;
1533
1534        mutex_init(&vi->config_lock);
1535        vi->config_enable = true;
1536        INIT_WORK(&vi->config_work, virtnet_config_changed_work);
1537
1538        /* If we can receive ANY GSO packets, we must allocate large ones. */
1539        if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
1540            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1541            virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
1542                vi->big_packets = true;
1543
1544        if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
1545                vi->mergeable_rx_bufs = true;
1546
1547        if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
1548                vi->has_cvq = true;
1549
1550        /* Use single tx/rx queue pair as default */
1551        vi->curr_queue_pairs = 1;
1552        vi->max_queue_pairs = max_queue_pairs;
1553
1554        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
1555        err = init_vqs(vi);
1556        if (err)
1557                goto free_index;
1558
1559        netif_set_real_num_tx_queues(dev, 1);
1560        netif_set_real_num_rx_queues(dev, 1);
1561
1562        err = register_netdev(dev);
1563        if (err) {
1564                pr_debug("virtio_net: registering device failed\n");
1565                goto free_vqs;
1566        }
1567
1568        /* Last of all, set up some receive buffers. */
1569        for (i = 0; i < vi->max_queue_pairs; i++) {
1570                try_fill_recv(&vi->rq[i], GFP_KERNEL);
1571
1572                /* If we didn't even get one input buffer, we're useless. */
1573                if (vi->rq[i].num == 0) {
1574                        free_unused_bufs(vi);
1575                        err = -ENOMEM;
1576                        goto free_recv_bufs;
1577                }
1578        }
1579
1580        vi->nb.notifier_call = &virtnet_cpu_callback;
1581        err = register_hotcpu_notifier(&vi->nb);
1582        if (err) {
1583                pr_debug("virtio_net: registering cpu notifier failed\n");
1584                goto free_recv_bufs;
1585        }
1586
1587        /* Assume link up if device can't report link status,
1588           otherwise get link status from config. */
1589        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
1590                netif_carrier_off(dev);
1591                schedule_work(&vi->config_work);
1592        } else {
1593                vi->status = VIRTIO_NET_S_LINK_UP;
1594                netif_carrier_on(dev);
1595        }
1596
1597        pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
1598                 dev->name, max_queue_pairs);
1599
1600        return 0;
1601
1602free_recv_bufs:
1603        free_receive_bufs(vi);
1604        unregister_netdev(dev);
1605free_vqs:
1606        cancel_delayed_work_sync(&vi->refill);
1607        virtnet_del_vqs(vi);
1608free_index:
1609        free_percpu(vi->vq_index);
1610free_stats:
1611        free_percpu(vi->stats);
1612free:
1613        free_netdev(dev);
1614        return err;
1615}
1616
1617static void remove_vq_common(struct virtnet_info *vi)
1618{
1619        vi->vdev->config->reset(vi->vdev);
1620
1621        /* Free unused buffers in both send and recv, if any. */
1622        free_unused_bufs(vi);
1623
1624        free_receive_bufs(vi);
1625
1626        virtnet_del_vqs(vi);
1627}
1628
1629static void virtnet_remove(struct virtio_device *vdev)
1630{
1631        struct virtnet_info *vi = vdev->priv;
1632
1633        unregister_hotcpu_notifier(&vi->nb);
1634
1635        /* Prevent config work handler from accessing the device. */
1636        mutex_lock(&vi->config_lock);
1637        vi->config_enable = false;
1638        mutex_unlock(&vi->config_lock);
1639
1640        unregister_netdev(vi->dev);
1641
1642        remove_vq_common(vi);
1643
1644        flush_work(&vi->config_work);
1645
1646        free_percpu(vi->vq_index);
1647        free_percpu(vi->stats);
1648        free_netdev(vi->dev);
1649}
1650
1651#ifdef CONFIG_PM
1652static int virtnet_freeze(struct virtio_device *vdev)
1653{
1654        struct virtnet_info *vi = vdev->priv;
1655        int i;
1656
1657        /* Prevent config work handler from accessing the device */
1658        mutex_lock(&vi->config_lock);
1659        vi->config_enable = false;
1660        mutex_unlock(&vi->config_lock);
1661
1662        netif_device_detach(vi->dev);
1663        cancel_delayed_work_sync(&vi->refill);
1664
1665        if (netif_running(vi->dev))
1666                for (i = 0; i < vi->max_queue_pairs; i++) {
1667                        napi_disable(&vi->rq[i].napi);
1668                        netif_napi_del(&vi->rq[i].napi);
1669                }
1670
1671        remove_vq_common(vi);
1672
1673        flush_work(&vi->config_work);
1674
1675        return 0;
1676}
1677
1678static int virtnet_restore(struct virtio_device *vdev)
1679{
1680        struct virtnet_info *vi = vdev->priv;
1681        int err, i;
1682
1683        err = init_vqs(vi);
1684        if (err)
1685                return err;
1686
1687        if (netif_running(vi->dev))
1688                for (i = 0; i < vi->max_queue_pairs; i++)
1689                        virtnet_napi_enable(&vi->rq[i]);
1690
1691        netif_device_attach(vi->dev);
1692
1693        for (i = 0; i < vi->max_queue_pairs; i++)
1694                if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
1695                        schedule_delayed_work(&vi->refill, 0);
1696
1697        mutex_lock(&vi->config_lock);
1698        vi->config_enable = true;
1699        mutex_unlock(&vi->config_lock);
1700
1701        virtnet_set_queues(vi, vi->curr_queue_pairs);
1702
1703        return 0;
1704}
1705#endif
1706
1707static struct virtio_device_id id_table[] = {
1708        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1709        { 0 },
1710};
1711
1712static unsigned int features[] = {
1713        VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1714        VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1715        VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
1716        VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1717        VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1718        VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1719        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1720        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
1721        VIRTIO_NET_F_CTRL_MAC_ADDR,
1722};
1723
1724static struct virtio_driver virtio_net_driver = {
1725        .feature_table = features,
1726        .feature_table_size = ARRAY_SIZE(features),
1727        .driver.name =  KBUILD_MODNAME,
1728        .driver.owner = THIS_MODULE,
1729        .id_table =     id_table,
1730        .probe =        virtnet_probe,
1731        .remove =       virtnet_remove,
1732        .config_changed = virtnet_config_changed,
1733#ifdef CONFIG_PM
1734        .freeze =       virtnet_freeze,
1735        .restore =      virtnet_restore,
1736#endif
1737};
1738
1739module_virtio_driver(virtio_net_driver);
1740
1741MODULE_DEVICE_TABLE(virtio, id_table);
1742MODULE_DESCRIPTION("Virtio network driver");
1743MODULE_LICENSE("GPL");
1744