linux/drivers/net/tun.c
<<
>>
Prefs
   1/*
   2 *  TUN - Universal TUN/TAP device driver.
   3 *  Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
   4 *
   5 *  This program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2 of the License, or
   8 *  (at your option) any later version.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
  16 */
  17
  18/*
  19 *  Changes:
  20 *
  21 *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  22 *    Add TUNSETLINK ioctl to set the link encapsulation
  23 *
  24 *  Mark Smith <markzzzsmith@yahoo.com.au>
  25 *    Use random_ether_addr() for tap MAC address.
  26 *
  27 *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
  28 *    Fixes in packet dropping, queue length setting and queue wakeup.
  29 *    Increased default tx queue length.
  30 *    Added ethtool API.
  31 *    Minor cleanups
  32 *
  33 *  Daniel Podlejski <underley@underley.eu.org>
  34 *    Modifications for 2.3.99-pre5 kernel.
  35 */
  36
  37#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  38
  39#define DRV_NAME        "tun"
  40#define DRV_VERSION     "1.6"
  41#define DRV_DESCRIPTION "Universal TUN/TAP device driver"
  42#define DRV_COPYRIGHT   "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
  43
  44#include <linux/module.h>
  45#include <linux/errno.h>
  46#include <linux/kernel.h>
  47#include <linux/major.h>
  48#include <linux/slab.h>
  49#include <linux/poll.h>
  50#include <linux/fcntl.h>
  51#include <linux/init.h>
  52#include <linux/skbuff.h>
  53#include <linux/netdevice.h>
  54#include <linux/etherdevice.h>
  55#include <linux/miscdevice.h>
  56#include <linux/ethtool.h>
  57#include <linux/rtnetlink.h>
  58#include <linux/compat.h>
  59#include <linux/if.h>
  60#include <linux/if_arp.h>
  61#include <linux/if_ether.h>
  62#include <linux/if_tun.h>
  63#include <linux/crc32.h>
  64#include <linux/nsproxy.h>
  65#include <linux/virtio_net.h>
  66#include <linux/rcupdate.h>
  67#include <net/net_namespace.h>
  68#include <net/netns/generic.h>
  69#include <net/rtnetlink.h>
  70#include <net/sock.h>
  71
  72#include <asm/system.h>
  73#include <asm/uaccess.h>
  74
  75/* Uncomment to enable debugging */
  76/* #define TUN_DEBUG 1 */
  77
  78#ifdef TUN_DEBUG
  79static int debug;
  80
  81#define tun_debug(level, tun, fmt, args...)                     \
  82do {                                                            \
  83        if (tun->debug)                                         \
  84                netdev_printk(level, tun->dev, fmt, ##args);    \
  85} while (0)
  86#define DBG1(level, fmt, args...)                               \
  87do {                                                            \
  88        if (debug == 2)                                         \
  89                printk(level fmt, ##args);                      \
  90} while (0)
  91#else
  92#define tun_debug(level, tun, fmt, args...)                     \
  93do {                                                            \
  94        if (0)                                                  \
  95                netdev_printk(level, tun->dev, fmt, ##args);    \
  96} while (0)
  97#define DBG1(level, fmt, args...)                               \
  98do {                                                            \
  99        if (0)                                                  \
 100                printk(level fmt, ##args);                      \
 101} while (0)
 102#endif
 103
 104#define FLT_EXACT_COUNT 8
 105struct tap_filter {
 106        unsigned int    count;    /* Number of addrs. Zero means disabled */
 107        u32             mask[2];  /* Mask of the hashed addrs */
 108        unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
 109};
 110
 111struct tun_file {
 112        atomic_t count;
 113        struct tun_struct *tun;
 114        struct net *net;
 115};
 116
 117struct tun_sock;
 118
 119struct tun_struct {
 120        struct tun_file         *tfile;
 121        unsigned int            flags;
 122        uid_t                   owner;
 123        gid_t                   group;
 124
 125        struct net_device       *dev;
 126        u32                     set_features;
 127#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
 128                          NETIF_F_TSO6|NETIF_F_UFO)
 129        struct fasync_struct    *fasync;
 130
 131        struct tap_filter       txflt;
 132        struct socket           socket;
 133        struct socket_wq        wq;
 134
 135        int                     vnet_hdr_sz;
 136
 137#ifdef TUN_DEBUG
 138        int debug;
 139#endif
 140};
 141
 142struct tun_sock {
 143        struct sock             sk;
 144        struct tun_struct       *tun;
 145};
 146
 147static inline struct tun_sock *tun_sk(struct sock *sk)
 148{
 149        return container_of(sk, struct tun_sock, sk);
 150}
 151
 152static int tun_attach(struct tun_struct *tun, struct file *file)
 153{
 154        struct tun_file *tfile = file->private_data;
 155        int err;
 156
 157        ASSERT_RTNL();
 158
 159        netif_tx_lock_bh(tun->dev);
 160
 161        err = -EINVAL;
 162        if (tfile->tun)
 163                goto out;
 164
 165        err = -EBUSY;
 166        if (tun->tfile)
 167                goto out;
 168
 169        err = 0;
 170        tfile->tun = tun;
 171        tun->tfile = tfile;
 172        tun->socket.file = file;
 173        netif_carrier_on(tun->dev);
 174        dev_hold(tun->dev);
 175        sock_hold(tun->socket.sk);
 176        atomic_inc(&tfile->count);
 177
 178out:
 179        netif_tx_unlock_bh(tun->dev);
 180        return err;
 181}
 182
 183static void __tun_detach(struct tun_struct *tun)
 184{
 185        /* Detach from net device */
 186        netif_tx_lock_bh(tun->dev);
 187        netif_carrier_off(tun->dev);
 188        tun->tfile = NULL;
 189        tun->socket.file = NULL;
 190        netif_tx_unlock_bh(tun->dev);
 191
 192        /* Drop read queue */
 193        skb_queue_purge(&tun->socket.sk->sk_receive_queue);
 194
 195        /* Drop the extra count on the net device */
 196        dev_put(tun->dev);
 197}
 198
 199static void tun_detach(struct tun_struct *tun)
 200{
 201        rtnl_lock();
 202        __tun_detach(tun);
 203        rtnl_unlock();
 204}
 205
 206static struct tun_struct *__tun_get(struct tun_file *tfile)
 207{
 208        struct tun_struct *tun = NULL;
 209
 210        if (atomic_inc_not_zero(&tfile->count))
 211                tun = tfile->tun;
 212
 213        return tun;
 214}
 215
 216static struct tun_struct *tun_get(struct file *file)
 217{
 218        return __tun_get(file->private_data);
 219}
 220
 221static void tun_put(struct tun_struct *tun)
 222{
 223        struct tun_file *tfile = tun->tfile;
 224
 225        if (atomic_dec_and_test(&tfile->count))
 226                tun_detach(tfile->tun);
 227}
 228
 229/* TAP filtering */
 230static void addr_hash_set(u32 *mask, const u8 *addr)
 231{
 232        int n = ether_crc(ETH_ALEN, addr) >> 26;
 233        mask[n >> 5] |= (1 << (n & 31));
 234}
 235
 236static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
 237{
 238        int n = ether_crc(ETH_ALEN, addr) >> 26;
 239        return mask[n >> 5] & (1 << (n & 31));
 240}
 241
 242static int update_filter(struct tap_filter *filter, void __user *arg)
 243{
 244        struct { u8 u[ETH_ALEN]; } *addr;
 245        struct tun_filter uf;
 246        int err, alen, n, nexact;
 247
 248        if (copy_from_user(&uf, arg, sizeof(uf)))
 249                return -EFAULT;
 250
 251        if (!uf.count) {
 252                /* Disabled */
 253                filter->count = 0;
 254                return 0;
 255        }
 256
 257        alen = ETH_ALEN * uf.count;
 258        addr = kmalloc(alen, GFP_KERNEL);
 259        if (!addr)
 260                return -ENOMEM;
 261
 262        if (copy_from_user(addr, arg + sizeof(uf), alen)) {
 263                err = -EFAULT;
 264                goto done;
 265        }
 266
 267        /* The filter is updated without holding any locks. Which is
 268         * perfectly safe. We disable it first and in the worst
 269         * case we'll accept a few undesired packets. */
 270        filter->count = 0;
 271        wmb();
 272
 273        /* Use first set of addresses as an exact filter */
 274        for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
 275                memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
 276
 277        nexact = n;
 278
 279        /* Remaining multicast addresses are hashed,
 280         * unicast will leave the filter disabled. */
 281        memset(filter->mask, 0, sizeof(filter->mask));
 282        for (; n < uf.count; n++) {
 283                if (!is_multicast_ether_addr(addr[n].u)) {
 284                        err = 0; /* no filter */
 285                        goto done;
 286                }
 287                addr_hash_set(filter->mask, addr[n].u);
 288        }
 289
 290        /* For ALLMULTI just set the mask to all ones.
 291         * This overrides the mask populated above. */
 292        if ((uf.flags & TUN_FLT_ALLMULTI))
 293                memset(filter->mask, ~0, sizeof(filter->mask));
 294
 295        /* Now enable the filter */
 296        wmb();
 297        filter->count = nexact;
 298
 299        /* Return the number of exact filters */
 300        err = nexact;
 301
 302done:
 303        kfree(addr);
 304        return err;
 305}
 306
 307/* Returns: 0 - drop, !=0 - accept */
 308static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
 309{
 310        /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
 311         * at this point. */
 312        struct ethhdr *eh = (struct ethhdr *) skb->data;
 313        int i;
 314
 315        /* Exact match */
 316        for (i = 0; i < filter->count; i++)
 317                if (!compare_ether_addr(eh->h_dest, filter->addr[i]))
 318                        return 1;
 319
 320        /* Inexact match (multicast only) */
 321        if (is_multicast_ether_addr(eh->h_dest))
 322                return addr_hash_test(filter->mask, eh->h_dest);
 323
 324        return 0;
 325}
 326
 327/*
 328 * Checks whether the packet is accepted or not.
 329 * Returns: 0 - drop, !=0 - accept
 330 */
 331static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
 332{
 333        if (!filter->count)
 334                return 1;
 335
 336        return run_filter(filter, skb);
 337}
 338
 339/* Network device part of the driver */
 340
 341static const struct ethtool_ops tun_ethtool_ops;
 342
 343/* Net device detach from fd. */
 344static void tun_net_uninit(struct net_device *dev)
 345{
 346        struct tun_struct *tun = netdev_priv(dev);
 347        struct tun_file *tfile = tun->tfile;
 348
 349        /* Inform the methods they need to stop using the dev.
 350         */
 351        if (tfile) {
 352                wake_up_all(&tun->wq.wait);
 353                if (atomic_dec_and_test(&tfile->count))
 354                        __tun_detach(tun);
 355        }
 356}
 357
 358static void tun_free_netdev(struct net_device *dev)
 359{
 360        struct tun_struct *tun = netdev_priv(dev);
 361
 362        sock_put(tun->socket.sk);
 363}
 364
 365/* Net device open. */
 366static int tun_net_open(struct net_device *dev)
 367{
 368        netif_start_queue(dev);
 369        return 0;
 370}
 371
 372/* Net device close. */
 373static int tun_net_close(struct net_device *dev)
 374{
 375        netif_stop_queue(dev);
 376        return 0;
 377}
 378
 379/* Net device start xmit */
 380static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 381{
 382        struct tun_struct *tun = netdev_priv(dev);
 383
 384        tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
 385
 386        /* Drop packet if interface is not attached */
 387        if (!tun->tfile)
 388                goto drop;
 389
 390        /* Drop if the filter does not like it.
 391         * This is a noop if the filter is disabled.
 392         * Filter can be enabled only for the TAP devices. */
 393        if (!check_filter(&tun->txflt, skb))
 394                goto drop;
 395
 396        if (tun->socket.sk->sk_filter &&
 397            sk_filter(tun->socket.sk, skb))
 398                goto drop;
 399
 400        if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) {
 401                if (!(tun->flags & TUN_ONE_QUEUE)) {
 402                        /* Normal queueing mode. */
 403                        /* Packet scheduler handles dropping of further packets. */
 404                        netif_stop_queue(dev);
 405
 406                        /* We won't see all dropped packets individually, so overrun
 407                         * error is more appropriate. */
 408                        dev->stats.tx_fifo_errors++;
 409                } else {
 410                        /* Single queue mode.
 411                         * Driver handles dropping of all packets itself. */
 412                        goto drop;
 413                }
 414        }
 415
 416        /* Orphan the skb - required as we might hang on to it
 417         * for indefinite time. */
 418        skb_orphan(skb);
 419
 420        /* Enqueue packet */
 421        skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);
 422
 423        /* Notify and wake up reader process */
 424        if (tun->flags & TUN_FASYNC)
 425                kill_fasync(&tun->fasync, SIGIO, POLL_IN);
 426        wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
 427                                   POLLRDNORM | POLLRDBAND);
 428        return NETDEV_TX_OK;
 429
 430drop:
 431        dev->stats.tx_dropped++;
 432        kfree_skb(skb);
 433        return NETDEV_TX_OK;
 434}
 435
 436static void tun_net_mclist(struct net_device *dev)
 437{
 438        /*
 439         * This callback is supposed to deal with mc filter in
 440         * _rx_ path and has nothing to do with the _tx_ path.
 441         * In rx path we always accept everything userspace gives us.
 442         */
 443}
 444
 445#define MIN_MTU 68
 446#define MAX_MTU 65535
 447
 448static int
 449tun_net_change_mtu(struct net_device *dev, int new_mtu)
 450{
 451        if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU)
 452                return -EINVAL;
 453        dev->mtu = new_mtu;
 454        return 0;
 455}
 456
 457static u32 tun_net_fix_features(struct net_device *dev, u32 features)
 458{
 459        struct tun_struct *tun = netdev_priv(dev);
 460
 461        return (features & tun->set_features) | (features & ~TUN_USER_FEATURES);
 462}
 463#ifdef CONFIG_NET_POLL_CONTROLLER
 464static void tun_poll_controller(struct net_device *dev)
 465{
 466        /*
 467         * Tun only receives frames when:
 468         * 1) the char device endpoint gets data from user space
 469         * 2) the tun socket gets a sendmsg call from user space
 470         * Since both of those are syncronous operations, we are guaranteed
 471         * never to have pending data when we poll for it
 472         * so theres nothing to do here but return.
 473         * We need this though so netpoll recognizes us as an interface that
 474         * supports polling, which enables bridge devices in virt setups to
 475         * still use netconsole
 476         */
 477        return;
 478}
 479#endif
 480static const struct net_device_ops tun_netdev_ops = {
 481        .ndo_uninit             = tun_net_uninit,
 482        .ndo_open               = tun_net_open,
 483        .ndo_stop               = tun_net_close,
 484        .ndo_start_xmit         = tun_net_xmit,
 485        .ndo_change_mtu         = tun_net_change_mtu,
 486        .ndo_fix_features       = tun_net_fix_features,
 487#ifdef CONFIG_NET_POLL_CONTROLLER
 488        .ndo_poll_controller    = tun_poll_controller,
 489#endif
 490};
 491
 492static const struct net_device_ops tap_netdev_ops = {
 493        .ndo_uninit             = tun_net_uninit,
 494        .ndo_open               = tun_net_open,
 495        .ndo_stop               = tun_net_close,
 496        .ndo_start_xmit         = tun_net_xmit,
 497        .ndo_change_mtu         = tun_net_change_mtu,
 498        .ndo_fix_features       = tun_net_fix_features,
 499        .ndo_set_rx_mode        = tun_net_mclist,
 500        .ndo_set_mac_address    = eth_mac_addr,
 501        .ndo_validate_addr      = eth_validate_addr,
 502#ifdef CONFIG_NET_POLL_CONTROLLER
 503        .ndo_poll_controller    = tun_poll_controller,
 504#endif
 505};
 506
 507/* Initialize net device. */
 508static void tun_net_init(struct net_device *dev)
 509{
 510        struct tun_struct *tun = netdev_priv(dev);
 511
 512        switch (tun->flags & TUN_TYPE_MASK) {
 513        case TUN_TUN_DEV:
 514                dev->netdev_ops = &tun_netdev_ops;
 515
 516                /* Point-to-Point TUN Device */
 517                dev->hard_header_len = 0;
 518                dev->addr_len = 0;
 519                dev->mtu = 1500;
 520
 521                /* Zero header length */
 522                dev->type = ARPHRD_NONE;
 523                dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 524                dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 525                break;
 526
 527        case TUN_TAP_DEV:
 528                dev->netdev_ops = &tap_netdev_ops;
 529                /* Ethernet TAP Device */
 530                ether_setup(dev);
 531                dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 532
 533                random_ether_addr(dev->dev_addr);
 534
 535                dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 536                break;
 537        }
 538}
 539
 540/* Character device part */
 541
 542/* Poll */
 543static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 544{
 545        struct tun_file *tfile = file->private_data;
 546        struct tun_struct *tun = __tun_get(tfile);
 547        struct sock *sk;
 548        unsigned int mask = 0;
 549
 550        if (!tun)
 551                return POLLERR;
 552
 553        sk = tun->socket.sk;
 554
 555        tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
 556
 557        poll_wait(file, &tun->wq.wait, wait);
 558
 559        if (!skb_queue_empty(&sk->sk_receive_queue))
 560                mask |= POLLIN | POLLRDNORM;
 561
 562        if (sock_writeable(sk) ||
 563            (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
 564             sock_writeable(sk)))
 565                mask |= POLLOUT | POLLWRNORM;
 566
 567        if (tun->dev->reg_state != NETREG_REGISTERED)
 568                mask = POLLERR;
 569
 570        tun_put(tun);
 571        return mask;
 572}
 573
 574/* prepad is the amount to reserve at front.  len is length after that.
 575 * linear is a hint as to how much to copy (usually headers). */
 576static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 577                                     size_t prepad, size_t len,
 578                                     size_t linear, int noblock)
 579{
 580        struct sock *sk = tun->socket.sk;
 581        struct sk_buff *skb;
 582        int err;
 583
 584        sock_update_classid(sk);
 585
 586        /* Under a page?  Don't bother with paged skb. */
 587        if (prepad + len < PAGE_SIZE || !linear)
 588                linear = len;
 589
 590        skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
 591                                   &err);
 592        if (!skb)
 593                return ERR_PTR(err);
 594
 595        skb_reserve(skb, prepad);
 596        skb_put(skb, linear);
 597        skb->data_len = len - linear;
 598        skb->len += len - linear;
 599
 600        return skb;
 601}
 602
 603/* Get packet from user space buffer */
 604static ssize_t tun_get_user(struct tun_struct *tun,
 605                            const struct iovec *iv, size_t count,
 606                            int noblock)
 607{
 608        struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 609        struct sk_buff *skb;
 610        size_t len = count, align = NET_SKB_PAD;
 611        struct virtio_net_hdr gso = { 0 };
 612        int offset = 0;
 613
 614        if (!(tun->flags & TUN_NO_PI)) {
 615                if ((len -= sizeof(pi)) > count)
 616                        return -EINVAL;
 617
 618                if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
 619                        return -EFAULT;
 620                offset += sizeof(pi);
 621        }
 622
 623        if (tun->flags & TUN_VNET_HDR) {
 624                if ((len -= tun->vnet_hdr_sz) > count)
 625                        return -EINVAL;
 626
 627                if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
 628                        return -EFAULT;
 629
 630                if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
 631                    gso.csum_start + gso.csum_offset + 2 > gso.hdr_len)
 632                        gso.hdr_len = gso.csum_start + gso.csum_offset + 2;
 633
 634                if (gso.hdr_len > len)
 635                        return -EINVAL;
 636                offset += tun->vnet_hdr_sz;
 637        }
 638
 639        if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
 640                align += NET_IP_ALIGN;
 641                if (unlikely(len < ETH_HLEN ||
 642                             (gso.hdr_len && gso.hdr_len < ETH_HLEN)))
 643                        return -EINVAL;
 644        }
 645
 646        skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
 647        if (IS_ERR(skb)) {
 648                if (PTR_ERR(skb) != -EAGAIN)
 649                        tun->dev->stats.rx_dropped++;
 650                return PTR_ERR(skb);
 651        }
 652
 653        if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
 654                tun->dev->stats.rx_dropped++;
 655                kfree_skb(skb);
 656                return -EFAULT;
 657        }
 658
 659        if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 660                if (!skb_partial_csum_set(skb, gso.csum_start,
 661                                          gso.csum_offset)) {
 662                        tun->dev->stats.rx_frame_errors++;
 663                        kfree_skb(skb);
 664                        return -EINVAL;
 665                }
 666        }
 667
 668        switch (tun->flags & TUN_TYPE_MASK) {
 669        case TUN_TUN_DEV:
 670                if (tun->flags & TUN_NO_PI) {
 671                        switch (skb->data[0] & 0xf0) {
 672                        case 0x40:
 673                                pi.proto = htons(ETH_P_IP);
 674                                break;
 675                        case 0x60:
 676                                pi.proto = htons(ETH_P_IPV6);
 677                                break;
 678                        default:
 679                                tun->dev->stats.rx_dropped++;
 680                                kfree_skb(skb);
 681                                return -EINVAL;
 682                        }
 683                }
 684
 685                skb_reset_mac_header(skb);
 686                skb->protocol = pi.proto;
 687                skb->dev = tun->dev;
 688                break;
 689        case TUN_TAP_DEV:
 690                skb->protocol = eth_type_trans(skb, tun->dev);
 691                break;
 692        }
 693
 694        if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 695                pr_debug("GSO!\n");
 696                switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 697                case VIRTIO_NET_HDR_GSO_TCPV4:
 698                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 699                        break;
 700                case VIRTIO_NET_HDR_GSO_TCPV6:
 701                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 702                        break;
 703                case VIRTIO_NET_HDR_GSO_UDP:
 704                        skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 705                        break;
 706                default:
 707                        tun->dev->stats.rx_frame_errors++;
 708                        kfree_skb(skb);
 709                        return -EINVAL;
 710                }
 711
 712                if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 713                        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 714
 715                skb_shinfo(skb)->gso_size = gso.gso_size;
 716                if (skb_shinfo(skb)->gso_size == 0) {
 717                        tun->dev->stats.rx_frame_errors++;
 718                        kfree_skb(skb);
 719                        return -EINVAL;
 720                }
 721
 722                /* Header must be checked, and gso_segs computed. */
 723                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 724                skb_shinfo(skb)->gso_segs = 0;
 725        }
 726
 727        netif_rx_ni(skb);
 728
 729        tun->dev->stats.rx_packets++;
 730        tun->dev->stats.rx_bytes += len;
 731
 732        return count;
 733}
 734
 735static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 736                              unsigned long count, loff_t pos)
 737{
 738        struct file *file = iocb->ki_filp;
 739        struct tun_struct *tun = tun_get(file);
 740        ssize_t result;
 741
 742        if (!tun)
 743                return -EBADFD;
 744
 745        tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
 746
 747        result = tun_get_user(tun, iv, iov_length(iv, count),
 748                              file->f_flags & O_NONBLOCK);
 749
 750        tun_put(tun);
 751        return result;
 752}
 753
 754/* Put packet to the user space buffer */
 755static ssize_t tun_put_user(struct tun_struct *tun,
 756                            struct sk_buff *skb,
 757                            const struct iovec *iv, int len)
 758{
 759        struct tun_pi pi = { 0, skb->protocol };
 760        ssize_t total = 0;
 761
 762        if (!(tun->flags & TUN_NO_PI)) {
 763                if ((len -= sizeof(pi)) < 0)
 764                        return -EINVAL;
 765
 766                if (len < skb->len) {
 767                        /* Packet will be striped */
 768                        pi.flags |= TUN_PKT_STRIP;
 769                }
 770
 771                if (memcpy_toiovecend(iv, (void *) &pi, 0, sizeof(pi)))
 772                        return -EFAULT;
 773                total += sizeof(pi);
 774        }
 775
 776        if (tun->flags & TUN_VNET_HDR) {
 777                struct virtio_net_hdr gso = { 0 }; /* no info leak */
 778                if ((len -= tun->vnet_hdr_sz) < 0)
 779                        return -EINVAL;
 780
 781                if (skb_is_gso(skb)) {
 782                        struct skb_shared_info *sinfo = skb_shinfo(skb);
 783
 784                        /* This is a hint as to how much should be linear. */
 785                        gso.hdr_len = skb_headlen(skb);
 786                        gso.gso_size = sinfo->gso_size;
 787                        if (sinfo->gso_type & SKB_GSO_TCPV4)
 788                                gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 789                        else if (sinfo->gso_type & SKB_GSO_TCPV6)
 790                                gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 791                        else if (sinfo->gso_type & SKB_GSO_UDP)
 792                                gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
 793                        else {
 794                                pr_err("unexpected GSO type: "
 795                                       "0x%x, gso_size %d, hdr_len %d\n",
 796                                       sinfo->gso_type, gso.gso_size,
 797                                       gso.hdr_len);
 798                                print_hex_dump(KERN_ERR, "tun: ",
 799                                               DUMP_PREFIX_NONE,
 800                                               16, 1, skb->head,
 801                                               min((int)gso.hdr_len, 64), true);
 802                                WARN_ON_ONCE(1);
 803                                return -EINVAL;
 804                        }
 805                        if (sinfo->gso_type & SKB_GSO_TCP_ECN)
 806                                gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 807                } else
 808                        gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
 809
 810                if (skb->ip_summed == CHECKSUM_PARTIAL) {
 811                        gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 812                        gso.csum_start = skb_checksum_start_offset(skb);
 813                        gso.csum_offset = skb->csum_offset;
 814                } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 815                        gso.flags = VIRTIO_NET_HDR_F_DATA_VALID;
 816                } /* else everything is zero */
 817
 818                if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
 819                                               sizeof(gso))))
 820                        return -EFAULT;
 821                total += tun->vnet_hdr_sz;
 822        }
 823
 824        len = min_t(int, skb->len, len);
 825
 826        skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
 827        total += skb->len;
 828
 829        tun->dev->stats.tx_packets++;
 830        tun->dev->stats.tx_bytes += len;
 831
 832        return total;
 833}
 834
 835static ssize_t tun_do_read(struct tun_struct *tun,
 836                           struct kiocb *iocb, const struct iovec *iv,
 837                           ssize_t len, int noblock)
 838{
 839        DECLARE_WAITQUEUE(wait, current);
 840        struct sk_buff *skb;
 841        ssize_t ret = 0;
 842
 843        tun_debug(KERN_INFO, tun, "tun_chr_read\n");
 844
 845        if (unlikely(!noblock))
 846                add_wait_queue(&tun->wq.wait, &wait);
 847        while (len) {
 848                current->state = TASK_INTERRUPTIBLE;
 849
 850                /* Read frames from the queue */
 851                if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
 852                        if (noblock) {
 853                                ret = -EAGAIN;
 854                                break;
 855                        }
 856                        if (signal_pending(current)) {
 857                                ret = -ERESTARTSYS;
 858                                break;
 859                        }
 860                        if (tun->dev->reg_state != NETREG_REGISTERED) {
 861                                ret = -EIO;
 862                                break;
 863                        }
 864
 865                        /* Nothing to read, let's sleep */
 866                        schedule();
 867                        continue;
 868                }
 869                netif_wake_queue(tun->dev);
 870
 871                ret = tun_put_user(tun, skb, iv, len);
 872                kfree_skb(skb);
 873                break;
 874        }
 875
 876        current->state = TASK_RUNNING;
 877        if (unlikely(!noblock))
 878                remove_wait_queue(&tun->wq.wait, &wait);
 879
 880        return ret;
 881}
 882
 883static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 884                            unsigned long count, loff_t pos)
 885{
 886        struct file *file = iocb->ki_filp;
 887        struct tun_file *tfile = file->private_data;
 888        struct tun_struct *tun = __tun_get(tfile);
 889        ssize_t len, ret;
 890
 891        if (!tun)
 892                return -EBADFD;
 893        len = iov_length(iv, count);
 894        if (len < 0) {
 895                ret = -EINVAL;
 896                goto out;
 897        }
 898
 899        ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
 900        ret = min_t(ssize_t, ret, len);
 901out:
 902        tun_put(tun);
 903        return ret;
 904}
 905
 906static void tun_setup(struct net_device *dev)
 907{
 908        struct tun_struct *tun = netdev_priv(dev);
 909
 910        tun->owner = -1;
 911        tun->group = -1;
 912
 913        dev->ethtool_ops = &tun_ethtool_ops;
 914        dev->destructor = tun_free_netdev;
 915}
 916
 917/* Trivial set of netlink ops to allow deleting tun or tap
 918 * device with netlink.
 919 */
 920static int tun_validate(struct nlattr *tb[], struct nlattr *data[])
 921{
 922        return -EINVAL;
 923}
 924
 925static struct rtnl_link_ops tun_link_ops __read_mostly = {
 926        .kind           = DRV_NAME,
 927        .priv_size      = sizeof(struct tun_struct),
 928        .setup          = tun_setup,
 929        .validate       = tun_validate,
 930};
 931
 932static void tun_sock_write_space(struct sock *sk)
 933{
 934        struct tun_struct *tun;
 935        wait_queue_head_t *wqueue;
 936
 937        if (!sock_writeable(sk))
 938                return;
 939
 940        if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 941                return;
 942
 943        wqueue = sk_sleep(sk);
 944        if (wqueue && waitqueue_active(wqueue))
 945                wake_up_interruptible_sync_poll(wqueue, POLLOUT |
 946                                                POLLWRNORM | POLLWRBAND);
 947
 948        tun = tun_sk(sk)->tun;
 949        kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
 950}
 951
 952static void tun_sock_destruct(struct sock *sk)
 953{
 954        free_netdev(tun_sk(sk)->tun->dev);
 955}
 956
 957static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 958                       struct msghdr *m, size_t total_len)
 959{
 960        struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
 961        return tun_get_user(tun, m->msg_iov, total_len,
 962                            m->msg_flags & MSG_DONTWAIT);
 963}
 964
 965static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
 966                       struct msghdr *m, size_t total_len,
 967                       int flags)
 968{
 969        struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
 970        int ret;
 971        if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
 972                return -EINVAL;
 973        ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
 974                          flags & MSG_DONTWAIT);
 975        if (ret > total_len) {
 976                m->msg_flags |= MSG_TRUNC;
 977                ret = flags & MSG_TRUNC ? ret : total_len;
 978        }
 979        return ret;
 980}
 981
 982/* Ops structure to mimic raw sockets with tun */
 983static const struct proto_ops tun_socket_ops = {
 984        .sendmsg = tun_sendmsg,
 985        .recvmsg = tun_recvmsg,
 986};
 987
 988static struct proto tun_proto = {
 989        .name           = "tun",
 990        .owner          = THIS_MODULE,
 991        .obj_size       = sizeof(struct tun_sock),
 992};
 993
 994static int tun_flags(struct tun_struct *tun)
 995{
 996        int flags = 0;
 997
 998        if (tun->flags & TUN_TUN_DEV)
 999                flags |= IFF_TUN;
1000        else
1001                flags |= IFF_TAP;
1002
1003        if (tun->flags & TUN_NO_PI)
1004                flags |= IFF_NO_PI;
1005
1006        if (tun->flags & TUN_ONE_QUEUE)
1007                flags |= IFF_ONE_QUEUE;
1008
1009        if (tun->flags & TUN_VNET_HDR)
1010                flags |= IFF_VNET_HDR;
1011
1012        return flags;
1013}
1014
1015static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
1016                              char *buf)
1017{
1018        struct tun_struct *tun = netdev_priv(to_net_dev(dev));
1019        return sprintf(buf, "0x%x\n", tun_flags(tun));
1020}
1021
1022static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr,
1023                              char *buf)
1024{
1025        struct tun_struct *tun = netdev_priv(to_net_dev(dev));
1026        return sprintf(buf, "%d\n", tun->owner);
1027}
1028
1029static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr,
1030                              char *buf)
1031{
1032        struct tun_struct *tun = netdev_priv(to_net_dev(dev));
1033        return sprintf(buf, "%d\n", tun->group);
1034}
1035
1036static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL);
1037static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL);
1038static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
1039
1040static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
1041{
1042        struct sock *sk;
1043        struct tun_struct *tun;
1044        struct net_device *dev;
1045        int err;
1046
1047        dev = __dev_get_by_name(net, ifr->ifr_name);
1048        if (dev) {
1049                const struct cred *cred = current_cred();
1050
1051                if (ifr->ifr_flags & IFF_TUN_EXCL)
1052                        return -EBUSY;
1053                if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
1054                        tun = netdev_priv(dev);
1055                else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
1056                        tun = netdev_priv(dev);
1057                else
1058                        return -EINVAL;
1059
1060                if (((tun->owner != -1 && cred->euid != tun->owner) ||
1061                     (tun->group != -1 && !in_egroup_p(tun->group))) &&
1062                    !capable(CAP_NET_ADMIN))
1063                        return -EPERM;
1064                err = security_tun_dev_attach(tun->socket.sk);
1065                if (err < 0)
1066                        return err;
1067
1068                err = tun_attach(tun, file);
1069                if (err < 0)
1070                        return err;
1071        }
1072        else {
1073                char *name;
1074                unsigned long flags = 0;
1075
1076                if (!capable(CAP_NET_ADMIN))
1077                        return -EPERM;
1078                err = security_tun_dev_create();
1079                if (err < 0)
1080                        return err;
1081
1082                /* Set dev type */
1083                if (ifr->ifr_flags & IFF_TUN) {
1084                        /* TUN device */
1085                        flags |= TUN_TUN_DEV;
1086                        name = "tun%d";
1087                } else if (ifr->ifr_flags & IFF_TAP) {
1088                        /* TAP device */
1089                        flags |= TUN_TAP_DEV;
1090                        name = "tap%d";
1091                } else
1092                        return -EINVAL;
1093
1094                if (*ifr->ifr_name)
1095                        name = ifr->ifr_name;
1096
1097                dev = alloc_netdev(sizeof(struct tun_struct), name,
1098                                   tun_setup);
1099                if (!dev)
1100                        return -ENOMEM;
1101
1102                dev_net_set(dev, net);
1103                dev->rtnl_link_ops = &tun_link_ops;
1104
1105                tun = netdev_priv(dev);
1106                tun->dev = dev;
1107                tun->flags = flags;
1108                tun->txflt.count = 0;
1109                tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
1110
1111                err = -ENOMEM;
1112                sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
1113                if (!sk)
1114                        goto err_free_dev;
1115
1116                tun->socket.wq = &tun->wq;
1117                init_waitqueue_head(&tun->wq.wait);
1118                tun->socket.ops = &tun_socket_ops;
1119                sock_init_data(&tun->socket, sk);
1120                sk->sk_write_space = tun_sock_write_space;
1121                sk->sk_sndbuf = INT_MAX;
1122
1123                tun_sk(sk)->tun = tun;
1124
1125                security_tun_dev_post_create(sk);
1126
1127                tun_net_init(dev);
1128
1129                dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
1130                        TUN_USER_FEATURES;
1131                dev->features = dev->hw_features;
1132
1133                err = register_netdevice(tun->dev);
1134                if (err < 0)
1135                        goto err_free_sk;
1136
1137                if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
1138                    device_create_file(&tun->dev->dev, &dev_attr_owner) ||
1139                    device_create_file(&tun->dev->dev, &dev_attr_group))
1140                        pr_err("Failed to create tun sysfs files\n");
1141
1142                sk->sk_destruct = tun_sock_destruct;
1143
1144                err = tun_attach(tun, file);
1145                if (err < 0)
1146                        goto failed;
1147        }
1148
1149        tun_debug(KERN_INFO, tun, "tun_set_iff\n");
1150
1151        if (ifr->ifr_flags & IFF_NO_PI)
1152                tun->flags |= TUN_NO_PI;
1153        else
1154                tun->flags &= ~TUN_NO_PI;
1155
1156        if (ifr->ifr_flags & IFF_ONE_QUEUE)
1157                tun->flags |= TUN_ONE_QUEUE;
1158        else
1159                tun->flags &= ~TUN_ONE_QUEUE;
1160
1161        if (ifr->ifr_flags & IFF_VNET_HDR)
1162                tun->flags |= TUN_VNET_HDR;
1163        else
1164                tun->flags &= ~TUN_VNET_HDR;
1165
1166        /* Make sure persistent devices do not get stuck in
1167         * xoff state.
1168         */
1169        if (netif_running(tun->dev))
1170                netif_wake_queue(tun->dev);
1171
1172        strcpy(ifr->ifr_name, tun->dev->name);
1173        return 0;
1174
1175 err_free_sk:
1176        sock_put(sk);
1177 err_free_dev:
1178        free_netdev(dev);
1179 failed:
1180        return err;
1181}
1182
1183static int tun_get_iff(struct net *net, struct tun_struct *tun,
1184                       struct ifreq *ifr)
1185{
1186        tun_debug(KERN_INFO, tun, "tun_get_iff\n");
1187
1188        strcpy(ifr->ifr_name, tun->dev->name);
1189
1190        ifr->ifr_flags = tun_flags(tun);
1191
1192        return 0;
1193}
1194
1195/* This is like a cut-down ethtool ops, except done via tun fd so no
1196 * privs required. */
1197static int set_offload(struct tun_struct *tun, unsigned long arg)
1198{
1199        u32 features = 0;
1200
1201        if (arg & TUN_F_CSUM) {
1202                features |= NETIF_F_HW_CSUM;
1203                arg &= ~TUN_F_CSUM;
1204
1205                if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
1206                        if (arg & TUN_F_TSO_ECN) {
1207                                features |= NETIF_F_TSO_ECN;
1208                                arg &= ~TUN_F_TSO_ECN;
1209                        }
1210                        if (arg & TUN_F_TSO4)
1211                                features |= NETIF_F_TSO;
1212                        if (arg & TUN_F_TSO6)
1213                                features |= NETIF_F_TSO6;
1214                        arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
1215                }
1216
1217                if (arg & TUN_F_UFO) {
1218                        features |= NETIF_F_UFO;
1219                        arg &= ~TUN_F_UFO;
1220                }
1221        }
1222
1223        /* This gives the user a way to test for new features in future by
1224         * trying to set them. */
1225        if (arg)
1226                return -EINVAL;
1227
1228        tun->set_features = features;
1229        netdev_update_features(tun->dev);
1230
1231        return 0;
1232}
1233
1234static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
1235                            unsigned long arg, int ifreq_len)
1236{
1237        struct tun_file *tfile = file->private_data;
1238        struct tun_struct *tun;
1239        void __user* argp = (void __user*)arg;
1240        struct sock_fprog fprog;
1241        struct ifreq ifr;
1242        int sndbuf;
1243        int vnet_hdr_sz;
1244        int ret;
1245
1246        if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
1247                if (copy_from_user(&ifr, argp, ifreq_len))
1248                        return -EFAULT;
1249
1250        if (cmd == TUNGETFEATURES) {
1251                /* Currently this just means: "what IFF flags are valid?".
1252                 * This is needed because we never checked for invalid flags on
1253                 * TUNSETIFF. */
1254                return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
1255                                IFF_VNET_HDR,
1256                                (unsigned int __user*)argp);
1257        }
1258
1259        rtnl_lock();
1260
1261        tun = __tun_get(tfile);
1262        if (cmd == TUNSETIFF && !tun) {
1263                ifr.ifr_name[IFNAMSIZ-1] = '\0';
1264
1265                ret = tun_set_iff(tfile->net, file, &ifr);
1266
1267                if (ret)
1268                        goto unlock;
1269
1270                if (copy_to_user(argp, &ifr, ifreq_len))
1271                        ret = -EFAULT;
1272                goto unlock;
1273        }
1274
1275        ret = -EBADFD;
1276        if (!tun)
1277                goto unlock;
1278
1279        tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
1280
1281        ret = 0;
1282        switch (cmd) {
1283        case TUNGETIFF:
1284                ret = tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
1285                if (ret)
1286                        break;
1287
1288                if (copy_to_user(argp, &ifr, ifreq_len))
1289                        ret = -EFAULT;
1290                break;
1291
1292        case TUNSETNOCSUM:
1293                /* Disable/Enable checksum */
1294
1295                /* [unimplemented] */
1296                tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n",
1297                          arg ? "disabled" : "enabled");
1298                break;
1299
1300        case TUNSETPERSIST:
1301                /* Disable/Enable persist mode */
1302                if (arg)
1303                        tun->flags |= TUN_PERSIST;
1304                else
1305                        tun->flags &= ~TUN_PERSIST;
1306
1307                tun_debug(KERN_INFO, tun, "persist %s\n",
1308                          arg ? "enabled" : "disabled");
1309                break;
1310
1311        case TUNSETOWNER:
1312                /* Set owner of the device */
1313                tun->owner = (uid_t) arg;
1314
1315                tun_debug(KERN_INFO, tun, "owner set to %d\n", tun->owner);
1316                break;
1317
1318        case TUNSETGROUP:
1319                /* Set group of the device */
1320                tun->group= (gid_t) arg;
1321
1322                tun_debug(KERN_INFO, tun, "group set to %d\n", tun->group);
1323                break;
1324
1325        case TUNSETLINK:
1326                /* Only allow setting the type when the interface is down */
1327                if (tun->dev->flags & IFF_UP) {
1328                        tun_debug(KERN_INFO, tun,
1329                                  "Linktype set failed because interface is up\n");
1330                        ret = -EBUSY;
1331                } else {
1332                        tun->dev->type = (int) arg;
1333                        tun_debug(KERN_INFO, tun, "linktype set to %d\n",
1334                                  tun->dev->type);
1335                        ret = 0;
1336                }
1337                break;
1338
1339#ifdef TUN_DEBUG
1340        case TUNSETDEBUG:
1341                tun->debug = arg;
1342                break;
1343#endif
1344        case TUNSETOFFLOAD:
1345                ret = set_offload(tun, arg);
1346                break;
1347
1348        case TUNSETTXFILTER:
1349                /* Can be set only for TAPs */
1350                ret = -EINVAL;
1351                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1352                        break;
1353                ret = update_filter(&tun->txflt, (void __user *)arg);
1354                break;
1355
1356        case SIOCGIFHWADDR:
1357                /* Get hw address */
1358                memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
1359                ifr.ifr_hwaddr.sa_family = tun->dev->type;
1360                if (copy_to_user(argp, &ifr, ifreq_len))
1361                        ret = -EFAULT;
1362                break;
1363
1364        case SIOCSIFHWADDR:
1365                /* Set hw address */
1366                tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n",
1367                          ifr.ifr_hwaddr.sa_data);
1368
1369                ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
1370                break;
1371
1372        case TUNGETSNDBUF:
1373                sndbuf = tun->socket.sk->sk_sndbuf;
1374                if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
1375                        ret = -EFAULT;
1376                break;
1377
1378        case TUNSETSNDBUF:
1379                if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
1380                        ret = -EFAULT;
1381                        break;
1382                }
1383
1384                tun->socket.sk->sk_sndbuf = sndbuf;
1385                break;
1386
1387        case TUNGETVNETHDRSZ:
1388                vnet_hdr_sz = tun->vnet_hdr_sz;
1389                if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
1390                        ret = -EFAULT;
1391                break;
1392
1393        case TUNSETVNETHDRSZ:
1394                if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
1395                        ret = -EFAULT;
1396                        break;
1397                }
1398                if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
1399                        ret = -EINVAL;
1400                        break;
1401                }
1402
1403                tun->vnet_hdr_sz = vnet_hdr_sz;
1404                break;
1405
1406        case TUNATTACHFILTER:
1407                /* Can be set only for TAPs */
1408                ret = -EINVAL;
1409                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1410                        break;
1411                ret = -EFAULT;
1412                if (copy_from_user(&fprog, argp, sizeof(fprog)))
1413                        break;
1414
1415                ret = sk_attach_filter(&fprog, tun->socket.sk);
1416                break;
1417
1418        case TUNDETACHFILTER:
1419                /* Can be set only for TAPs */
1420                ret = -EINVAL;
1421                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1422                        break;
1423                ret = sk_detach_filter(tun->socket.sk);
1424                break;
1425
1426        default:
1427                ret = -EINVAL;
1428                break;
1429        }
1430
1431unlock:
1432        rtnl_unlock();
1433        if (tun)
1434                tun_put(tun);
1435        return ret;
1436}
1437
1438static long tun_chr_ioctl(struct file *file,
1439                          unsigned int cmd, unsigned long arg)
1440{
1441        return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq));
1442}
1443
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point.
 *
 * The commands listed below have their argument converted with
 * compat_ptr(); for all other commands the argument is truncated to
 * compat_ulong_t.  The request is then forwarded to __tun_chr_ioctl().
 */
static long tun_chr_compat_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	unsigned long native;

	switch (cmd) {
	case TUNSETIFF:
	case TUNGETIFF:
	case TUNSETTXFILTER:
	case TUNGETSNDBUF:
	case TUNSETSNDBUF:
	case SIOCGIFHWADDR:
	case SIOCSIFHWADDR:
		native = (unsigned long)compat_ptr(arg);
		break;
	default:
		native = (compat_ulong_t)arg;
		break;
	}

	/*
	 * compat_ifreq is shorter than ifreq, so we must not access beyond
	 * the end of that structure. All fields that are used in this
	 * driver are compatible though, we don't need to convert the
	 * contents.
	 */
	return __tun_chr_ioctl(file, cmd, native,
			       sizeof(struct compat_ifreq));
}
#endif /* CONFIG_COMPAT */
1472
1473static int tun_chr_fasync(int fd, struct file *file, int on)
1474{
1475        struct tun_struct *tun = tun_get(file);
1476        int ret;
1477
1478        if (!tun)
1479                return -EBADFD;
1480
1481        tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on);
1482
1483        if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
1484                goto out;
1485
1486        if (on) {
1487                ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
1488                if (ret)
1489                        goto out;
1490                tun->flags |= TUN_FASYNC;
1491        } else
1492                tun->flags &= ~TUN_FASYNC;
1493        ret = 0;
1494out:
1495        tun_put(tun);
1496        return ret;
1497}
1498
1499static int tun_chr_open(struct inode *inode, struct file * file)
1500{
1501        struct tun_file *tfile;
1502
1503        DBG1(KERN_INFO, "tunX: tun_chr_open\n");
1504
1505        tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
1506        if (!tfile)
1507                return -ENOMEM;
1508        atomic_set(&tfile->count, 0);
1509        tfile->tun = NULL;
1510        tfile->net = get_net(current->nsproxy->net_ns);
1511        file->private_data = tfile;
1512        return 0;
1513}
1514
1515static int tun_chr_close(struct inode *inode, struct file *file)
1516{
1517        struct tun_file *tfile = file->private_data;
1518        struct tun_struct *tun;
1519
1520        tun = __tun_get(tfile);
1521        if (tun) {
1522                struct net_device *dev = tun->dev;
1523
1524                tun_debug(KERN_INFO, tun, "tun_chr_close\n");
1525
1526                __tun_detach(tun);
1527
1528                /* If desirable, unregister the netdevice. */
1529                if (!(tun->flags & TUN_PERSIST)) {
1530                        rtnl_lock();
1531                        if (dev->reg_state == NETREG_REGISTERED)
1532                                unregister_netdevice(dev);
1533                        rtnl_unlock();
1534                }
1535        }
1536
1537        tun = tfile->tun;
1538        if (tun)
1539                sock_put(tun->socket.sk);
1540
1541        put_net(tfile->net);
1542        kfree(tfile);
1543
1544        return 0;
1545}
1546
1547static const struct file_operations tun_fops = {
1548        .owner  = THIS_MODULE,
1549        .llseek = no_llseek,
1550        .read  = do_sync_read,
1551        .aio_read  = tun_chr_aio_read,
1552        .write = do_sync_write,
1553        .aio_write = tun_chr_aio_write,
1554        .poll   = tun_chr_poll,
1555        .unlocked_ioctl = tun_chr_ioctl,
1556#ifdef CONFIG_COMPAT
1557        .compat_ioctl = tun_chr_compat_ioctl,
1558#endif
1559        .open   = tun_chr_open,
1560        .release = tun_chr_close,
1561        .fasync = tun_chr_fasync
1562};
1563
1564static struct miscdevice tun_miscdev = {
1565        .minor = TUN_MINOR,
1566        .name = "tun",
1567        .nodename = "net/tun",
1568        .fops = &tun_fops,
1569};
1570
1571/* ethtool interface */
1572
1573static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1574{
1575        cmd->supported          = 0;
1576        cmd->advertising        = 0;
1577        ethtool_cmd_speed_set(cmd, SPEED_10);
1578        cmd->duplex             = DUPLEX_FULL;
1579        cmd->port               = PORT_TP;
1580        cmd->phy_address        = 0;
1581        cmd->transceiver        = XCVR_INTERNAL;
1582        cmd->autoneg            = AUTONEG_DISABLE;
1583        cmd->maxtxpkt           = 0;
1584        cmd->maxrxpkt           = 0;
1585        return 0;
1586}
1587
1588static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1589{
1590        struct tun_struct *tun = netdev_priv(dev);
1591
1592        strcpy(info->driver, DRV_NAME);
1593        strcpy(info->version, DRV_VERSION);
1594        strcpy(info->fw_version, "N/A");
1595
1596        switch (tun->flags & TUN_TYPE_MASK) {
1597        case TUN_TUN_DEV:
1598                strcpy(info->bus_info, "tun");
1599                break;
1600        case TUN_TAP_DEV:
1601                strcpy(info->bus_info, "tap");
1602                break;
1603        }
1604}
1605
1606static u32 tun_get_msglevel(struct net_device *dev)
1607{
1608#ifdef TUN_DEBUG
1609        struct tun_struct *tun = netdev_priv(dev);
1610        return tun->debug;
1611#else
1612        return -EOPNOTSUPP;
1613#endif
1614}
1615
1616static void tun_set_msglevel(struct net_device *dev, u32 value)
1617{
1618#ifdef TUN_DEBUG
1619        struct tun_struct *tun = netdev_priv(dev);
1620        tun->debug = value;
1621#endif
1622}
1623
1624static const struct ethtool_ops tun_ethtool_ops = {
1625        .get_settings   = tun_get_settings,
1626        .get_drvinfo    = tun_get_drvinfo,
1627        .get_msglevel   = tun_get_msglevel,
1628        .set_msglevel   = tun_set_msglevel,
1629        .get_link       = ethtool_op_get_link,
1630};
1631
1632
1633static int __init tun_init(void)
1634{
1635        int ret = 0;
1636
1637        pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
1638        pr_info("%s\n", DRV_COPYRIGHT);
1639
1640        ret = rtnl_link_register(&tun_link_ops);
1641        if (ret) {
1642                pr_err("Can't register link_ops\n");
1643                goto err_linkops;
1644        }
1645
1646        ret = misc_register(&tun_miscdev);
1647        if (ret) {
1648                pr_err("Can't register misc device %d\n", TUN_MINOR);
1649                goto err_misc;
1650        }
1651        return  0;
1652err_misc:
1653        rtnl_link_unregister(&tun_link_ops);
1654err_linkops:
1655        return ret;
1656}
1657
1658static void tun_cleanup(void)
1659{
1660        misc_deregister(&tun_miscdev);
1661        rtnl_link_unregister(&tun_link_ops);
1662}
1663
1664/* Get an underlying socket object from tun file.  Returns error unless file is
1665 * attached to a device.  The returned object works like a packet socket, it
1666 * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
1667 * holding a reference to the file for as long as the socket is in use. */
1668struct socket *tun_get_socket(struct file *file)
1669{
1670        struct tun_struct *tun;
1671        if (file->f_op != &tun_fops)
1672                return ERR_PTR(-EINVAL);
1673        tun = tun_get(file);
1674        if (!tun)
1675                return ERR_PTR(-EBADFD);
1676        tun_put(tun);
1677        return &tun->socket;
1678}
1679EXPORT_SYMBOL_GPL(tun_get_socket);
1680
1681module_init(tun_init);
1682module_exit(tun_cleanup);
1683MODULE_DESCRIPTION(DRV_DESCRIPTION);
1684MODULE_AUTHOR(DRV_COPYRIGHT);
1685MODULE_LICENSE("GPL");
1686MODULE_ALIAS_MISCDEV(TUN_MINOR);
1687MODULE_ALIAS("devname:net/tun");
1688
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.