linux/drivers/net/tun.c
<<
>>
Prefs
   1/*
   2 *  TUN - Universal TUN/TAP device driver.
   3 *  Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
   4 *
   5 *  This program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2 of the License, or
   8 *  (at your option) any later version.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
  16 */
  17
  18/*
  19 *  Changes:
  20 *
  21 *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  22 *    Add TUNSETLINK ioctl to set the link encapsulation
  23 *
  24 *  Mark Smith <markzzzsmith@yahoo.com.au>
  25 *    Use random_ether_addr() for tap MAC address.
  26 *
  27 *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
  28 *    Fixes in packet dropping, queue length setting and queue wakeup.
  29 *    Increased default tx queue length.
  30 *    Added ethtool API.
  31 *    Minor cleanups
  32 *
  33 *  Daniel Podlejski <underley@underley.eu.org>
  34 *    Modifications for 2.3.99-pre5 kernel.
  35 */
  36
  37#define DRV_NAME        "tun"
  38#define DRV_VERSION     "1.6"
  39#define DRV_DESCRIPTION "Universal TUN/TAP device driver"
  40#define DRV_COPYRIGHT   "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
  41
  42#include <linux/module.h>
  43#include <linux/errno.h>
  44#include <linux/kernel.h>
  45#include <linux/major.h>
  46#include <linux/slab.h>
  47#include <linux/smp_lock.h>
  48#include <linux/poll.h>
  49#include <linux/fcntl.h>
  50#include <linux/init.h>
  51#include <linux/skbuff.h>
  52#include <linux/netdevice.h>
  53#include <linux/etherdevice.h>
  54#include <linux/miscdevice.h>
  55#include <linux/ethtool.h>
  56#include <linux/rtnetlink.h>
  57#include <linux/if.h>
  58#include <linux/if_arp.h>
  59#include <linux/if_ether.h>
  60#include <linux/if_tun.h>
  61#include <linux/crc32.h>
  62#include <linux/nsproxy.h>
  63#include <linux/virtio_net.h>
  64#include <net/net_namespace.h>
  65#include <net/netns/generic.h>
  66
  67#include <asm/system.h>
  68#include <asm/uaccess.h>
  69
  70/* Uncomment to enable debugging */
  71/* #define TUN_DEBUG 1 */
  72
  73#ifdef TUN_DEBUG
  74static int debug;
  75
  76#define DBG  if(tun->debug)printk
  77#define DBG1 if(debug==2)printk
  78#else
  79#define DBG( a... )
  80#define DBG1( a... )
  81#endif
  82
  83#define FLT_EXACT_COUNT 8
  84struct tap_filter {
  85        unsigned int    count;    /* Number of addrs. Zero means disabled */
  86        u32             mask[2];  /* Mask of the hashed addrs */
  87        unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
  88};
  89
  90struct tun_struct {
  91        struct list_head        list;
  92        unsigned int            flags;
  93        int                     attached;
  94        uid_t                   owner;
  95        gid_t                   group;
  96
  97        wait_queue_head_t       read_wait;
  98        struct sk_buff_head     readq;
  99
 100        struct net_device       *dev;
 101        struct fasync_struct    *fasync;
 102
 103        struct tap_filter       txflt;
 104
 105#ifdef TUN_DEBUG
 106        int debug;
 107#endif
 108};
 109
 110/* TAP filterting */
 111static void addr_hash_set(u32 *mask, const u8 *addr)
 112{
 113        int n = ether_crc(ETH_ALEN, addr) >> 26;
 114        mask[n >> 5] |= (1 << (n & 31));
 115}
 116
 117static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
 118{
 119        int n = ether_crc(ETH_ALEN, addr) >> 26;
 120        return mask[n >> 5] & (1 << (n & 31));
 121}
 122
 123static int update_filter(struct tap_filter *filter, void __user *arg)
 124{
 125        struct { u8 u[ETH_ALEN]; } *addr;
 126        struct tun_filter uf;
 127        int err, alen, n, nexact;
 128
 129        if (copy_from_user(&uf, arg, sizeof(uf)))
 130                return -EFAULT;
 131
 132        if (!uf.count) {
 133                /* Disabled */
 134                filter->count = 0;
 135                return 0;
 136        }
 137
 138        alen = ETH_ALEN * uf.count;
 139        addr = kmalloc(alen, GFP_KERNEL);
 140        if (!addr)
 141                return -ENOMEM;
 142
 143        if (copy_from_user(addr, arg + sizeof(uf), alen)) {
 144                err = -EFAULT;
 145                goto done;
 146        }
 147
 148        /* The filter is updated without holding any locks. Which is
 149         * perfectly safe. We disable it first and in the worst
 150         * case we'll accept a few undesired packets. */
 151        filter->count = 0;
 152        wmb();
 153
 154        /* Use first set of addresses as an exact filter */
 155        for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
 156                memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
 157
 158        nexact = n;
 159
 160        /* Remaining multicast addresses are hashed,
 161         * unicast will leave the filter disabled. */
 162        memset(filter->mask, 0, sizeof(filter->mask));
 163        for (; n < uf.count; n++) {
 164                if (!is_multicast_ether_addr(addr[n].u)) {
 165                        err = 0; /* no filter */
 166                        goto done;
 167                }
 168                addr_hash_set(filter->mask, addr[n].u);
 169        }
 170
 171        /* For ALLMULTI just set the mask to all ones.
 172         * This overrides the mask populated above. */
 173        if ((uf.flags & TUN_FLT_ALLMULTI))
 174                memset(filter->mask, ~0, sizeof(filter->mask));
 175
 176        /* Now enable the filter */
 177        wmb();
 178        filter->count = nexact;
 179
 180        /* Return the number of exact filters */
 181        err = nexact;
 182
 183done:
 184        kfree(addr);
 185        return err;
 186}
 187
 188/* Returns: 0 - drop, !=0 - accept */
 189static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
 190{
 191        /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
 192         * at this point. */
 193        struct ethhdr *eh = (struct ethhdr *) skb->data;
 194        int i;
 195
 196        /* Exact match */
 197        for (i = 0; i < filter->count; i++)
 198                if (!compare_ether_addr(eh->h_dest, filter->addr[i]))
 199                        return 1;
 200
 201        /* Inexact match (multicast only) */
 202        if (is_multicast_ether_addr(eh->h_dest))
 203                return addr_hash_test(filter->mask, eh->h_dest);
 204
 205        return 0;
 206}
 207
 208/*
 209 * Checks whether the packet is accepted or not.
 210 * Returns: 0 - drop, !=0 - accept
 211 */
 212static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
 213{
 214        if (!filter->count)
 215                return 1;
 216
 217        return run_filter(filter, skb);
 218}
 219
 220/* Network device part of the driver */
 221
 222static int tun_net_id;
 223struct tun_net {
 224        struct list_head dev_list;
 225};
 226
 227static const struct ethtool_ops tun_ethtool_ops;
 228
 229/* Net device open. */
 230static int tun_net_open(struct net_device *dev)
 231{
 232        netif_start_queue(dev);
 233        return 0;
 234}
 235
 236/* Net device close. */
 237static int tun_net_close(struct net_device *dev)
 238{
 239        netif_stop_queue(dev);
 240        return 0;
 241}
 242
 243/* Net device start xmit */
 244static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 245{
 246        struct tun_struct *tun = netdev_priv(dev);
 247
 248        DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 249
 250        /* Drop packet if interface is not attached */
 251        if (!tun->attached)
 252                goto drop;
 253
 254        /* Drop if the filter does not like it.
 255         * This is a noop if the filter is disabled.
 256         * Filter can be enabled only for the TAP devices. */
 257        if (!check_filter(&tun->txflt, skb))
 258                goto drop;
 259
 260        if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) {
 261                if (!(tun->flags & TUN_ONE_QUEUE)) {
 262                        /* Normal queueing mode. */
 263                        /* Packet scheduler handles dropping of further packets. */
 264                        netif_stop_queue(dev);
 265
 266                        /* We won't see all dropped packets individually, so overrun
 267                         * error is more appropriate. */
 268                        dev->stats.tx_fifo_errors++;
 269                } else {
 270                        /* Single queue mode.
 271                         * Driver handles dropping of all packets itself. */
 272                        goto drop;
 273                }
 274        }
 275
 276        /* Enqueue packet */
 277        skb_queue_tail(&tun->readq, skb);
 278        dev->trans_start = jiffies;
 279
 280        /* Notify and wake up reader process */
 281        if (tun->flags & TUN_FASYNC)
 282                kill_fasync(&tun->fasync, SIGIO, POLL_IN);
 283        wake_up_interruptible(&tun->read_wait);
 284        return 0;
 285
 286drop:
 287        dev->stats.tx_dropped++;
 288        kfree_skb(skb);
 289        return 0;
 290}
 291
 292static void tun_net_mclist(struct net_device *dev)
 293{
 294        /*
 295         * This callback is supposed to deal with mc filter in
 296         * _rx_ path and has nothing to do with the _tx_ path.
 297         * In rx path we always accept everything userspace gives us.
 298         */
 299        return;
 300}
 301
 302#define MIN_MTU 68
 303#define MAX_MTU 65535
 304
 305static int
 306tun_net_change_mtu(struct net_device *dev, int new_mtu)
 307{
 308        if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU)
 309                return -EINVAL;
 310        dev->mtu = new_mtu;
 311        return 0;
 312}
 313
 314static const struct net_device_ops tun_netdev_ops = {
 315        .ndo_open               = tun_net_open,
 316        .ndo_stop               = tun_net_close,
 317        .ndo_start_xmit         = tun_net_xmit,
 318        .ndo_change_mtu         = tun_net_change_mtu,
 319};
 320
 321static const struct net_device_ops tap_netdev_ops = {
 322        .ndo_open               = tun_net_open,
 323        .ndo_stop               = tun_net_close,
 324        .ndo_start_xmit         = tun_net_xmit,
 325        .ndo_change_mtu         = tun_net_change_mtu,
 326        .ndo_set_multicast_list = tun_net_mclist,
 327        .ndo_set_mac_address    = eth_mac_addr,
 328        .ndo_validate_addr      = eth_validate_addr,
 329};
 330
 331/* Initialize net device. */
 332static void tun_net_init(struct net_device *dev)
 333{
 334        struct tun_struct *tun = netdev_priv(dev);
 335
 336        switch (tun->flags & TUN_TYPE_MASK) {
 337        case TUN_TUN_DEV:
 338                dev->netdev_ops = &tun_netdev_ops;
 339
 340                /* Point-to-Point TUN Device */
 341                dev->hard_header_len = 0;
 342                dev->addr_len = 0;
 343                dev->mtu = 1500;
 344
 345                /* Zero header length */
 346                dev->type = ARPHRD_NONE;
 347                dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
 348                dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 349                break;
 350
 351        case TUN_TAP_DEV:
 352                dev->netdev_ops = &tap_netdev_ops;
 353                /* Ethernet TAP Device */
 354                ether_setup(dev);
 355
 356                random_ether_addr(dev->dev_addr);
 357
 358                dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 359                break;
 360        }
 361}
 362
 363/* Character device part */
 364
 365/* Poll */
 366static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 367{
 368        struct tun_struct *tun = file->private_data;
 369        unsigned int mask = POLLOUT | POLLWRNORM;
 370
 371        if (!tun)
 372                return -EBADFD;
 373
 374        DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 375
 376        poll_wait(file, &tun->read_wait, wait);
 377
 378        if (!skb_queue_empty(&tun->readq))
 379                mask |= POLLIN | POLLRDNORM;
 380
 381        return mask;
 382}
 383
 384/* prepad is the amount to reserve at front.  len is length after that.
 385 * linear is a hint as to how much to copy (usually headers). */
 386static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
 387                                     gfp_t gfp)
 388{
 389        struct sk_buff *skb;
 390        unsigned int i;
 391
 392        skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
 393        if (skb) {
 394                skb_reserve(skb, prepad);
 395                skb_put(skb, len);
 396                return skb;
 397        }
 398
 399        /* Under a page?  Don't bother with paged skb. */
 400        if (prepad + len < PAGE_SIZE)
 401                return NULL;
 402
 403        /* Start with a normal skb, and add pages. */
 404        skb = alloc_skb(prepad + linear, gfp);
 405        if (!skb)
 406                return NULL;
 407
 408        skb_reserve(skb, prepad);
 409        skb_put(skb, linear);
 410
 411        len -= linear;
 412
 413        for (i = 0; i < MAX_SKB_FRAGS; i++) {
 414                skb_frag_t *f = &skb_shinfo(skb)->frags[i];
 415
 416                f->page = alloc_page(gfp|__GFP_ZERO);
 417                if (!f->page)
 418                        break;
 419
 420                f->page_offset = 0;
 421                f->size = PAGE_SIZE;
 422
 423                skb->data_len += PAGE_SIZE;
 424                skb->len += PAGE_SIZE;
 425                skb->truesize += PAGE_SIZE;
 426                skb_shinfo(skb)->nr_frags++;
 427
 428                if (len < PAGE_SIZE) {
 429                        len = 0;
 430                        break;
 431                }
 432                len -= PAGE_SIZE;
 433        }
 434
 435        /* Too large, or alloc fail? */
 436        if (unlikely(len)) {
 437                kfree_skb(skb);
 438                skb = NULL;
 439        }
 440
 441        return skb;
 442}
 443
 444/* Get packet from user space buffer */
 445static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
 446{
 447        struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
 448        struct sk_buff *skb;
 449        size_t len = count, align = 0;
 450        struct virtio_net_hdr gso = { 0 };
 451
 452        if (!(tun->flags & TUN_NO_PI)) {
 453                if ((len -= sizeof(pi)) > count)
 454                        return -EINVAL;
 455
 456                if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
 457                        return -EFAULT;
 458        }
 459
 460        if (tun->flags & TUN_VNET_HDR) {
 461                if ((len -= sizeof(gso)) > count)
 462                        return -EINVAL;
 463
 464                if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
 465                        return -EFAULT;
 466
 467                if (gso.hdr_len > len)
 468                        return -EINVAL;
 469        }
 470
 471        if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
 472                align = NET_IP_ALIGN;
 473                if (unlikely(len < ETH_HLEN))
 474                        return -EINVAL;
 475        }
 476
 477        if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
 478                tun->dev->stats.rx_dropped++;
 479                return -ENOMEM;
 480        }
 481
 482        if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
 483                tun->dev->stats.rx_dropped++;
 484                kfree_skb(skb);
 485                return -EFAULT;
 486        }
 487
 488        if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 489                if (!skb_partial_csum_set(skb, gso.csum_start,
 490                                          gso.csum_offset)) {
 491                        tun->dev->stats.rx_frame_errors++;
 492                        kfree_skb(skb);
 493                        return -EINVAL;
 494                }
 495        } else if (tun->flags & TUN_NOCHECKSUM)
 496                skb->ip_summed = CHECKSUM_UNNECESSARY;
 497
 498        switch (tun->flags & TUN_TYPE_MASK) {
 499        case TUN_TUN_DEV:
 500                if (tun->flags & TUN_NO_PI) {
 501                        switch (skb->data[0] & 0xf0) {
 502                        case 0x40:
 503                                pi.proto = htons(ETH_P_IP);
 504                                break;
 505                        case 0x60:
 506                                pi.proto = htons(ETH_P_IPV6);
 507                                break;
 508                        default:
 509                                tun->dev->stats.rx_dropped++;
 510                                kfree_skb(skb);
 511                                return -EINVAL;
 512                        }
 513                }
 514
 515                skb_reset_mac_header(skb);
 516                skb->protocol = pi.proto;
 517                skb->dev = tun->dev;
 518                break;
 519        case TUN_TAP_DEV:
 520                skb->protocol = eth_type_trans(skb, tun->dev);
 521                break;
 522        };
 523
 524        if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 525                pr_debug("GSO!\n");
 526                switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 527                case VIRTIO_NET_HDR_GSO_TCPV4:
 528                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 529                        break;
 530                case VIRTIO_NET_HDR_GSO_TCPV6:
 531                        skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 532                        break;
 533                default:
 534                        tun->dev->stats.rx_frame_errors++;
 535                        kfree_skb(skb);
 536                        return -EINVAL;
 537                }
 538
 539                if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 540                        skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 541
 542                skb_shinfo(skb)->gso_size = gso.gso_size;
 543                if (skb_shinfo(skb)->gso_size == 0) {
 544                        tun->dev->stats.rx_frame_errors++;
 545                        kfree_skb(skb);
 546                        return -EINVAL;
 547                }
 548
 549                /* Header must be checked, and gso_segs computed. */
 550                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 551                skb_shinfo(skb)->gso_segs = 0;
 552        }
 553
 554        netif_rx_ni(skb);
 555
 556        tun->dev->stats.rx_packets++;
 557        tun->dev->stats.rx_bytes += len;
 558
 559        return count;
 560}
 561
 562static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 563                              unsigned long count, loff_t pos)
 564{
 565        struct tun_struct *tun = iocb->ki_filp->private_data;
 566
 567        if (!tun)
 568                return -EBADFD;
 569
 570        DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
 571
 572        return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
 573}
 574
 575/* Put packet to the user space buffer */
 576static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 577                                       struct sk_buff *skb,
 578                                       struct iovec *iv, int len)
 579{
 580        struct tun_pi pi = { 0, skb->protocol };
 581        ssize_t total = 0;
 582
 583        if (!(tun->flags & TUN_NO_PI)) {
 584                if ((len -= sizeof(pi)) < 0)
 585                        return -EINVAL;
 586
 587                if (len < skb->len) {
 588                        /* Packet will be striped */
 589                        pi.flags |= TUN_PKT_STRIP;
 590                }
 591
 592                if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
 593                        return -EFAULT;
 594                total += sizeof(pi);
 595        }
 596
 597        if (tun->flags & TUN_VNET_HDR) {
 598                struct virtio_net_hdr gso = { 0 }; /* no info leak */
 599                if ((len -= sizeof(gso)) < 0)
 600                        return -EINVAL;
 601
 602                if (skb_is_gso(skb)) {
 603                        struct skb_shared_info *sinfo = skb_shinfo(skb);
 604
 605                        /* This is a hint as to how much should be linear. */
 606                        gso.hdr_len = skb_headlen(skb);
 607                        gso.gso_size = sinfo->gso_size;
 608                        if (sinfo->gso_type & SKB_GSO_TCPV4)
 609                                gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 610                        else if (sinfo->gso_type & SKB_GSO_TCPV6)
 611                                gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 612                        else
 613                                BUG();
 614                        if (sinfo->gso_type & SKB_GSO_TCP_ECN)
 615                                gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
 616                } else
 617                        gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
 618
 619                if (skb->ip_summed == CHECKSUM_PARTIAL) {
 620                        gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 621                        gso.csum_start = skb->csum_start - skb_headroom(skb);
 622                        gso.csum_offset = skb->csum_offset;
 623                } /* else everything is zero */
 624
 625                if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso))))
 626                        return -EFAULT;
 627                total += sizeof(gso);
 628        }
 629
 630        len = min_t(int, skb->len, len);
 631
 632        skb_copy_datagram_iovec(skb, 0, iv, len);
 633        total += len;
 634
 635        tun->dev->stats.tx_packets++;
 636        tun->dev->stats.tx_bytes += len;
 637
 638        return total;
 639}
 640
 641static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 642                            unsigned long count, loff_t pos)
 643{
 644        struct file *file = iocb->ki_filp;
 645        struct tun_struct *tun = file->private_data;
 646        DECLARE_WAITQUEUE(wait, current);
 647        struct sk_buff *skb;
 648        ssize_t len, ret = 0;
 649
 650        if (!tun)
 651                return -EBADFD;
 652
 653        DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 654
 655        len = iov_length(iv, count);
 656        if (len < 0)
 657                return -EINVAL;
 658
 659        add_wait_queue(&tun->read_wait, &wait);
 660        while (len) {
 661                current->state = TASK_INTERRUPTIBLE;
 662
 663                /* Read frames from the queue */
 664                if (!(skb=skb_dequeue(&tun->readq))) {
 665                        if (file->f_flags & O_NONBLOCK) {
 666                                ret = -EAGAIN;
 667                                break;
 668                        }
 669                        if (signal_pending(current)) {
 670                                ret = -ERESTARTSYS;
 671                                break;
 672                        }
 673
 674                        /* Nothing to read, let's sleep */
 675                        schedule();
 676                        continue;
 677                }
 678                netif_wake_queue(tun->dev);
 679
 680                ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
 681                kfree_skb(skb);
 682                break;
 683        }
 684
 685        current->state = TASK_RUNNING;
 686        remove_wait_queue(&tun->read_wait, &wait);
 687
 688        return ret;
 689}
 690
 691static void tun_setup(struct net_device *dev)
 692{
 693        struct tun_struct *tun = netdev_priv(dev);
 694
 695        skb_queue_head_init(&tun->readq);
 696        init_waitqueue_head(&tun->read_wait);
 697
 698        tun->owner = -1;
 699        tun->group = -1;
 700
 701        dev->ethtool_ops = &tun_ethtool_ops;
 702        dev->destructor = free_netdev;
 703        dev->features |= NETIF_F_NETNS_LOCAL;
 704}
 705
 706static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name)
 707{
 708        struct tun_struct *tun;
 709
 710        ASSERT_RTNL();
 711        list_for_each_entry(tun, &tn->dev_list, list) {
 712                if (!strncmp(tun->dev->name, name, IFNAMSIZ))
 713                    return tun;
 714        }
 715
 716        return NULL;
 717}
 718
 719static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 720{
 721        struct tun_net *tn;
 722        struct tun_struct *tun;
 723        struct net_device *dev;
 724        const struct cred *cred = current_cred();
 725        int err;
 726
 727        tn = net_generic(net, tun_net_id);
 728        tun = tun_get_by_name(tn, ifr->ifr_name);
 729        if (tun) {
 730                if (tun->attached)
 731                        return -EBUSY;
 732
 733                /* Check permissions */
 734                if (((tun->owner != -1 &&
 735                      cred->euid != tun->owner) ||
 736                     (tun->group != -1 &&
 737                      cred->egid != tun->group)) &&
 738                    !capable(CAP_NET_ADMIN)) {
 739                        return -EPERM;
 740                }
 741        }
 742        else if (__dev_get_by_name(net, ifr->ifr_name))
 743                return -EINVAL;
 744        else {
 745                char *name;
 746                unsigned long flags = 0;
 747
 748                err = -EINVAL;
 749
 750                if (!capable(CAP_NET_ADMIN))
 751                        return -EPERM;
 752
 753                /* Set dev type */
 754                if (ifr->ifr_flags & IFF_TUN) {
 755                        /* TUN device */
 756                        flags |= TUN_TUN_DEV;
 757                        name = "tun%d";
 758                } else if (ifr->ifr_flags & IFF_TAP) {
 759                        /* TAP device */
 760                        flags |= TUN_TAP_DEV;
 761                        name = "tap%d";
 762                } else
 763                        goto failed;
 764
 765                if (*ifr->ifr_name)
 766                        name = ifr->ifr_name;
 767
 768                dev = alloc_netdev(sizeof(struct tun_struct), name,
 769                                   tun_setup);
 770                if (!dev)
 771                        return -ENOMEM;
 772
 773                dev_net_set(dev, net);
 774
 775                tun = netdev_priv(dev);
 776                tun->dev = dev;
 777                tun->flags = flags;
 778                tun->txflt.count = 0;
 779
 780                tun_net_init(dev);
 781
 782                if (strchr(dev->name, '%')) {
 783                        err = dev_alloc_name(dev, dev->name);
 784                        if (err < 0)
 785                                goto err_free_dev;
 786                }
 787
 788                err = register_netdevice(tun->dev);
 789                if (err < 0)
 790                        goto err_free_dev;
 791
 792                list_add(&tun->list, &tn->dev_list);
 793        }
 794
 795        DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
 796
 797        if (ifr->ifr_flags & IFF_NO_PI)
 798                tun->flags |= TUN_NO_PI;
 799        else
 800                tun->flags &= ~TUN_NO_PI;
 801
 802        if (ifr->ifr_flags & IFF_ONE_QUEUE)
 803                tun->flags |= TUN_ONE_QUEUE;
 804        else
 805                tun->flags &= ~TUN_ONE_QUEUE;
 806
 807        if (ifr->ifr_flags & IFF_VNET_HDR)
 808                tun->flags |= TUN_VNET_HDR;
 809        else
 810                tun->flags &= ~TUN_VNET_HDR;
 811
 812        file->private_data = tun;
 813        tun->attached = 1;
 814        get_net(dev_net(tun->dev));
 815
 816        /* Make sure persistent devices do not get stuck in
 817         * xoff state.
 818         */
 819        if (netif_running(tun->dev))
 820                netif_wake_queue(tun->dev);
 821
 822        strcpy(ifr->ifr_name, tun->dev->name);
 823        return 0;
 824
 825 err_free_dev:
 826        free_netdev(dev);
 827 failed:
 828        return err;
 829}
 830
 831static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
 832{
 833        struct tun_struct *tun = file->private_data;
 834
 835        if (!tun)
 836                return -EBADFD;
 837
 838        DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name);
 839
 840        strcpy(ifr->ifr_name, tun->dev->name);
 841
 842        ifr->ifr_flags = 0;
 843
 844        if (ifr->ifr_flags & TUN_TUN_DEV)
 845                ifr->ifr_flags |= IFF_TUN;
 846        else
 847                ifr->ifr_flags |= IFF_TAP;
 848
 849        if (tun->flags & TUN_NO_PI)
 850                ifr->ifr_flags |= IFF_NO_PI;
 851
 852        if (tun->flags & TUN_ONE_QUEUE)
 853                ifr->ifr_flags |= IFF_ONE_QUEUE;
 854
 855        if (tun->flags & TUN_VNET_HDR)
 856                ifr->ifr_flags |= IFF_VNET_HDR;
 857
 858        return 0;
 859}
 860
 861/* This is like a cut-down ethtool ops, except done via tun fd so no
 862 * privs required. */
 863static int set_offload(struct net_device *dev, unsigned long arg)
 864{
 865        unsigned int old_features, features;
 866
 867        old_features = dev->features;
 868        /* Unset features, set them as we chew on the arg. */
 869        features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
 870                                    |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6));
 871
 872        if (arg & TUN_F_CSUM) {
 873                features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
 874                arg &= ~TUN_F_CSUM;
 875
 876                if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
 877                        if (arg & TUN_F_TSO_ECN) {
 878                                features |= NETIF_F_TSO_ECN;
 879                                arg &= ~TUN_F_TSO_ECN;
 880                        }
 881                        if (arg & TUN_F_TSO4)
 882                                features |= NETIF_F_TSO;
 883                        if (arg & TUN_F_TSO6)
 884                                features |= NETIF_F_TSO6;
 885                        arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
 886                }
 887        }
 888
 889        /* This gives the user a way to test for new features in future by
 890         * trying to set them. */
 891        if (arg)
 892                return -EINVAL;
 893
 894        dev->features = features;
 895        if (old_features != dev->features)
 896                netdev_features_change(dev);
 897
 898        return 0;
 899}
 900
 901static int tun_chr_ioctl(struct inode *inode, struct file *file,
 902                         unsigned int cmd, unsigned long arg)
 903{
 904        struct tun_struct *tun = file->private_data;
 905        void __user* argp = (void __user*)arg;
 906        struct ifreq ifr;
 907        int ret;
 908
 909        if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
 910                if (copy_from_user(&ifr, argp, sizeof ifr))
 911                        return -EFAULT;
 912
 913        if (cmd == TUNSETIFF && !tun) {
 914                int err;
 915
 916                ifr.ifr_name[IFNAMSIZ-1] = '\0';
 917
 918                rtnl_lock();
 919                err = tun_set_iff(current->nsproxy->net_ns, file, &ifr);
 920                rtnl_unlock();
 921
 922                if (err)
 923                        return err;
 924
 925                if (copy_to_user(argp, &ifr, sizeof(ifr)))
 926                        return -EFAULT;
 927                return 0;
 928        }
 929
 930        if (cmd == TUNGETFEATURES) {
 931                /* Currently this just means: "what IFF flags are valid?".
 932                 * This is needed because we never checked for invalid flags on
 933                 * TUNSETIFF. */
 934                return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
 935                                IFF_VNET_HDR,
 936                                (unsigned int __user*)argp);
 937        }
 938
 939        if (!tun)
 940                return -EBADFD;
 941
 942        DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
 943
 944        switch (cmd) {
 945        case TUNGETIFF:
 946                ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
 947                if (ret)
 948                        return ret;
 949
 950                if (copy_to_user(argp, &ifr, sizeof(ifr)))
 951                        return -EFAULT;
 952                break;
 953
 954        case TUNSETNOCSUM:
 955                /* Disable/Enable checksum */
 956                if (arg)
 957                        tun->flags |= TUN_NOCHECKSUM;
 958                else
 959                        tun->flags &= ~TUN_NOCHECKSUM;
 960
 961                DBG(KERN_INFO "%s: checksum %s\n",
 962                    tun->dev->name, arg ? "disabled" : "enabled");
 963                break;
 964
 965        case TUNSETPERSIST:
 966                /* Disable/Enable persist mode */
 967                if (arg)
 968                        tun->flags |= TUN_PERSIST;
 969                else
 970                        tun->flags &= ~TUN_PERSIST;
 971
 972                DBG(KERN_INFO "%s: persist %s\n",
 973                    tun->dev->name, arg ? "enabled" : "disabled");
 974                break;
 975
 976        case TUNSETOWNER:
 977                /* Set owner of the device */
 978                tun->owner = (uid_t) arg;
 979
 980                DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
 981                break;
 982
 983        case TUNSETGROUP:
 984                /* Set group of the device */
 985                tun->group= (gid_t) arg;
 986
 987                DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
 988                break;
 989
 990        case TUNSETLINK:
 991                /* Only allow setting the type when the interface is down */
 992                rtnl_lock();
 993                if (tun->dev->flags & IFF_UP) {
 994                        DBG(KERN_INFO "%s: Linktype set failed because interface is up\n",
 995                                tun->dev->name);
 996                        ret = -EBUSY;
 997                } else {
 998                        tun->dev->type = (int) arg;
 999                        DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type);
1000                        ret = 0;
1001                }
1002                rtnl_unlock();
1003                return ret;
1004
1005#ifdef TUN_DEBUG
1006        case TUNSETDEBUG:
1007                tun->debug = arg;
1008                break;
1009#endif
1010        case TUNSETOFFLOAD:
1011                rtnl_lock();
1012                ret = set_offload(tun->dev, arg);
1013                rtnl_unlock();
1014                return ret;
1015
1016        case TUNSETTXFILTER:
1017                /* Can be set only for TAPs */
1018                if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
1019                        return -EINVAL;
1020                rtnl_lock();
1021                ret = update_filter(&tun->txflt, (void __user *)arg);
1022                rtnl_unlock();
1023                return ret;
1024
1025        case SIOCGIFHWADDR:
1026                /* Get hw addres */
1027                memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
1028                ifr.ifr_hwaddr.sa_family = tun->dev->type;
1029                if (copy_to_user(argp, &ifr, sizeof ifr))
1030                        return -EFAULT;
1031                return 0;
1032
1033        case SIOCSIFHWADDR:
1034                /* Set hw address */
1035                DBG(KERN_DEBUG "%s: set hw address: %pM\n",
1036                        tun->dev->name, ifr.ifr_hwaddr.sa_data);
1037
1038                rtnl_lock();
1039                ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
1040                rtnl_unlock();
1041                return ret;
1042
1043        default:
1044                return -EINVAL;
1045        };
1046
1047        return 0;
1048}
1049
1050static int tun_chr_fasync(int fd, struct file *file, int on)
1051{
1052        struct tun_struct *tun = file->private_data;
1053        int ret;
1054
1055        if (!tun)
1056                return -EBADFD;
1057
1058        DBG(KERN_INFO "%s: tun_chr_fasync %d\n", tun->dev->name, on);
1059
1060        lock_kernel();
1061        if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
1062                goto out;
1063
1064        if (on) {
1065                ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
1066                if (ret)
1067                        goto out;
1068                tun->flags |= TUN_FASYNC;
1069        } else
1070                tun->flags &= ~TUN_FASYNC;
1071        ret = 0;
1072out:
1073        unlock_kernel();
1074        return ret;
1075}
1076
1077static int tun_chr_open(struct inode *inode, struct file * file)
1078{
1079        cycle_kernel_lock();
1080        DBG1(KERN_INFO "tunX: tun_chr_open\n");
1081        file->private_data = NULL;
1082        return 0;
1083}
1084
1085static int tun_chr_close(struct inode *inode, struct file *file)
1086{
1087        struct tun_struct *tun = file->private_data;
1088
1089        if (!tun)
1090                return 0;
1091
1092        DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
1093
1094        rtnl_lock();
1095
1096        /* Detach from net device */
1097        file->private_data = NULL;
1098        tun->attached = 0;
1099        put_net(dev_net(tun->dev));
1100
1101        /* Drop read queue */
1102        skb_queue_purge(&tun->readq);
1103
1104        if (!(tun->flags & TUN_PERSIST)) {
1105                list_del(&tun->list);
1106                unregister_netdevice(tun->dev);
1107        }
1108
1109        rtnl_unlock();
1110
1111        return 0;
1112}
1113
1114static const struct file_operations tun_fops = {
1115        .owner  = THIS_MODULE,
1116        .llseek = no_llseek,
1117        .read  = do_sync_read,
1118        .aio_read  = tun_chr_aio_read,
1119        .write = do_sync_write,
1120        .aio_write = tun_chr_aio_write,
1121        .poll   = tun_chr_poll,
1122        .ioctl  = tun_chr_ioctl,
1123        .open   = tun_chr_open,
1124        .release = tun_chr_close,
1125        .fasync = tun_chr_fasync
1126};
1127
1128static struct miscdevice tun_miscdev = {
1129        .minor = TUN_MINOR,
1130        .name = "tun",
1131        .fops = &tun_fops,
1132};
1133
1134/* ethtool interface */
1135
1136static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1137{
1138        cmd->supported          = 0;
1139        cmd->advertising        = 0;
1140        cmd->speed              = SPEED_10;
1141        cmd->duplex             = DUPLEX_FULL;
1142        cmd->port               = PORT_TP;
1143        cmd->phy_address        = 0;
1144        cmd->transceiver        = XCVR_INTERNAL;
1145        cmd->autoneg            = AUTONEG_DISABLE;
1146        cmd->maxtxpkt           = 0;
1147        cmd->maxrxpkt           = 0;
1148        return 0;
1149}
1150
1151static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1152{
1153        struct tun_struct *tun = netdev_priv(dev);
1154
1155        strcpy(info->driver, DRV_NAME);
1156        strcpy(info->version, DRV_VERSION);
1157        strcpy(info->fw_version, "N/A");
1158
1159        switch (tun->flags & TUN_TYPE_MASK) {
1160        case TUN_TUN_DEV:
1161                strcpy(info->bus_info, "tun");
1162                break;
1163        case TUN_TAP_DEV:
1164                strcpy(info->bus_info, "tap");
1165                break;
1166        }
1167}
1168
1169static u32 tun_get_msglevel(struct net_device *dev)
1170{
1171#ifdef TUN_DEBUG
1172        struct tun_struct *tun = netdev_priv(dev);
1173        return tun->debug;
1174#else
1175        return -EOPNOTSUPP;
1176#endif
1177}
1178
1179static void tun_set_msglevel(struct net_device *dev, u32 value)
1180{
1181#ifdef TUN_DEBUG
1182        struct tun_struct *tun = netdev_priv(dev);
1183        tun->debug = value;
1184#endif
1185}
1186
1187static u32 tun_get_link(struct net_device *dev)
1188{
1189        struct tun_struct *tun = netdev_priv(dev);
1190        return tun->attached;
1191}
1192
1193static u32 tun_get_rx_csum(struct net_device *dev)
1194{
1195        struct tun_struct *tun = netdev_priv(dev);
1196        return (tun->flags & TUN_NOCHECKSUM) == 0;
1197}
1198
1199static int tun_set_rx_csum(struct net_device *dev, u32 data)
1200{
1201        struct tun_struct *tun = netdev_priv(dev);
1202        if (data)
1203                tun->flags &= ~TUN_NOCHECKSUM;
1204        else
1205                tun->flags |= TUN_NOCHECKSUM;
1206        return 0;
1207}
1208
1209static const struct ethtool_ops tun_ethtool_ops = {
1210        .get_settings   = tun_get_settings,
1211        .get_drvinfo    = tun_get_drvinfo,
1212        .get_msglevel   = tun_get_msglevel,
1213        .set_msglevel   = tun_set_msglevel,
1214        .get_link       = tun_get_link,
1215        .get_rx_csum    = tun_get_rx_csum,
1216        .set_rx_csum    = tun_set_rx_csum
1217};
1218
1219static int tun_init_net(struct net *net)
1220{
1221        struct tun_net *tn;
1222
1223        tn = kmalloc(sizeof(*tn), GFP_KERNEL);
1224        if (tn == NULL)
1225                return -ENOMEM;
1226
1227        INIT_LIST_HEAD(&tn->dev_list);
1228
1229        if (net_assign_generic(net, tun_net_id, tn)) {
1230                kfree(tn);
1231                return -ENOMEM;
1232        }
1233
1234        return 0;
1235}
1236
1237static void tun_exit_net(struct net *net)
1238{
1239        struct tun_net *tn;
1240        struct tun_struct *tun, *nxt;
1241
1242        tn = net_generic(net, tun_net_id);
1243
1244        rtnl_lock();
1245        list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) {
1246                DBG(KERN_INFO "%s cleaned up\n", tun->dev->name);
1247                unregister_netdevice(tun->dev);
1248        }
1249        rtnl_unlock();
1250
1251        kfree(tn);
1252}
1253
1254static struct pernet_operations tun_net_ops = {
1255        .init = tun_init_net,
1256        .exit = tun_exit_net,
1257};
1258
1259static int __init tun_init(void)
1260{
1261        int ret = 0;
1262
1263        printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
1264        printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
1265
1266        ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops);
1267        if (ret) {
1268                printk(KERN_ERR "tun: Can't register pernet ops\n");
1269                goto err_pernet;
1270        }
1271
1272        ret = misc_register(&tun_miscdev);
1273        if (ret) {
1274                printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
1275                goto err_misc;
1276        }
1277        return 0;
1278
1279err_misc:
1280        unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
1281err_pernet:
1282        return ret;
1283}
1284
1285static void tun_cleanup(void)
1286{
1287        misc_deregister(&tun_miscdev);
1288        unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
1289}
1290
1291module_init(tun_init);
1292module_exit(tun_cleanup);
1293MODULE_DESCRIPTION(DRV_DESCRIPTION);
1294MODULE_AUTHOR(DRV_COPYRIGHT);
1295MODULE_LICENSE("GPL");
1296MODULE_ALIAS_MISCDEV(TUN_MINOR);
1297
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.