linux/fs/btrfs/ioctl.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
  17 */
  18
  19#include <linux/kernel.h>
  20#include <linux/bio.h>
  21#include <linux/buffer_head.h>
  22#include <linux/file.h>
  23#include <linux/fs.h>
  24#include <linux/fsnotify.h>
  25#include <linux/pagemap.h>
  26#include <linux/highmem.h>
  27#include <linux/time.h>
  28#include <linux/init.h>
  29#include <linux/string.h>
  30#include <linux/backing-dev.h>
  31#include <linux/mount.h>
  32#include <linux/mpage.h>
  33#include <linux/namei.h>
  34#include <linux/swap.h>
  35#include <linux/writeback.h>
  36#include <linux/statfs.h>
  37#include <linux/compat.h>
  38#include <linux/bit_spinlock.h>
  39#include <linux/security.h>
  40#include <linux/xattr.h>
  41#include <linux/vmalloc.h>
  42#include <linux/slab.h>
  43#include <linux/blkdev.h>
  44#include <linux/uuid.h>
  45#include "compat.h"
  46#include "ctree.h"
  47#include "disk-io.h"
  48#include "transaction.h"
  49#include "btrfs_inode.h"
  50#include "ioctl.h"
  51#include "print-tree.h"
  52#include "volumes.h"
  53#include "locking.h"
  54#include "inode-map.h"
  55#include "backref.h"
  56#include "rcu-string.h"
  57#include "send.h"
  58
  59/* Mask out flags that are inappropriate for the given type of inode. */
  60static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
  61{
  62        if (S_ISDIR(mode))
  63                return flags;
  64        else if (S_ISREG(mode))
  65                return flags & ~FS_DIRSYNC_FL;
  66        else
  67                return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
  68}
  69
  70/*
  71 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
  72 */
  73static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
  74{
  75        unsigned int iflags = 0;
  76
  77        if (flags & BTRFS_INODE_SYNC)
  78                iflags |= FS_SYNC_FL;
  79        if (flags & BTRFS_INODE_IMMUTABLE)
  80                iflags |= FS_IMMUTABLE_FL;
  81        if (flags & BTRFS_INODE_APPEND)
  82                iflags |= FS_APPEND_FL;
  83        if (flags & BTRFS_INODE_NODUMP)
  84                iflags |= FS_NODUMP_FL;
  85        if (flags & BTRFS_INODE_NOATIME)
  86                iflags |= FS_NOATIME_FL;
  87        if (flags & BTRFS_INODE_DIRSYNC)
  88                iflags |= FS_DIRSYNC_FL;
  89        if (flags & BTRFS_INODE_NODATACOW)
  90                iflags |= FS_NOCOW_FL;
  91
  92        if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
  93                iflags |= FS_COMPR_FL;
  94        else if (flags & BTRFS_INODE_NOCOMPRESS)
  95                iflags |= FS_NOCOMP_FL;
  96
  97        return iflags;
  98}
  99
 100/*
 101 * Update inode->i_flags based on the btrfs internal flags.
 102 */
 103void btrfs_update_iflags(struct inode *inode)
 104{
 105        struct btrfs_inode *ip = BTRFS_I(inode);
 106
 107        inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
 108
 109        if (ip->flags & BTRFS_INODE_SYNC)
 110                inode->i_flags |= S_SYNC;
 111        if (ip->flags & BTRFS_INODE_IMMUTABLE)
 112                inode->i_flags |= S_IMMUTABLE;
 113        if (ip->flags & BTRFS_INODE_APPEND)
 114                inode->i_flags |= S_APPEND;
 115        if (ip->flags & BTRFS_INODE_NOATIME)
 116                inode->i_flags |= S_NOATIME;
 117        if (ip->flags & BTRFS_INODE_DIRSYNC)
 118                inode->i_flags |= S_DIRSYNC;
 119}
 120
 121/*
 122 * Inherit flags from the parent inode.
 123 *
 124 * Currently only the compression flags and the cow flags are inherited.
 125 */
 126void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 127{
 128        unsigned int flags;
 129
 130        if (!dir)
 131                return;
 132
 133        flags = BTRFS_I(dir)->flags;
 134
 135        if (flags & BTRFS_INODE_NOCOMPRESS) {
 136                BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
 137                BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 138        } else if (flags & BTRFS_INODE_COMPRESS) {
 139                BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
 140                BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
 141        }
 142
 143        if (flags & BTRFS_INODE_NODATACOW)
 144                BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 145
 146        btrfs_update_iflags(inode);
 147}
 148
 149static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
 150{
 151        struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
 152        unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
 153
 154        if (copy_to_user(arg, &flags, sizeof(flags)))
 155                return -EFAULT;
 156        return 0;
 157}
 158
 159static int check_flags(unsigned int flags)
 160{
 161        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 162                      FS_NOATIME_FL | FS_NODUMP_FL | \
 163                      FS_SYNC_FL | FS_DIRSYNC_FL | \
 164                      FS_NOCOMP_FL | FS_COMPR_FL |
 165                      FS_NOCOW_FL))
 166                return -EOPNOTSUPP;
 167
 168        if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
 169                return -EINVAL;
 170
 171        return 0;
 172}
 173
/*
 * Replace the inode's btrfs flags with the set described by the FS_*_FL word
 * read from the user buffer @arg, and persist the inode in a transaction.
 *
 * Returns 0 on success, or:
 *   -EROFS   if the inode's root is read-only,
 *   -EFAULT  if @arg cannot be read,
 *   the check_flags() error for unsupported/conflicting bits,
 *   -EACCES  if the caller is neither owner nor capable,
 *   -EPERM   if APPEND/IMMUTABLE would change without CAP_LINUX_IMMUTABLE,
 *   or the error from mnt_want_write_file(), btrfs_start_transaction() or
 *   btrfs_update_inode().
 *
 * On any failure after the in-memory flags were modified, both ip->flags and
 * inode->i_flags are restored to the values saved under i_mutex.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct btrfs_inode *ip = BTRFS_I(inode);
	struct btrfs_root *root = ip->root;
	struct btrfs_trans_handle *trans;
	unsigned int flags, oldflags;
	int ret;
	u64 ip_oldflags;
	unsigned int i_oldflags;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&flags, arg, sizeof(flags)))
		return -EFAULT;

	ret = check_flags(flags);
	if (ret)
		return ret;

	if (!inode_owner_or_capable(inode))
		return -EACCES;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	/* Snapshot both flag words so the error path can roll back. */
	ip_oldflags = ip->flags;
	i_oldflags = inode->i_flags;

	/* Drop bits that do not apply to this inode type before comparing. */
	flags = btrfs_mask_flags(inode->i_mode, flags);
	oldflags = btrfs_flags_to_ioctl(ip->flags);
	/* Toggling APPEND or IMMUTABLE needs CAP_LINUX_IMMUTABLE. */
	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	/*
	 * The request is a full replacement: every supported bit is either
	 * set or explicitly cleared according to @flags.
	 */
	if (flags & FS_SYNC_FL)
		ip->flags |= BTRFS_INODE_SYNC;
	else
		ip->flags &= ~BTRFS_INODE_SYNC;
	if (flags & FS_IMMUTABLE_FL)
		ip->flags |= BTRFS_INODE_IMMUTABLE;
	else
		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (flags & FS_APPEND_FL)
		ip->flags |= BTRFS_INODE_APPEND;
	else
		ip->flags &= ~BTRFS_INODE_APPEND;
	if (flags & FS_NODUMP_FL)
		ip->flags |= BTRFS_INODE_NODUMP;
	else
		ip->flags &= ~BTRFS_INODE_NODUMP;
	if (flags & FS_NOATIME_FL)
		ip->flags |= BTRFS_INODE_NOATIME;
	else
		ip->flags &= ~BTRFS_INODE_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		ip->flags |= BTRFS_INODE_DIRSYNC;
	else
		ip->flags &= ~BTRFS_INODE_DIRSYNC;
	if (flags & FS_NOCOW_FL)
		ip->flags |= BTRFS_INODE_NODATACOW;
	else
		ip->flags &= ~BTRFS_INODE_NODATACOW;

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (flags & FS_NOCOMP_FL) {
		ip->flags &= ~BTRFS_INODE_COMPRESS;
		ip->flags |= BTRFS_INODE_NOCOMPRESS;
	} else if (flags & FS_COMPR_FL) {
		ip->flags |= BTRFS_INODE_COMPRESS;
		ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
	} else {
		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;
	}

	/* Mirror the new btrfs flags into the VFS i_flags, then persist. */
	btrfs_update_iflags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = CURRENT_TIME;
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans, root);
 out_drop:
	/* Undo the in-memory changes if they could not be written. */
	if (ret) {
		ip->flags = ip_oldflags;
		inode->i_flags = i_oldflags;
	}

 out_unlock:
	mutex_unlock(&inode->i_mutex);
	mnt_drop_write_file(file);
	return ret;
}
 283
 284static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 285{
 286        struct inode *inode = file->f_path.dentry->d_inode;
 287
 288        return put_user(inode->i_generation, arg);
 289}
 290
 291static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
 292{
 293        struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
 294        struct btrfs_device *device;
 295        struct request_queue *q;
 296        struct fstrim_range range;
 297        u64 minlen = ULLONG_MAX;
 298        u64 num_devices = 0;
 299        u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 300        int ret;
 301
 302        if (!capable(CAP_SYS_ADMIN))
 303                return -EPERM;
 304
 305        rcu_read_lock();
 306        list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
 307                                dev_list) {
 308                if (!device->bdev)
 309                        continue;
 310                q = bdev_get_queue(device->bdev);
 311                if (blk_queue_discard(q)) {
 312                        num_devices++;
 313                        minlen = min((u64)q->limits.discard_granularity,
 314                                     minlen);
 315                }
 316        }
 317        rcu_read_unlock();
 318
 319        if (!num_devices)
 320                return -EOPNOTSUPP;
 321        if (copy_from_user(&range, arg, sizeof(range)))
 322                return -EFAULT;
 323        if (range.start > total_bytes)
 324                return -EINVAL;
 325
 326        range.len = min(range.len, total_bytes - range.start);
 327        range.minlen = max(range.minlen, minlen);
 328        ret = btrfs_trim_fs(fs_info->tree_root, &range);
 329        if (ret < 0)
 330                return ret;
 331
 332        if (copy_to_user(arg, &range, sizeof(range)))
 333                return -EFAULT;
 334
 335        return 0;
 336}
 337
/*
 * Create a new, empty subvolume named @name (length @namelen) and link it
 * into the parent directory of @dentry.
 *
 * @root:          root of the tree containing the parent directory
 * @dentry:        negative dentry for the new subvolume; instantiated on
 *                 success
 * @async_transid: if non-NULL, receives the transaction id and the
 *                 transaction is committed asynchronously; otherwise the
 *                 commit is synchronous
 * @inherit:       optional qgroup inheritance description, passed on to
 *                 btrfs_qgroup_inherit() (NULL is passed if @inherit is NULL)
 *
 * Returns 0 on success or a negative errno.  Note that the transaction is
 * committed at the "fail" label on error paths as well; a commit error is
 * only reported if no earlier error was recorded.
 */
static noinline int create_subvol(struct btrfs_root *root,
				  struct dentry *dentry,
				  char *name, int namelen,
				  u64 *async_transid,
				  struct btrfs_qgroup_inherit **inherit)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	struct btrfs_root_item root_item;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct btrfs_root *new_root;
	struct dentry *parent = dentry->d_parent;
	struct inode *dir;
	struct timespec cur_time = CURRENT_TIME;
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
	u64 index = 0;
	uuid_le new_uuid;

	/* The new subvolume's id comes from the tree of tree roots. */
	ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
	if (ret)
		return ret;

	dir = parent->d_inode;

	/*
	 * 1 - inode item
	 * 2 - refs
	 * 1 - root item
	 * 2 - dir items
	 */
	trans = btrfs_start_transaction(root, 6);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
				   inherit ? *inherit : NULL);
	if (ret)
		goto fail;

	/* Allocate the block that becomes the root node of the new tree. */
	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
				      0, objectid, NULL, 0, 0, 0);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}

	/* Initialise the header of the new (empty) leaf. */
	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(leaf, objectid);

	write_extent_buffer(leaf, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(leaf),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
			    BTRFS_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	/* Build the root item, including its embedded directory inode item. */
	memset(&root_item, 0, sizeof(root_item));

	inode_item = &root_item.inode;
	inode_item->generation = cpu_to_le64(1);
	inode_item->size = cpu_to_le64(3);
	inode_item->nlink = cpu_to_le32(1);
	inode_item->nbytes = cpu_to_le64(root->leafsize);
	inode_item->mode = cpu_to_le32(S_IFDIR | 0755);

	root_item.flags = 0;
	root_item.byte_limit = 0;
	inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);

	btrfs_set_root_bytenr(&root_item, leaf->start);
	btrfs_set_root_generation(&root_item, trans->transid);
	btrfs_set_root_level(&root_item, 0);
	btrfs_set_root_refs(&root_item, 1);
	btrfs_set_root_used(&root_item, leaf->len);
	btrfs_set_root_last_snapshot(&root_item, 0);

	btrfs_set_root_generation_v2(&root_item,
			btrfs_root_generation(&root_item));
	/* Fresh UUID; creation and change times both start at "now". */
	uuid_le_gen(&new_uuid);
	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
	root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
	root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
	root_item.ctime = root_item.otime;
	btrfs_set_root_ctransid(&root_item, trans->transid);
	btrfs_set_root_otransid(&root_item, trans->transid);

	/* Done with the leaf itself; only its values cached above are used. */
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	leaf = NULL;

	btrfs_set_root_dirid(&root_item, new_dirid);

	key.objectid = objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				&root_item);
	if (ret)
		goto fail;

	/* Look the freshly inserted root up again to get a btrfs_root. */
	key.offset = (u64)-1;
	new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
	if (IS_ERR(new_root)) {
		btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
		ret = PTR_ERR(new_root);
		goto fail;
	}

	btrfs_record_root_in_trans(trans, new_root);

	ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
	if (ret) {
		/* We potentially lose an unused inode item here */
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(dir, &index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	ret = btrfs_insert_dir_item(trans, root,
				    name, namelen, dir, &key,
				    BTRFS_FT_DIR, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	/* The parent directory's size grows by twice the name length. */
	btrfs_i_size_write(dir, dir->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, root, dir);
	/*
	 * NOTE(review): unlike the earlier steps, a failure here (and in
	 * btrfs_add_root_ref() below) hits BUG_ON instead of aborting the
	 * transaction -- consider converting to the abort pattern.
	 */
	BUG_ON(ret);

	ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
				 objectid, root->root_key.objectid,
				 btrfs_ino(dir), index, name, namelen);

	BUG_ON(ret);

	/*
	 * NOTE(review): the result of btrfs_lookup_dentry() is passed to
	 * d_instantiate() unchecked, whereas create_snapshot() tests it with
	 * IS_ERR() first -- confirm an error pointer cannot reach this call.
	 */
	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
	/* Reached on success and on error: the transaction is always committed. */
	if (async_transid) {
		*async_transid = trans->transid;
		err = btrfs_commit_transaction_async(trans, root, 1);
	} else {
		err = btrfs_commit_transaction(trans, root);
	}
	/* An earlier error takes precedence over a commit error. */
	if (err && !ret)
		ret = err;
	return ret;
}
 502
 503static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 504                           char *name, int namelen, u64 *async_transid,
 505                           bool readonly, struct btrfs_qgroup_inherit **inherit)
 506{
 507        struct inode *inode;
 508        struct btrfs_pending_snapshot *pending_snapshot;
 509        struct btrfs_trans_handle *trans;
 510        int ret;
 511
 512        if (!root->ref_cows)
 513                return -EINVAL;
 514
 515        pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
 516        if (!pending_snapshot)
 517                return -ENOMEM;
 518
 519        btrfs_init_block_rsv(&pending_snapshot->block_rsv);
 520        pending_snapshot->dentry = dentry;
 521        pending_snapshot->root = root;
 522        pending_snapshot->readonly = readonly;
 523        if (inherit) {
 524                pending_snapshot->inherit = *inherit;
 525                *inherit = NULL;        /* take responsibility to free it */
 526        }
 527
 528        trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
 529        if (IS_ERR(trans)) {
 530                ret = PTR_ERR(trans);
 531                goto fail;
 532        }
 533
 534        ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
 535        BUG_ON(ret);
 536
 537        spin_lock(&root->fs_info->trans_lock);
 538        list_add(&pending_snapshot->list,
 539                 &trans->transaction->pending_snapshots);
 540        spin_unlock(&root->fs_info->trans_lock);
 541        if (async_transid) {
 542                *async_transid = trans->transid;
 543                ret = btrfs_commit_transaction_async(trans,
 544                                     root->fs_info->extent_root, 1);
 545        } else {
 546                ret = btrfs_commit_transaction(trans,
 547                                               root->fs_info->extent_root);
 548        }
 549        BUG_ON(ret);
 550
 551        ret = pending_snapshot->error;
 552        if (ret)
 553                goto fail;
 554
 555        ret = btrfs_orphan_cleanup(pending_snapshot->snap);
 556        if (ret)
 557                goto fail;
 558
 559        inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
 560        if (IS_ERR(inode)) {
 561                ret = PTR_ERR(inode);
 562                goto fail;
 563        }
 564        BUG_ON(!inode);
 565        d_instantiate(dentry, inode);
 566        ret = 0;
 567fail:
 568        kfree(pending_snapshot);
 569        return ret;
 570}
 571
 572/*  copy of check_sticky in fs/namei.c()
 573* It's inline, so penalty for filesystems that don't use sticky bit is
 574* minimal.
 575*/
 576static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
 577{
 578        uid_t fsuid = current_fsuid();
 579
 580        if (!(dir->i_mode & S_ISVTX))
 581                return 0;
 582        if (inode->i_uid == fsuid)
 583                return 0;
 584        if (dir->i_uid == fsuid)
 585                return 0;
 586        return !capable(CAP_FOWNER);
 587}
 588
 589/*  copy of may_delete in fs/namei.c()
 590 *      Check whether we can remove a link victim from directory dir, check
 591 *  whether the type of victim is right.
 592 *  1. We can't do it if dir is read-only (done in permission())
 593 *  2. We should have write and exec permissions on dir
 594 *  3. We can't remove anything from append-only dir
 595 *  4. We can't do anything with immutable dir (done in permission())
 596 *  5. If the sticky bit on dir is set we should either
 597 *      a. be owner of dir, or
 598 *      b. be owner of victim, or
 599 *      c. have CAP_FOWNER capability
 600 *  6. If the victim is append-only or immutable we can't do antyhing with
 601 *     links pointing to it.
 602 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 603 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 604 *  9. We can't remove a root or mountpoint.
 605 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 606 *     nfs_async_unlink().
 607 */
 608
 609static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
 610{
 611        int error;
 612
 613        if (!victim->d_inode)
 614                return -ENOENT;
 615
 616        BUG_ON(victim->d_parent->d_inode != dir);
 617        audit_inode_child(victim, dir);
 618
 619        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
 620        if (error)
 621                return error;
 622        if (IS_APPEND(dir))
 623                return -EPERM;
 624        if (btrfs_check_sticky(dir, victim->d_inode)||
 625                IS_APPEND(victim->d_inode)||
 626            IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
 627                return -EPERM;
 628        if (isdir) {
 629                if (!S_ISDIR(victim->d_inode->i_mode))
 630                        return -ENOTDIR;
 631                if (IS_ROOT(victim))
 632                        return -EBUSY;
 633        } else if (S_ISDIR(victim->d_inode->i_mode))
 634                return -EISDIR;
 635        if (IS_DEADDIR(dir))
 636                return -ENOENT;
 637        if (victim->d_flags & DCACHE_NFSFS_RENAMED)
 638                return -EBUSY;
 639        return 0;
 640}
 641
 642/* copy of may_create in fs/namei.c() */
 643static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 644{
 645        if (child->d_inode)
 646                return -EEXIST;
 647        if (IS_DEADDIR(dir))
 648                return -ENOENT;
 649        return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 650}
 651
 652/*
 653 * Create a new subvolume below @parent.  This is largely modeled after
 654 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 655 * inside this filesystem so it's quite a bit simpler.
 656 */
 657static noinline int btrfs_mksubvol(struct path *parent,
 658                                   char *name, int namelen,
 659                                   struct btrfs_root *snap_src,
 660                                   u64 *async_transid, bool readonly,
 661                                   struct btrfs_qgroup_inherit **inherit)
 662{
 663        struct inode *dir  = parent->dentry->d_inode;
 664        struct dentry *dentry;
 665        int error;
 666
 667        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 668
 669        dentry = lookup_one_len(name, parent->dentry, namelen);
 670        error = PTR_ERR(dentry);
 671        if (IS_ERR(dentry))
 672                goto out_unlock;
 673
 674        error = -EEXIST;
 675        if (dentry->d_inode)
 676                goto out_dput;
 677
 678        error = btrfs_may_create(dir, dentry);
 679        if (error)
 680                goto out_dput;
 681
 682        down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 683
 684        if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
 685                goto out_up_read;
 686
 687        if (snap_src) {
 688                error = create_snapshot(snap_src, dentry, name, namelen,
 689                                        async_transid, readonly, inherit);
 690        } else {
 691                error = create_subvol(BTRFS_I(dir)->root, dentry,
 692                                      name, namelen, async_transid, inherit);
 693        }
 694        if (!error)
 695                fsnotify_mkdir(dir, dentry);
 696out_up_read:
 697        up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 698out_dput:
 699        dput(dentry);
 700out_unlock:
 701        mutex_unlock(&dir->i_mutex);
 702        return error;
 703}
 704
 705/*
 706 * When we're defragging a range, we don't want to kick it off again
 707 * if it is really just waiting for delalloc to send it down.
 708 * If we find a nice big extent or delalloc range for the bytes in the
 709 * file you want to defrag, we return 0 to let you know to skip this
 710 * part of the file
 711 */
 712static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
 713{
 714        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 715        struct extent_map *em = NULL;
 716        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 717        u64 end;
 718
 719        read_lock(&em_tree->lock);
 720        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
 721        read_unlock(&em_tree->lock);
 722
 723        if (em) {
 724                end = extent_map_end(em);
 725                free_extent_map(em);
 726                if (end - offset > thresh)
 727                        return 0;
 728        }
 729        /* if we already have a nice delalloc here, just stop */
 730        thresh /= 2;
 731        end = count_range_bits(io_tree, &offset, offset + thresh,
 732                               thresh, EXTENT_DELALLOC, 1);
 733        if (end >= thresh)
 734                return 0;
 735        return 1;
 736}
 737
 738/*
 739 * helper function to walk through a file and find extents
 740 * newer than a specific transid, and smaller than thresh.
 741 *
 742 * This is used by the defragging code to find new and small
 743 * extents
 744 */
 745static int find_new_extents(struct btrfs_root *root,
 746                            struct inode *inode, u64 newer_than,
 747                            u64 *off, int thresh)
 748{
 749        struct btrfs_path *path;
 750        struct btrfs_key min_key;
 751        struct btrfs_key max_key;
 752        struct extent_buffer *leaf;
 753        struct btrfs_file_extent_item *extent;
 754        int type;
 755        int ret;
 756        u64 ino = btrfs_ino(inode);
 757
 758        path = btrfs_alloc_path();
 759        if (!path)
 760                return -ENOMEM;
 761
 762        min_key.objectid = ino;
 763        min_key.type = BTRFS_EXTENT_DATA_KEY;
 764        min_key.offset = *off;
 765
 766        max_key.objectid = ino;
 767        max_key.type = (u8)-1;
 768        max_key.offset = (u64)-1;
 769
 770        path->keep_locks = 1;
 771
 772        while(1) {
 773                ret = btrfs_search_forward(root, &min_key, &max_key,
 774                                           path, 0, newer_than);
 775                if (ret != 0)
 776                        goto none;
 777                if (min_key.objectid != ino)
 778                        goto none;
 779                if (min_key.type != BTRFS_EXTENT_DATA_KEY)
 780                        goto none;
 781
 782                leaf = path->nodes[0];
 783                extent = btrfs_item_ptr(leaf, path->slots[0],
 784                                        struct btrfs_file_extent_item);
 785
 786                type = btrfs_file_extent_type(leaf, extent);
 787                if (type == BTRFS_FILE_EXTENT_REG &&
 788                    btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
 789                    check_defrag_in_cache(inode, min_key.offset, thresh)) {
 790                        *off = min_key.offset;
 791                        btrfs_free_path(path);
 792                        return 0;
 793                }
 794
 795                if (min_key.offset == (u64)-1)
 796                        goto none;
 797
 798                min_key.offset++;
 799                btrfs_release_path(path);
 800        }
 801none:
 802        btrfs_free_path(path);
 803        return -ENOENT;
 804}
 805
 806static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
 807{
 808        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 809        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 810        struct extent_map *em;
 811        u64 len = PAGE_CACHE_SIZE;
 812
 813        /*
 814         * hopefully we have this extent in the tree already, try without
 815         * the full extent lock
 816         */
 817        read_lock(&em_tree->lock);
 818        em = lookup_extent_mapping(em_tree, start, len);
 819        read_unlock(&em_tree->lock);
 820
 821        if (!em) {
 822                /* get the big lock and read metadata off disk */
 823                lock_extent(io_tree, start, start + len - 1);
 824                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
 825                unlock_extent(io_tree, start, start + len - 1);
 826
 827                if (IS_ERR(em))
 828                        return NULL;
 829        }
 830
 831        return em;
 832}
 833
 834static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
 835{
 836        struct extent_map *next;
 837        bool ret = true;
 838
 839        /* this is the last extent */
 840        if (em->start + em->len >= i_size_read(inode))
 841                return false;
 842
 843        next = defrag_lookup_extent(inode, em->start + em->len);
 844        if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
 845                ret = false;
 846
 847        free_extent_map(next);
 848        return ret;
 849}
 850
 851static int should_defrag_range(struct inode *inode, u64 start, int thresh,
 852                               u64 *last_len, u64 *skip, u64 *defrag_end,
 853                               int compress)
 854{
 855        struct extent_map *em;
 856        int ret = 1;
 857        bool next_mergeable = true;
 858
 859        /*
 860         * make sure that once we start defragging an extent, we keep on
 861         * defragging it
 862         */
 863        if (start < *defrag_end)
 864                return 1;
 865
 866        *skip = 0;
 867
 868        em = defrag_lookup_extent(inode, start);
 869        if (!em)
 870                return 0;
 871
 872        /* this will cover holes, and inline extents */
 873        if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
 874                ret = 0;
 875                goto out;
 876        }
 877
 878        next_mergeable = defrag_check_next_extent(inode, em);
 879
 880        /*
 881         * we hit a real extent, if it is big or the next extent is not a
 882         * real extent, don't bother defragging it
 883         */
 884        if (!compress && (*last_len == 0 || *last_len >= thresh) &&
 885            (em->len >= thresh || !next_mergeable))
 886                ret = 0;
 887out:
 888        /*
 889         * last_len ends up being a counter of how many bytes we've defragged.
 890         * every time we choose not to defrag an extent, we reset *last_len
 891         * so that the next tiny extent will force a defrag.
 892         *
 893         * The end result of this is that tiny extents before a single big
 894         * extent will force at least part of that big extent to be defragged.
 895         */
 896        if (ret) {
 897                *defrag_end = extent_map_end(em);
 898        } else {
 899                *last_len = 0;
 900                *skip = extent_map_end(em);
 901                *defrag_end = 0;
 902        }
 903
 904        free_extent_map(em);
 905        return ret;
 906}
 907
 908/*
 909 * it doesn't do much good to defrag one or two pages
 910 * at a time.  This pulls in a nice chunk of pages
 911 * to COW and defrag.
 912 *
 913 * It also makes sure the delalloc code has enough
 914 * dirty data to avoid making new small extents as part
 915 * of the defrag
 916 *
 917 * It's a good idea to start RA on this range
 918 * before calling this.
 919 */
 920static int cluster_pages_for_defrag(struct inode *inode,
 921                                    struct page **pages,
 922                                    unsigned long start_index,
 923                                    int num_pages)
 924{
 925        unsigned long file_end;
 926        u64 isize = i_size_read(inode);
 927        u64 page_start;
 928        u64 page_end;
 929        u64 page_cnt;
 930        int ret;
 931        int i;
 932        int i_done;
 933        struct btrfs_ordered_extent *ordered;
 934        struct extent_state *cached_state = NULL;
 935        struct extent_io_tree *tree;
 936        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 937
 938        file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
 939        if (!isize || start_index > file_end)
 940                return 0;
 941
 942        page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
 943
 944        ret = btrfs_delalloc_reserve_space(inode,
 945                                           page_cnt << PAGE_CACHE_SHIFT);
 946        if (ret)
 947                return ret;
 948        i_done = 0;
 949        tree = &BTRFS_I(inode)->io_tree;
 950
 951        /* step one, lock all the pages */
 952        for (i = 0; i < page_cnt; i++) {
 953                struct page *page;
 954again:
 955                page = find_or_create_page(inode->i_mapping,
 956                                           start_index + i, mask);
 957                if (!page)
 958                        break;
 959
 960                page_start = page_offset(page);
 961                page_end = page_start + PAGE_CACHE_SIZE - 1;
 962                while (1) {
 963                        lock_extent(tree, page_start, page_end);
 964                        ordered = btrfs_lookup_ordered_extent(inode,
 965                                                              page_start);
 966                        unlock_extent(tree, page_start, page_end);
 967                        if (!ordered)
 968                                break;
 969
 970                        unlock_page(page);
 971                        btrfs_start_ordered_extent(inode, ordered, 1);
 972                        btrfs_put_ordered_extent(ordered);
 973                        lock_page(page);
 974                        /*
 975                         * we unlocked the page above, so we need check if
 976                         * it was released or not.
 977                         */
 978                        if (page->mapping != inode->i_mapping) {
 979                                unlock_page(page);
 980                                page_cache_release(page);
 981                                goto again;
 982                        }
 983                }
 984
 985                if (!PageUptodate(page)) {
 986                        btrfs_readpage(NULL, page);
 987                        lock_page(page);
 988                        if (!PageUptodate(page)) {
 989                                unlock_page(page);
 990                                page_cache_release(page);
 991                                ret = -EIO;
 992                                break;
 993                        }
 994                }
 995
 996                if (page->mapping != inode->i_mapping) {
 997                        unlock_page(page);
 998                        page_cache_release(page);
 999                        goto again;
1000                }
1001
1002                pages[i] = page;
1003                i_done++;
1004        }
1005        if (!i_done || ret)
1006                goto out;
1007
1008        if (!(inode->i_sb->s_flags & MS_ACTIVE))
1009                goto out;
1010
1011        /*
1012         * so now we have a nice long stream of locked
1013         * and up to date pages, lets wait on them
1014         */
1015        for (i = 0; i < i_done; i++)
1016                wait_on_page_writeback(pages[i]);
1017
1018        page_start = page_offset(pages[0]);
1019        page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
1020
1021        lock_extent_bits(&BTRFS_I(inode)->io_tree,
1022                         page_start, page_end - 1, 0, &cached_state);
1023        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
1024                          page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
1025                          EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
1026                          GFP_NOFS);
1027
1028        if (i_done != page_cnt) {
1029                spin_lock(&BTRFS_I(inode)->lock);
1030                BTRFS_I(inode)->outstanding_extents++;
1031                spin_unlock(&BTRFS_I(inode)->lock);
1032                btrfs_delalloc_release_space(inode,
1033                                     (page_cnt - i_done) << PAGE_CACHE_SHIFT);
1034        }
1035
1036
1037        btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
1038                                  &cached_state);
1039
1040        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1041                             page_start, page_end - 1, &cached_state,
1042                             GFP_NOFS);
1043
1044        for (i = 0; i < i_done; i++) {
1045                clear_page_dirty_for_io(pages[i]);
1046                ClearPageChecked(pages[i]);
1047                set_page_extent_mapped(pages[i]);
1048                set_page_dirty(pages[i]);
1049                unlock_page(pages[i]);
1050                page_cache_release(pages[i]);
1051        }
1052        return i_done;
1053out:
1054        for (i = 0; i < i_done; i++) {
1055                unlock_page(pages[i]);
1056                page_cache_release(pages[i]);
1057        }
1058        btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
1059        return ret;
1060
1061}
1062
1063int btrfs_defrag_file(struct inode *inode, struct file *file,
1064                      struct btrfs_ioctl_defrag_range_args *range,
1065                      u64 newer_than, unsigned long max_to_defrag)
1066{
1067        struct btrfs_root *root = BTRFS_I(inode)->root;
1068        struct file_ra_state *ra = NULL;
1069        unsigned long last_index;
1070        u64 isize = i_size_read(inode);
1071        u64 last_len = 0;
1072        u64 skip = 0;
1073        u64 defrag_end = 0;
1074        u64 newer_off = range->start;
1075        unsigned long i;
1076        unsigned long ra_index = 0;
1077        int ret;
1078        int defrag_count = 0;
1079        int compress_type = BTRFS_COMPRESS_ZLIB;
1080        int extent_thresh = range->extent_thresh;
1081        int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
1082        int cluster = max_cluster;
1083        u64 new_align = ~((u64)128 * 1024 - 1);
1084        struct page **pages = NULL;
1085
1086        if (extent_thresh == 0)
1087                extent_thresh = 256 * 1024;
1088
1089        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1090                if (range->compress_type > BTRFS_COMPRESS_TYPES)
1091                        return -EINVAL;
1092                if (range->compress_type)
1093                        compress_type = range->compress_type;
1094        }
1095
1096        if (isize == 0)
1097                return 0;
1098
1099        /*
1100         * if we were not given a file, allocate a readahead
1101         * context
1102         */
1103        if (!file) {
1104                ra = kzalloc(sizeof(*ra), GFP_NOFS);
1105                if (!ra)
1106                        return -ENOMEM;
1107                file_ra_state_init(ra, inode->i_mapping);
1108        } else {
1109                ra = &file->f_ra;
1110        }
1111
1112        pages = kmalloc(sizeof(struct page *) * max_cluster,
1113                        GFP_NOFS);
1114        if (!pages) {
1115                ret = -ENOMEM;
1116                goto out_ra;
1117        }
1118
1119        /* find the last page to defrag */
1120        if (range->start + range->len > range->start) {
1121                last_index = min_t(u64, isize - 1,
1122                         range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
1123        } else {
1124                last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1125        }
1126
1127        if (newer_than) {
1128                ret = find_new_extents(root, inode, newer_than,
1129                                       &newer_off, 64 * 1024);
1130                if (!ret) {
1131                        range->start = newer_off;
1132                        /*
1133                         * we always align our defrag to help keep
1134                         * the extents in the file evenly spaced
1135                         */
1136                        i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1137                } else
1138                        goto out_ra;
1139        } else {
1140                i = range->start >> PAGE_CACHE_SHIFT;
1141        }
1142        if (!max_to_defrag)
1143                max_to_defrag = last_index + 1;
1144
1145        /*
1146         * make writeback starts from i, so the defrag range can be
1147         * written sequentially.
1148         */
1149        if (i < inode->i_mapping->writeback_index)
1150                inode->i_mapping->writeback_index = i;
1151
1152        while (i <= last_index && defrag_count < max_to_defrag &&
1153               (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
1154                PAGE_CACHE_SHIFT)) {
1155                /*
1156                 * make sure we stop running if someone unmounts
1157                 * the FS
1158                 */
1159                if (!(inode->i_sb->s_flags & MS_ACTIVE))
1160                        break;
1161
1162                if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1163                                         extent_thresh, &last_len, &skip,
1164                                         &defrag_end, range->flags &
1165                                         BTRFS_DEFRAG_RANGE_COMPRESS)) {
1166                        unsigned long next;
1167                        /*
1168                         * the should_defrag function tells us how much to skip
1169                         * bump our counter by the suggested amount
1170                         */
1171                        next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1172                        i = max(i + 1, next);
1173                        continue;
1174                }
1175
1176                if (!newer_than) {
1177                        cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
1178                                   PAGE_CACHE_SHIFT) - i;
1179                        cluster = min(cluster, max_cluster);
1180                } else {
1181                        cluster = max_cluster;
1182                }
1183
1184                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1185                        BTRFS_I(inode)->force_compress = compress_type;
1186
1187                if (i + cluster > ra_index) {
1188                        ra_index = max(i, ra_index);
1189                        btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
1190                                       cluster);
1191                        ra_index += max_cluster;
1192                }
1193
1194                mutex_lock(&inode->i_mutex);
1195                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1196                if (ret < 0) {
1197                        mutex_unlock(&inode->i_mutex);
1198                        goto out_ra;
1199                }
1200
1201                defrag_count += ret;
1202                balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1203                mutex_unlock(&inode->i_mutex);
1204
1205                if (newer_than) {
1206                        if (newer_off == (u64)-1)
1207                                break;
1208
1209                        if (ret > 0)
1210                                i += ret;
1211
1212                        newer_off = max(newer_off + 1,
1213                                        (u64)i << PAGE_CACHE_SHIFT);
1214
1215                        ret = find_new_extents(root, inode,
1216                                               newer_than, &newer_off,
1217                                               64 * 1024);
1218                        if (!ret) {
1219                                range->start = newer_off;
1220                                i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1221                        } else {
1222                                break;
1223                        }
1224                } else {
1225                        if (ret > 0) {
1226                                i += ret;
1227                                last_len += ret << PAGE_CACHE_SHIFT;
1228                        } else {
1229                                i++;
1230                                last_len = 0;
1231                        }
1232                }
1233        }
1234
1235        if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
1236                filemap_flush(inode->i_mapping);
1237
1238        if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1239                /* the filemap_flush will queue IO into the worker threads, but
1240                 * we have to make sure the IO is actually started and that
1241                 * ordered extents get created before we return
1242                 */
1243                atomic_inc(&root->fs_info->async_submit_draining);
1244                while (atomic_read(&root->fs_info->nr_async_submits) ||
1245                      atomic_read(&root->fs_info->async_delalloc_pages)) {
1246                        wait_event(root->fs_info->async_submit_wait,
1247                           (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
1248                            atomic_read(&root->fs_info->async_delalloc_pages) == 0));
1249                }
1250                atomic_dec(&root->fs_info->async_submit_draining);
1251
1252                mutex_lock(&inode->i_mutex);
1253                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1254                mutex_unlock(&inode->i_mutex);
1255        }
1256
1257        if (range->compress_type == BTRFS_COMPRESS_LZO) {
1258                btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
1259        }
1260
1261        ret = defrag_count;
1262
1263out_ra:
1264        if (!file)
1265                kfree(ra);
1266        kfree(pages);
1267        return ret;
1268}
1269
1270static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1271                                        void __user *arg)
1272{
1273        u64 new_size;
1274        u64 old_size;
1275        u64 devid = 1;
1276        struct btrfs_ioctl_vol_args *vol_args;
1277        struct btrfs_trans_handle *trans;
1278        struct btrfs_device *device = NULL;
1279        char *sizestr;
1280        char *devstr = NULL;
1281        int ret = 0;
1282        int mod = 0;
1283
1284        if (root->fs_info->sb->s_flags & MS_RDONLY)
1285                return -EROFS;
1286
1287        if (!capable(CAP_SYS_ADMIN))
1288                return -EPERM;
1289
1290        mutex_lock(&root->fs_info->volume_mutex);
1291        if (root->fs_info->balance_ctl) {
1292                printk(KERN_INFO "btrfs: balance in progress\n");
1293                ret = -EINVAL;
1294                goto out;
1295        }
1296
1297        vol_args = memdup_user(arg, sizeof(*vol_args));
1298        if (IS_ERR(vol_args)) {
1299                ret = PTR_ERR(vol_args);
1300                goto out;
1301        }
1302
1303        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1304
1305        sizestr = vol_args->name;
1306        devstr = strchr(sizestr, ':');
1307        if (devstr) {
1308                char *end;
1309                sizestr = devstr + 1;
1310                *devstr = '\0';
1311                devstr = vol_args->name;
1312                devid = simple_strtoull(devstr, &end, 10);
1313                printk(KERN_INFO "btrfs: resizing devid %llu\n",
1314                       (unsigned long long)devid);
1315        }
1316        device = btrfs_find_device(root, devid, NULL, NULL);
1317        if (!device) {
1318                printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1319                       (unsigned long long)devid);
1320                ret = -EINVAL;
1321                goto out_free;
1322        }
1323        if (device->fs_devices && device->fs_devices->seeding) {
1324                printk(KERN_INFO "btrfs: resizer unable to apply on "
1325                       "seeding device %llu\n",
1326                       (unsigned long long)devid);
1327                ret = -EINVAL;
1328                goto out_free;
1329        }
1330
1331        if (!strcmp(sizestr, "max"))
1332                new_size = device->bdev->bd_inode->i_size;
1333        else {
1334                if (sizestr[0] == '-') {
1335                        mod = -1;
1336                        sizestr++;
1337                } else if (sizestr[0] == '+') {
1338                        mod = 1;
1339                        sizestr++;
1340                }
1341                new_size = memparse(sizestr, NULL);
1342                if (new_size == 0) {
1343                        ret = -EINVAL;
1344                        goto out_free;
1345                }
1346        }
1347
1348        old_size = device->total_bytes;
1349
1350        if (mod < 0) {
1351                if (new_size > old_size) {
1352                        ret = -EINVAL;
1353                        goto out_free;
1354                }
1355                new_size = old_size - new_size;
1356        } else if (mod > 0) {
1357                new_size = old_size + new_size;
1358        }
1359
1360        if (new_size < 256 * 1024 * 1024) {
1361                ret = -EINVAL;
1362                goto out_free;
1363        }
1364        if (new_size > device->bdev->bd_inode->i_size) {
1365                ret = -EFBIG;
1366                goto out_free;
1367        }
1368
1369        do_div(new_size, root->sectorsize);
1370        new_size *= root->sectorsize;
1371
1372        printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1373                      rcu_str_deref(device->name),
1374                      (unsigned long long)new_size);
1375
1376        if (new_size > old_size) {
1377                trans = btrfs_start_transaction(root, 0);
1378                if (IS_ERR(trans)) {
1379                        ret = PTR_ERR(trans);
1380                        goto out_free;
1381                }
1382                ret = btrfs_grow_device(trans, device, new_size);
1383                btrfs_commit_transaction(trans, root);
1384        } else if (new_size < old_size) {
1385                ret = btrfs_shrink_device(device, new_size);
1386        }
1387
1388out_free:
1389        kfree(vol_args);
1390out:
1391        mutex_unlock(&root->fs_info->volume_mutex);
1392        return ret;
1393}
1394
1395static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1396                                char *name, unsigned long fd, int subvol,
1397                                u64 *transid, bool readonly,
1398                                struct btrfs_qgroup_inherit **inherit)
1399{
1400        struct file *src_file;
1401        int namelen;
1402        int ret = 0;
1403
1404        ret = mnt_want_write_file(file);
1405        if (ret)
1406                goto out;
1407
1408        namelen = strlen(name);
1409        if (strchr(name, '/')) {
1410                ret = -EINVAL;
1411                goto out_drop_write;
1412        }
1413
1414        if (name[0] == '.' &&
1415           (namelen == 1 || (name[1] == '.' && namelen == 2))) {
1416                ret = -EEXIST;
1417                goto out_drop_write;
1418        }
1419
1420        if (subvol) {
1421                ret = btrfs_mksubvol(&file->f_path, name, namelen,
1422                                     NULL, transid, readonly, inherit);
1423        } else {
1424                struct inode *src_inode;
1425                src_file = fget(fd);
1426                if (!src_file) {
1427                        ret = -EINVAL;
1428                        goto out_drop_write;
1429                }
1430
1431                src_inode = src_file->f_path.dentry->d_inode;
1432                if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1433                        printk(KERN_INFO "btrfs: Snapshot src from "
1434                               "another FS\n");
1435                        ret = -EINVAL;
1436                        fput(src_file);
1437                        goto out_drop_write;
1438                }
1439                ret = btrfs_mksubvol(&file->f_path, name, namelen,
1440                                     BTRFS_I(src_inode)->root,
1441                                     transid, readonly, inherit);
1442                fput(src_file);
1443        }
1444out_drop_write:
1445        mnt_drop_write_file(file);
1446out:
1447        return ret;
1448}
1449
1450static noinline int btrfs_ioctl_snap_create(struct file *file,
1451                                            void __user *arg, int subvol)
1452{
1453        struct btrfs_ioctl_vol_args *vol_args;
1454        int ret;
1455
1456        vol_args = memdup_user(arg, sizeof(*vol_args));
1457        if (IS_ERR(vol_args))
1458                return PTR_ERR(vol_args);
1459        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1460
1461        ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1462                                              vol_args->fd, subvol,
1463                                              NULL, false, NULL);
1464
1465        kfree(vol_args);
1466        return ret;
1467}
1468
1469static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1470                                               void __user *arg, int subvol)
1471{
1472        struct btrfs_ioctl_vol_args_v2 *vol_args;
1473        int ret;
1474        u64 transid = 0;
1475        u64 *ptr = NULL;
1476        bool readonly = false;
1477        struct btrfs_qgroup_inherit *inherit = NULL;
1478
1479        vol_args = memdup_user(arg, sizeof(*vol_args));
1480        if (IS_ERR(vol_args))
1481                return PTR_ERR(vol_args);
1482        vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
1483
1484        if (vol_args->flags &
1485            ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
1486              BTRFS_SUBVOL_QGROUP_INHERIT)) {
1487                ret = -EOPNOTSUPP;
1488                goto out;
1489        }
1490
1491        if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
1492                ptr = &transid;
1493        if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1494                readonly = true;
1495        if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1496                if (vol_args->size > PAGE_CACHE_SIZE) {
1497                        ret = -EINVAL;
1498                        goto out;
1499                }
1500                inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1501                if (IS_ERR(inherit)) {
1502                        ret = PTR_ERR(inherit);
1503                        goto out;
1504                }
1505        }
1506
1507        ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1508                                              vol_args->fd, subvol, ptr,
1509                                              readonly, &inherit);
1510
1511        if (ret == 0 && ptr &&
1512            copy_to_user(arg +
1513                         offsetof(struct btrfs_ioctl_vol_args_v2,
1514                                  transid), ptr, sizeof(*ptr)))
1515                ret = -EFAULT;
1516out:
1517        kfree(vol_args);
1518        kfree(inherit);
1519        return ret;
1520}
1521
1522static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1523                                                void __user *arg)
1524{
1525        struct inode *inode = fdentry(file)->d_inode;
1526        struct btrfs_root *root = BTRFS_I(inode)->root;
1527        int ret = 0;
1528        u64 flags = 0;
1529
1530        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1531                return -EINVAL;
1532
1533        down_read(&root->fs_info->subvol_sem);
1534        if (btrfs_root_readonly(root))
1535                flags |= BTRFS_SUBVOL_RDONLY;
1536        up_read(&root->fs_info->subvol_sem);
1537
1538        if (copy_to_user(arg, &flags, sizeof(flags)))
1539                ret = -EFAULT;
1540
1541        return ret;
1542}
1543
1544static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1545                                              void __user *arg)
1546{
1547        struct inode *inode = fdentry(file)->d_inode;
1548        struct btrfs_root *root = BTRFS_I(inode)->root;
1549        struct btrfs_trans_handle *trans;
1550        u64 root_flags;
1551        u64 flags;
1552        int ret = 0;
1553
1554        ret = mnt_want_write_file(file);
1555        if (ret)
1556                goto out;
1557
1558        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1559                ret = -EINVAL;
1560                goto out_drop_write;
1561        }
1562
1563        if (copy_from_user(&flags, arg, sizeof(flags))) {
1564                ret = -EFAULT;
1565                goto out_drop_write;
1566        }
1567
1568        if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
1569                ret = -EINVAL;
1570                goto out_drop_write;
1571        }
1572
1573        if (flags & ~BTRFS_SUBVOL_RDONLY) {
1574                ret = -EOPNOTSUPP;
1575                goto out_drop_write;
1576        }
1577
1578        if (!inode_owner_or_capable(inode)) {
1579                ret = -EACCES;
1580                goto out_drop_write;
1581        }
1582
1583        down_write(&root->fs_info->subvol_sem);
1584
1585        /* nothing to do */
1586        if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1587                goto out_drop_sem;
1588
1589        root_flags = btrfs_root_flags(&root->root_item);
1590        if (flags & BTRFS_SUBVOL_RDONLY)
1591                btrfs_set_root_flags(&root->root_item,
1592                                     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
1593        else
1594                btrfs_set_root_flags(&root->root_item,
1595                                     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1596
1597        trans = btrfs_start_transaction(root, 1);
1598        if (IS_ERR(trans)) {
1599                ret = PTR_ERR(trans);
1600                goto out_reset;
1601        }
1602
1603        ret = btrfs_update_root(trans, root->fs_info->tree_root,
1604                                &root->root_key, &root->root_item);
1605
1606        btrfs_commit_transaction(trans, root);
1607out_reset:
1608        if (ret)
1609                btrfs_set_root_flags(&root->root_item, root_flags);
1610out_drop_sem:
1611        up_write(&root->fs_info->subvol_sem);
1612out_drop_write:
1613        mnt_drop_write_file(file);
1614out:
1615        return ret;
1616}
1617
1618/*
1619 * helper to check if the subvolume references other subvolumes
1620 */
1621static noinline int may_destroy_subvol(struct btrfs_root *root)
1622{
1623        struct btrfs_path *path;
1624        struct btrfs_key key;
1625        int ret;
1626
1627        path = btrfs_alloc_path();
1628        if (!path)
1629                return -ENOMEM;
1630
1631        key.objectid = root->root_key.objectid;
1632        key.type = BTRFS_ROOT_REF_KEY;
1633        key.offset = (u64)-1;
1634
1635        ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
1636                                &key, path, 0, 0);
1637        if (ret < 0)
1638                goto out;
1639        BUG_ON(ret == 0);
1640
1641        ret = 0;
1642        if (path->slots[0] > 0) {
1643                path->slots[0]--;
1644                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1645                if (key.objectid == root->root_key.objectid &&
1646                    key.type == BTRFS_ROOT_REF_KEY)
1647                        ret = -ENOTEMPTY;
1648        }
1649out:
1650        btrfs_free_path(path);
1651        return ret;
1652}
1653
1654static noinline int key_in_sk(struct btrfs_key *key,
1655                              struct btrfs_ioctl_search_key *sk)
1656{
1657        struct btrfs_key test;
1658        int ret;
1659
1660        test.objectid = sk->min_objectid;
1661        test.type = sk->min_type;
1662        test.offset = sk->min_offset;
1663
1664        ret = btrfs_comp_cpu_keys(key, &test);
1665        if (ret < 0)
1666                return 0;
1667
1668        test.objectid = sk->max_objectid;
1669        test.type = sk->max_type;
1670        test.offset = sk->max_offset;
1671
1672        ret = btrfs_comp_cpu_keys(key, &test);
1673        if (ret > 0)
1674                return 0;
1675        return 1;
1676}
1677
1678static noinline int copy_to_sk(struct btrfs_root *root,
1679                               struct btrfs_path *path,
1680                               struct btrfs_key *key,
1681                               struct btrfs_ioctl_search_key *sk,
1682                               char *buf,
1683                               unsigned long *sk_offset,
1684                               int *num_found)
1685{
1686        u64 found_transid;
1687        struct extent_buffer *leaf;
1688        struct btrfs_ioctl_search_header sh;
1689        unsigned long item_off;
1690        unsigned long item_len;
1691        int nritems;
1692        int i;
1693        int slot;
1694        int ret = 0;
1695
1696        leaf = path->nodes[0];
1697        slot = path->slots[0];
1698        nritems = btrfs_header_nritems(leaf);
1699
1700        if (btrfs_header_generation(leaf) > sk->max_transid) {
1701                i = nritems;
1702                goto advance_key;
1703        }
1704        found_transid = btrfs_header_generation(leaf);
1705
1706        for (i = slot; i < nritems; i++) {
1707                item_off = btrfs_item_ptr_offset(leaf, i);
1708                item_len = btrfs_item_size_nr(leaf, i);
1709
1710                if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
1711                        item_len = 0;
1712
1713                if (sizeof(sh) + item_len + *sk_offset >
1714                    BTRFS_SEARCH_ARGS_BUFSIZE) {
1715                        ret = 1;
1716                        goto overflow;
1717                }
1718
1719                btrfs_item_key_to_cpu(leaf, key, i);
1720                if (!key_in_sk(key, sk))
1721                        continue;
1722
1723                sh.objectid = key->objectid;
1724                sh.offset = key->offset;
1725                sh.type = key->type;
1726                sh.len = item_len;
1727                sh.transid = found_transid;
1728
1729                /* copy search result header */
1730                memcpy(buf + *sk_offset, &sh, sizeof(sh));
1731                *sk_offset += sizeof(sh);
1732
1733                if (item_len) {
1734                        char *p = buf + *sk_offset;
1735                        /* copy the item */
1736                        read_extent_buffer(leaf, p,
1737                                           item_off, item_len);
1738                        *sk_offset += item_len;
1739                }
1740                (*num_found)++;
1741
1742                if (*num_found >= sk->nr_items)
1743                        break;
1744        }
1745advance_key:
1746        ret = 0;
1747        if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1748                key->offset++;
1749        else if (key->type < (u8)-1 && key->type < sk->max_type) {
1750                key->offset = 0;
1751                key->type++;
1752        } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1753                key->offset = 0;
1754                key->type = 0;
1755                key->objectid++;
1756        } else
1757                ret = 1;
1758overflow:
1759        return ret;
1760}
1761
1762static noinline int search_ioctl(struct inode *inode,
1763                                 struct btrfs_ioctl_search_args *args)
1764{
1765        struct btrfs_root *root;
1766        struct btrfs_key key;
1767        struct btrfs_key max_key;
1768        struct btrfs_path *path;
1769        struct btrfs_ioctl_search_key *sk = &args->key;
1770        struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1771        int ret;
1772        int num_found = 0;
1773        unsigned long sk_offset = 0;
1774
1775        path = btrfs_alloc_path();
1776        if (!path)
1777                return -ENOMEM;
1778
1779        if (sk->tree_id == 0) {
1780                /* search the root of the inode that was passed */
1781                root = BTRFS_I(inode)->root;
1782        } else {
1783                key.objectid = sk->tree_id;
1784                key.type = BTRFS_ROOT_ITEM_KEY;
1785                key.offset = (u64)-1;
1786                root = btrfs_read_fs_root_no_name(info, &key);
1787                if (IS_ERR(root)) {
1788                        printk(KERN_ERR "could not find root %llu\n",
1789                               sk->tree_id);
1790                        btrfs_free_path(path);
1791                        return -ENOENT;
1792                }
1793        }
1794
1795        key.objectid = sk->min_objectid;
1796        key.type = sk->min_type;
1797        key.offset = sk->min_offset;
1798
1799        max_key.objectid = sk->max_objectid;
1800        max_key.type = sk->max_type;
1801        max_key.offset = sk->max_offset;
1802
1803        path->keep_locks = 1;
1804
1805        while(1) {
1806                ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1807                                           sk->min_transid);
1808                if (ret != 0) {
1809                        if (ret > 0)
1810                                ret = 0;
1811                        goto err;
1812                }
1813                ret = copy_to_sk(root, path, &key, sk, args->buf,
1814                                 &sk_offset, &num_found);
1815                btrfs_release_path(path);
1816                if (ret || num_found >= sk->nr_items)
1817                        break;
1818
1819        }
1820        ret = 0;
1821err:
1822        sk->nr_items = num_found;
1823        btrfs_free_path(path);
1824        return ret;
1825}
1826
1827static noinline int btrfs_ioctl_tree_search(struct file *file,
1828                                           void __user *argp)
1829{
1830         struct btrfs_ioctl_search_args *args;
1831         struct inode *inode;
1832         int ret;
1833
1834        if (!capable(CAP_SYS_ADMIN))
1835                return -EPERM;
1836
1837        args = memdup_user(argp, sizeof(*args));
1838        if (IS_ERR(args))
1839                return PTR_ERR(args);
1840
1841        inode = fdentry(file)->d_inode;
1842        ret = search_ioctl(inode, args);
1843        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1844                ret = -EFAULT;
1845        kfree(args);
1846        return ret;
1847}
1848
1849/*
1850 * Search INODE_REFs to identify path name of 'dirid' directory
1851 * in a 'tree_id' tree. and sets path name to 'name'.
1852 */
1853static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1854                                u64 tree_id, u64 dirid, char *name)
1855{
1856        struct btrfs_root *root;
1857        struct btrfs_key key;
1858        char *ptr;
1859        int ret = -1;
1860        int slot;
1861        int len;
1862        int total_len = 0;
1863        struct btrfs_inode_ref *iref;
1864        struct extent_buffer *l;
1865        struct btrfs_path *path;
1866
1867        if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1868                name[0]='\0';
1869                return 0;
1870        }
1871
1872        path = btrfs_alloc_path();
1873        if (!path)
1874                return -ENOMEM;
1875
1876        ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1877
1878        key.objectid = tree_id;
1879        key.type = BTRFS_ROOT_ITEM_KEY;
1880        key.offset = (u64)-1;
1881        root = btrfs_read_fs_root_no_name(info, &key);
1882        if (IS_ERR(root)) {
1883                printk(KERN_ERR "could not find root %llu\n", tree_id);
1884                ret = -ENOENT;
1885                goto out;
1886        }
1887
1888        key.objectid = dirid;
1889        key.type = BTRFS_INODE_REF_KEY;
1890        key.offset = (u64)-1;
1891
1892        while(1) {
1893                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1894                if (ret < 0)
1895                        goto out;
1896
1897                l = path->nodes[0];
1898                slot = path->slots[0];
1899                if (ret > 0 && slot > 0)
1900                        slot--;
1901                btrfs_item_key_to_cpu(l, &key, slot);
1902
1903                if (ret > 0 && (key.objectid != dirid ||
1904                                key.type != BTRFS_INODE_REF_KEY)) {
1905                        ret = -ENOENT;
1906                        goto out;
1907                }
1908
1909                iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1910                len = btrfs_inode_ref_name_len(l, iref);
1911                ptr -= len + 1;
1912                total_len += len + 1;
1913                if (ptr < name)
1914                        goto out;
1915
1916                *(ptr + len) = '/';
1917                read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1918
1919                if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1920                        break;
1921
1922                btrfs_release_path(path);
1923                key.objectid = key.offset;
1924                key.offset = (u64)-1;
1925                dirid = key.objectid;
1926        }
1927        if (ptr < name)
1928                goto out;
1929        memmove(name, ptr, total_len);
1930        name[total_len]='\0';
1931        ret = 0;
1932out:
1933        btrfs_free_path(path);
1934        return ret;
1935}
1936
1937static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1938                                           void __user *argp)
1939{
1940         struct btrfs_ioctl_ino_lookup_args *args;
1941         struct inode *inode;
1942         int ret;
1943
1944        if (!capable(CAP_SYS_ADMIN))
1945                return -EPERM;
1946
1947        args = memdup_user(argp, sizeof(*args));
1948        if (IS_ERR(args))
1949                return PTR_ERR(args);
1950
1951        inode = fdentry(file)->d_inode;
1952
1953        if (args->treeid == 0)
1954                args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1955
1956        ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1957                                        args->treeid, args->objectid,
1958                                        args->name);
1959
1960        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1961                ret = -EFAULT;
1962
1963        kfree(args);
1964        return ret;
1965}
1966
1967static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1968                                             void __user *arg)
1969{
1970        struct dentry *parent = fdentry(file);
1971        struct dentry *dentry;
1972        struct inode *dir = parent->d_inode;
1973        struct inode *inode;
1974        struct btrfs_root *root = BTRFS_I(dir)->root;
1975        struct btrfs_root *dest = NULL;
1976        struct btrfs_ioctl_vol_args *vol_args;
1977        struct btrfs_trans_handle *trans;
1978        int namelen;
1979        int ret;
1980        int err = 0;
1981
1982        vol_args = memdup_user(arg, sizeof(*vol_args));
1983        if (IS_ERR(vol_args))
1984                return PTR_ERR(vol_args);
1985
1986        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1987        namelen = strlen(vol_args->name);
1988        if (strchr(vol_args->name, '/') ||
1989            strncmp(vol_args->name, "..", namelen) == 0) {
1990                err = -EINVAL;
1991                goto out;
1992        }
1993
1994        err = mnt_want_write_file(file);
1995        if (err)
1996                goto out;
1997
1998        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
1999        dentry = lookup_one_len(vol_args->name, parent, namelen);
2000        if (IS_ERR(dentry)) {
2001                err = PTR_ERR(dentry);
2002                goto out_unlock_dir;
2003        }
2004
2005        if (!dentry->d_inode) {
2006                err = -ENOENT;
2007                goto out_dput;
2008        }
2009
2010        inode = dentry->d_inode;
2011        dest = BTRFS_I(inode)->root;
2012        if (!capable(CAP_SYS_ADMIN)){
2013                /*
2014                 * Regular user.  Only allow this with a special mount
2015                 * option, when the user has write+exec access to the
2016                 * subvol root, and when rmdir(2) would have been
2017                 * allowed.
2018                 *
2019                 * Note that this is _not_ check that the subvol is
2020                 * empty or doesn't contain data that we wouldn't
2021                 * otherwise be able to delete.
2022                 *
2023                 * Users who want to delete empty subvols should try
2024                 * rmdir(2).
2025                 */
2026                err = -EPERM;
2027                if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
2028                        goto out_dput;
2029
2030                /*
2031                 * Do not allow deletion if the parent dir is the same
2032                 * as the dir to be deleted.  That means the ioctl
2033                 * must be called on the dentry referencing the root
2034                 * of the subvol, not a random directory contained
2035                 * within it.
2036                 */
2037                err = -EINVAL;
2038                if (root == dest)
2039                        goto out_dput;
2040
2041                err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2042                if (err)
2043                        goto out_dput;
2044
2045                /* check if subvolume may be deleted by a non-root user */
2046                err = btrfs_may_delete(dir, dentry, 1);
2047                if (err)
2048                        goto out_dput;
2049        }
2050
2051        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2052                err = -EINVAL;
2053                goto out_dput;
2054        }
2055
2056        mutex_lock(&inode->i_mutex);
2057        err = d_invalidate(dentry);
2058        if (err)
2059                goto out_unlock;
2060
2061        down_write(&root->fs_info->subvol_sem);
2062
2063        err = may_destroy_subvol(dest);
2064        if (err)
2065                goto out_up_write;
2066
2067        trans = btrfs_start_transaction(root, 0);
2068        if (IS_ERR(trans)) {
2069                err = PTR_ERR(trans);
2070                goto out_up_write;
2071        }
2072        trans->block_rsv = &root->fs_info->global_block_rsv;
2073
2074        ret = btrfs_unlink_subvol(trans, root, dir,
2075                                dest->root_key.objectid,
2076                                dentry->d_name.name,
2077                                dentry->d_name.len);
2078        if (ret) {
2079                err = ret;
2080                btrfs_abort_transaction(trans, root, ret);
2081                goto out_end_trans;
2082        }
2083
2084        btrfs_record_root_in_trans(trans, dest);
2085
2086        memset(&dest->root_item.drop_progress, 0,
2087                sizeof(dest->root_item.drop_progress));
2088        dest->root_item.drop_level = 0;
2089        btrfs_set_root_refs(&dest->root_item, 0);
2090
2091        if (!xchg(&dest->orphan_item_inserted, 1)) {
2092                ret = btrfs_insert_orphan_item(trans,
2093                                        root->fs_info->tree_root,
2094                                        dest->root_key.objectid);
2095                if (ret) {
2096                        btrfs_abort_transaction(trans, root, ret);
2097                        err = ret;
2098                        goto out_end_trans;
2099                }
2100        }
2101out_end_trans:
2102        ret = btrfs_end_transaction(trans, root);
2103        if (ret && !err)
2104                err = ret;
2105        inode->i_flags |= S_DEAD;
2106out_up_write:
2107        up_write(&root->fs_info->subvol_sem);
2108out_unlock:
2109        mutex_unlock(&inode->i_mutex);
2110        if (!err) {
2111                shrink_dcache_sb(root->fs_info->sb);
2112                btrfs_invalidate_inodes(dest);
2113                d_delete(dentry);
2114        }
2115out_dput:
2116        dput(dentry);
2117out_unlock_dir:
2118        mutex_unlock(&dir->i_mutex);
2119        mnt_drop_write_file(file);
2120out:
2121        kfree(vol_args);
2122        return err;
2123}
2124
2125static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2126{
2127        struct inode *inode = fdentry(file)->d_inode;
2128        struct btrfs_root *root = BTRFS_I(inode)->root;
2129        struct btrfs_ioctl_defrag_range_args *range;
2130        int ret;
2131
2132        if (btrfs_root_readonly(root))
2133                return -EROFS;
2134
2135        ret = mnt_want_write_file(file);
2136        if (ret)
2137                return ret;
2138
2139        switch (inode->i_mode & S_IFMT) {
2140        case S_IFDIR:
2141                if (!capable(CAP_SYS_ADMIN)) {
2142                        ret = -EPERM;
2143                        goto out;
2144                }
2145                ret = btrfs_defrag_root(root, 0);
2146                if (ret)
2147                        goto out;
2148                ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
2149                break;
2150        case S_IFREG:
2151                if (!(file->f_mode & FMODE_WRITE)) {
2152                        ret = -EINVAL;
2153                        goto out;
2154                }
2155
2156                range = kzalloc(sizeof(*range), GFP_KERNEL);
2157                if (!range) {
2158                        ret = -ENOMEM;
2159                        goto out;
2160                }
2161
2162                if (argp) {
2163                        if (copy_from_user(range, argp,
2164                                           sizeof(*range))) {
2165                                ret = -EFAULT;
2166                                kfree(range);
2167                                goto out;
2168                        }
2169                        /* compression requires us to start the IO */
2170                        if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
2171                                range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
2172                                range->extent_thresh = (u32)-1;
2173                        }
2174                } else {
2175                        /* the rest are all set to zero by kzalloc */
2176                        range->len = (u64)-1;
2177                }
2178                ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2179                                        range, 0, 0);
2180                if (ret > 0)
2181                        ret = 0;
2182                kfree(range);
2183                break;
2184        default:
2185                ret = -EINVAL;
2186        }
2187out:
2188        mnt_drop_write_file(file);
2189        return ret;
2190}
2191
2192static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2193{
2194        struct btrfs_ioctl_vol_args *vol_args;
2195        int ret;
2196
2197        if (!capable(CAP_SYS_ADMIN))
2198                return -EPERM;
2199
2200        mutex_lock(&root->fs_info->volume_mutex);
2201        if (root->fs_info->balance_ctl) {
2202                printk(KERN_INFO "btrfs: balance in progress\n");
2203                ret = -EINVAL;
2204                goto out;
2205        }
2206
2207        vol_args = memdup_user(arg, sizeof(*vol_args));
2208        if (IS_ERR(vol_args)) {
2209                ret = PTR_ERR(vol_args);
2210                goto out;
2211        }
2212
2213        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2214        ret = btrfs_init_new_device(root, vol_args->name);
2215
2216        kfree(vol_args);
2217out:
2218        mutex_unlock(&root->fs_info->volume_mutex);
2219        return ret;
2220}
2221
2222static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2223{
2224        struct btrfs_ioctl_vol_args *vol_args;
2225        int ret;
2226
2227        if (!capable(CAP_SYS_ADMIN))
2228                return -EPERM;
2229
2230        if (root->fs_info->sb->s_flags & MS_RDONLY)
2231                return -EROFS;
2232
2233        mutex_lock(&root->fs_info->volume_mutex);
2234        if (root->fs_info->balance_ctl) {
2235                printk(KERN_INFO "btrfs: balance in progress\n");
2236                ret = -EINVAL;
2237                goto out;
2238        }
2239
2240        vol_args = memdup_user(arg, sizeof(*vol_args));
2241        if (IS_ERR(vol_args)) {
2242                ret = PTR_ERR(vol_args);
2243                goto out;
2244        }
2245
2246        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2247        ret = btrfs_rm_device(root, vol_args->name);
2248
2249        kfree(vol_args);
2250out:
2251        mutex_unlock(&root->fs_info->volume_mutex);
2252        return ret;
2253}
2254
2255static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2256{
2257        struct btrfs_ioctl_fs_info_args *fi_args;
2258        struct btrfs_device *device;
2259        struct btrfs_device *next;
2260        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2261        int ret = 0;
2262
2263        if (!capable(CAP_SYS_ADMIN))
2264                return -EPERM;
2265
2266        fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2267        if (!fi_args)
2268                return -ENOMEM;
2269
2270        fi_args->num_devices = fs_devices->num_devices;
2271        memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2272
2273        mutex_lock(&fs_devices->device_list_mutex);
2274        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2275                if (device->devid > fi_args->max_id)
2276                        fi_args->max_id = device->devid;
2277        }
2278        mutex_unlock(&fs_devices->device_list_mutex);
2279
2280        if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2281                ret = -EFAULT;
2282
2283        kfree(fi_args);
2284        return ret;
2285}
2286
2287static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2288{
2289        struct btrfs_ioctl_dev_info_args *di_args;
2290        struct btrfs_device *dev;
2291        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2292        int ret = 0;
2293        char *s_uuid = NULL;
2294        char empty_uuid[BTRFS_UUID_SIZE] = {0};
2295
2296        if (!capable(CAP_SYS_ADMIN))
2297                return -EPERM;
2298
2299        di_args = memdup_user(arg, sizeof(*di_args));
2300        if (IS_ERR(di_args))
2301                return PTR_ERR(di_args);
2302
2303        if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2304                s_uuid = di_args->uuid;
2305
2306        mutex_lock(&fs_devices->device_list_mutex);
2307        dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2308        mutex_unlock(&fs_devices->device_list_mutex);
2309
2310        if (!dev) {
2311                ret = -ENODEV;
2312                goto out;
2313        }
2314
2315        di_args->devid = dev->devid;
2316        di_args->bytes_used = dev->bytes_used;
2317        di_args->total_bytes = dev->total_bytes;
2318        memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2319        if (dev->name) {
2320                struct rcu_string *name;
2321
2322                rcu_read_lock();
2323                name = rcu_dereference(dev->name);
2324                strncpy(di_args->path, name->str, sizeof(di_args->path));
2325                rcu_read_unlock();
2326                di_args->path[sizeof(di_args->path) - 1] = 0;
2327        } else {
2328                di_args->path[0] = '\0';
2329        }
2330
2331out:
2332        if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2333                ret = -EFAULT;
2334
2335        kfree(di_args);
2336        return ret;
2337}
2338
2339static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2340                                       u64 off, u64 olen, u64 destoff)
2341{
2342        struct inode *inode = fdentry(file)->d_inode;
2343        struct btrfs_root *root = BTRFS_I(inode)->root;
2344        struct file *src_file;
2345        struct inode *src;
2346        struct btrfs_trans_handle *trans;
2347        struct btrfs_path *path;
2348        struct extent_buffer *leaf;
2349        char *buf;
2350        struct btrfs_key key;
2351        u32 nritems;
2352        int slot;
2353        int ret;
2354        u64 len = olen;
2355        u64 bs = root->fs_info->sb->s_blocksize;
2356        u64 hint_byte;
2357
2358        /*
2359         * TODO:
2360         * - split compressed inline extents.  annoying: we need to
2361         *   decompress into destination's address_space (the file offset
2362         *   may change, so source mapping won't do), then recompress (or
2363         *   otherwise reinsert) a subrange.
2364         * - allow ranges within the same file to be cloned (provided
2365         *   they don't overlap)?
2366         */
2367
2368        /* the destination must be opened for writing */
2369        if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
2370                return -EINVAL;
2371
2372        if (btrfs_root_readonly(root))
2373                return -EROFS;
2374
2375        ret = mnt_want_write_file(file);
2376        if (ret)
2377                return ret;
2378
2379        src_file = fget(srcfd);
2380        if (!src_file) {
2381                ret = -EBADF;
2382                goto out_drop_write;
2383        }
2384
2385        ret = -EXDEV;
2386        if (src_file->f_path.mnt != file->f_path.mnt)
2387                goto out_fput;
2388
2389        src = src_file->f_dentry->d_inode;
2390
2391        ret = -EINVAL;
2392        if (src == inode)
2393                goto out_fput;
2394
2395        /* the src must be open for reading */
2396        if (!(src_file->f_mode & FMODE_READ))
2397                goto out_fput;
2398
2399        /* don't make the dst file partly checksummed */
2400        if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2401            (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
2402                goto out_fput;
2403
2404        ret = -EISDIR;
2405        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
2406                goto out_fput;
2407
2408        ret = -EXDEV;
2409        if (src->i_sb != inode->i_sb)
2410                goto out_fput;
2411
2412        ret = -ENOMEM;
2413        buf = vmalloc(btrfs_level_size(root, 0));
2414        if (!buf)
2415                goto out_fput;
2416
2417        path = btrfs_alloc_path();
2418        if (!path) {
2419                vfree(buf);
2420                goto out_fput;
2421        }
2422        path->reada = 2;
2423
2424        if (inode < src) {
2425                mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
2426                mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
2427        } else {
2428                mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
2429                mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2430        }
2431
2432        /* determine range to clone */
2433        ret = -EINVAL;
2434        if (off + len > src->i_size || off + len < off)
2435                goto out_unlock;
2436        if (len == 0)
2437                olen = len = src->i_size - off;
2438        /* if we extend to eof, continue to block boundary */
2439        if (off + len == src->i_size)
2440                len = ALIGN(src->i_size, bs) - off;
2441
2442        /* verify the end result is block aligned */
2443        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
2444            !IS_ALIGNED(destoff, bs))
2445                goto out_unlock;
2446
2447        if (destoff > inode->i_size) {
2448                ret = btrfs_cont_expand(inode, inode->i_size, destoff);
2449                if (ret)
2450                        goto out_unlock;
2451        }
2452
2453        /* truncate page cache pages from target inode range */
2454        truncate_inode_pages_range(&inode->i_data, destoff,
2455                                   PAGE_CACHE_ALIGN(destoff + len) - 1);
2456
2457        /* do any pending delalloc/csum calc on src, one way or
2458           another, and lock file content */
2459        while (1) {
2460                struct btrfs_ordered_extent *ordered;
2461                lock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2462                ordered = btrfs_lookup_first_ordered_extent(src, off+len);
2463                if (!ordered &&
2464                    !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
2465                                   EXTENT_DELALLOC, 0, NULL))
2466                        break;
2467                unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2468                if (ordered)
2469                        btrfs_put_ordered_extent(ordered);
2470                btrfs_wait_ordered_range(src, off, len);
2471        }
2472
2473        /* clone data */
2474        key.objectid = btrfs_ino(src);
2475        key.type = BTRFS_EXTENT_DATA_KEY;
2476        key.offset = 0;
2477
2478        while (1) {
2479                /*
2480                 * note the key will change type as we walk through the
2481                 * tree.
2482                 */
2483                ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
2484                                0, 0);
2485                if (ret < 0)
2486                        goto out;
2487
2488                nritems = btrfs_header_nritems(path->nodes[0]);
2489                if (path->slots[0] >= nritems) {
2490                        ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
2491                        if (ret < 0)
2492                                goto out;
2493                        if (ret > 0)
2494                                break;
2495                        nritems = btrfs_header_nritems(path->nodes[0]);
2496                }
2497                leaf = path->nodes[0];
2498                slot = path->slots[0];
2499
2500                btrfs_item_key_to_cpu(leaf, &key, slot);
2501                if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
2502                    key.objectid != btrfs_ino(src))
2503                        break;
2504
2505                if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
2506                        struct btrfs_file_extent_item *extent;
2507                        int type;
2508                        u32 size;
2509                        struct btrfs_key new_key;
2510                        u64 disko = 0, diskl = 0;
2511                        u64 datao = 0, datal = 0;
2512                        u8 comp;
2513                        u64 endoff;
2514
2515                        size = btrfs_item_size_nr(leaf, slot);
2516                        read_extent_buffer(leaf, buf,
2517                                           btrfs_item_ptr_offset(leaf, slot),
2518                                           size);
2519
2520                        extent = btrfs_item_ptr(leaf, slot,
2521                                                struct btrfs_file_extent_item);
2522                        comp = btrfs_file_extent_compression(leaf, extent);
2523                        type = btrfs_file_extent_type(leaf, extent);
2524                        if (type == BTRFS_FILE_EXTENT_REG ||
2525                            type == BTRFS_FILE_EXTENT_PREALLOC) {
2526                                disko = btrfs_file_extent_disk_bytenr(leaf,
2527                                                                      extent);
2528                                diskl = btrfs_file_extent_disk_num_bytes(leaf,
2529                                                                 extent);
2530                                datao = btrfs_file_extent_offset(leaf, extent);
2531                                datal = btrfs_file_extent_num_bytes(leaf,
2532                                                                    extent);
2533                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2534                                /* take upper bound, may be compressed */
2535                                datal = btrfs_file_extent_ram_bytes(leaf,
2536                                                                    extent);
2537                        }
2538                        btrfs_release_path(path);
2539
2540                        if (key.offset + datal <= off ||
2541                            key.offset >= off+len)
2542                                goto next;
2543
2544                        memcpy(&new_key, &key, sizeof(new_key));
2545                        new_key.objectid = btrfs_ino(inode);
2546                        if (off <= key.offset)
2547                                new_key.offset = key.offset + destoff - off;
2548                        else
2549                                new_key.offset = destoff;
2550
2551                        /*
2552                         * 1 - adjusting old extent (we may have to split it)
2553                         * 1 - add new extent
2554                         * 1 - inode update
2555                         */
2556                        trans = btrfs_start_transaction(root, 3);
2557                        if (IS_ERR(trans)) {
2558                                ret = PTR_ERR(trans);
2559                                goto out;
2560                        }
2561
2562                        if (type == BTRFS_FILE_EXTENT_REG ||
2563                            type == BTRFS_FILE_EXTENT_PREALLOC) {
2564                                /*
2565                                 *    a  | --- range to clone ---|  b
2566                                 * | ------------- extent ------------- |
2567                                 */
2568
2569                                /* substract range b */
2570                                if (key.offset + datal > off + len)
2571                                        datal = off + len - key.offset;
2572
2573                                /* substract range a */
2574                                if (off > key.offset) {
2575                                        datao += off - key.offset;
2576                                        datal -= off - key.offset;
2577                                }
2578
2579                                ret = btrfs_drop_extents(trans, inode,
2580                                                         new_key.offset,
2581                                                         new_key.offset + datal,
2582                                                         &hint_byte, 1);
2583                                if (ret) {
2584                                        btrfs_abort_transaction(trans, root,
2585                                                                ret);
2586                                        btrfs_end_transaction(trans, root);
2587                                        goto out;
2588                                }
2589
2590                                ret = btrfs_insert_empty_item(trans, root, path,
2591                                                              &new_key, size);
2592                                if (ret) {
2593                                        btrfs_abort_transaction(trans, root,
2594                                                                ret);
2595                                        btrfs_end_transaction(trans, root);
2596                                        goto out;
2597                                }
2598
2599                                leaf = path->nodes[0];
2600                                slot = path->slots[0];
2601                                write_extent_buffer(leaf, buf,
2602                                            btrfs_item_ptr_offset(leaf, slot),
2603                                            size);
2604
2605                                extent = btrfs_item_ptr(leaf, slot,
2606                                                struct btrfs_file_extent_item);
2607
2608                                /* disko == 0 means it's a hole */
2609                                if (!disko)
2610                                        datao = 0;
2611
2612                                btrfs_set_file_extent_offset(leaf, extent,
2613                                                             datao);
2614                                btrfs_set_file_extent_num_bytes(leaf, extent,
2615                                                                datal);
2616                                if (disko) {
2617                                        inode_add_bytes(inode, datal);
2618                                        ret = btrfs_inc_extent_ref(trans, root,
2619                                                        disko, diskl, 0,
2620                                                        root->root_key.objectid,
2621                                                        btrfs_ino(inode),
2622                                                        new_key.offset - datao,
2623                                                        0);
2624                                        if (ret) {
2625                                                btrfs_abort_transaction(trans,
2626                                                                        root,
2627                                                                        ret);
2628                                                btrfs_end_transaction(trans,
2629                                                                      root);
2630                                                goto out;
2631
2632                                        }
2633                                }
2634                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2635                                u64 skip = 0;
2636                                u64 trim = 0;
2637                                if (off > key.offset) {
2638                                        skip = off - key.offset;
2639                                        new_key.offset += skip;
2640                                }
2641
2642                                if (key.offset + datal > off+len)
2643                                        trim = key.offset + datal - (off+len);
2644
2645                                if (comp && (skip || trim)) {
2646                                        ret = -EINVAL;
2647                                        btrfs_end_transaction(trans, root);
2648                                        goto out;
2649                                }
2650                                size -= skip + trim;
2651                                datal -= skip + trim;
2652
2653                                ret = btrfs_drop_extents(trans, inode,
2654                                                         new_key.offset,
2655                                                         new_key.offset + datal,
2656                                                         &hint_byte, 1);
2657                                if (ret) {
2658                                        btrfs_abort_transaction(trans, root,
2659                                                                ret);
2660                                        btrfs_end_transaction(trans, root);
2661                                        goto out;
2662                                }
2663
2664                                ret = btrfs_insert_empty_item(trans, root, path,
2665                                                              &new_key, size);
2666                                if (ret) {
2667                                        btrfs_abort_transaction(trans, root,
2668                                                                ret);
2669                                        btrfs_end_transaction(trans, root);
2670                                        goto out;
2671                                }
2672
2673                                if (skip) {
2674                                        u32 start =
2675                                          btrfs_file_extent_calc_inline_size(0);
2676                                        memmove(buf+start, buf+start+skip,
2677                                                datal);
2678                                }
2679
2680                                leaf = path->nodes[0];
2681                                slot = path->slots[0];
2682                                write_extent_buffer(leaf, buf,
2683                                            btrfs_item_ptr_offset(leaf, slot),
2684                                            size);
2685                                inode_add_bytes(inode, datal);
2686                        }
2687
2688                        btrfs_mark_buffer_dirty(leaf);
2689                        btrfs_release_path(path);
2690
2691                        inode_inc_iversion(inode);
2692                        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2693
2694                        /*
2695                         * we round up to the block size at eof when
2696                         * determining which extents to clone above,
2697                         * but shouldn't round up the file size
2698                         */
2699                        endoff = new_key.offset + datal;
2700                        if (endoff > destoff+olen)
2701                                endoff = destoff+olen;
2702                        if (endoff > inode->i_size)
2703                                btrfs_i_size_write(inode, endoff);
2704
2705                        ret = btrfs_update_inode(trans, root, inode);
2706                        if (ret) {
2707                                btrfs_abort_transaction(trans, root, ret);
2708                                btrfs_end_transaction(trans, root);
2709                                goto out;
2710                        }
2711                        ret = btrfs_end_transaction(trans, root);
2712                }
2713next:
2714                btrfs_release_path(path);
2715                key.offset++;
2716        }
2717        ret = 0;
2718out:
2719        btrfs_release_path(path);
2720        unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
2721out_unlock:
2722        mutex_unlock(&src->i_mutex);
2723        mutex_unlock(&inode->i_mutex);
2724        vfree(buf);
2725        btrfs_free_path(path);
2726out_fput:
2727        fput(src_file);
2728out_drop_write:
2729        mnt_drop_write_file(file);
2730        return ret;
2731}
2732
2733static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
2734{
2735        struct btrfs_ioctl_clone_range_args args;
2736
2737        if (copy_from_user(&args, argp, sizeof(args)))
2738                return -EFAULT;
2739        return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
2740                                 args.src_length, args.dest_offset);
2741}
2742
2743/*
2744 * there are many ways the trans_start and trans_end ioctls can lead
2745 * to deadlocks.  They should only be used by applications that
2746 * basically own the machine, and have a very in depth understanding
2747 * of all the possible deadlocks and enospc problems.
2748 */
2749static long btrfs_ioctl_trans_start(struct file *file)
2750{
2751        struct inode *inode = fdentry(file)->d_inode;
2752        struct btrfs_root *root = BTRFS_I(inode)->root;
2753        struct btrfs_trans_handle *trans;
2754        int ret;
2755
2756        ret = -EPERM;
2757        if (!capable(CAP_SYS_ADMIN))
2758                goto out;
2759
2760        ret = -EINPROGRESS;
2761        if (file->private_data)
2762                goto out;
2763
2764        ret = -EROFS;
2765        if (btrfs_root_readonly(root))
2766                goto out;
2767
2768        ret = mnt_want_write_file(file);
2769        if (ret)
2770                goto out;
2771
2772        atomic_inc(&root->fs_info->open_ioctl_trans);
2773
2774        ret = -ENOMEM;
2775        trans = btrfs_start_ioctl_transaction(root);
2776        if (IS_ERR(trans))
2777                goto out_drop;
2778
2779        file->private_data = trans;
2780        return 0;
2781
2782out_drop:
2783        atomic_dec(&root->fs_info->open_ioctl_trans);
2784        mnt_drop_write_file(file);
2785out:
2786        return ret;
2787}
2788
2789static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2790{
2791        struct inode *inode = fdentry(file)->d_inode;
2792        struct btrfs_root *root = BTRFS_I(inode)->root;
2793        struct btrfs_root *new_root;
2794        struct btrfs_dir_item *di;
2795        struct btrfs_trans_handle *trans;
2796        struct btrfs_path *path;
2797        struct btrfs_key location;
2798        struct btrfs_disk_key disk_key;
2799        u64 objectid = 0;
2800        u64 dir_id;
2801
2802        if (!capable(CAP_SYS_ADMIN))
2803                return -EPERM;
2804
2805        if (copy_from_user(&objectid, argp, sizeof(objectid)))
2806                return -EFAULT;
2807
2808        if (!objectid)
2809                objectid = root->root_key.objectid;
2810
2811        location.objectid = objectid;
2812        location.type = BTRFS_ROOT_ITEM_KEY;
2813        location.offset = (u64)-1;
2814
2815        new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
2816        if (IS_ERR(new_root))
2817                return PTR_ERR(new_root);
2818
2819        if (btrfs_root_refs(&new_root->root_item) == 0)
2820                return -ENOENT;
2821
2822        path = btrfs_alloc_path();
2823        if (!path)
2824                return -ENOMEM;
2825        path->leave_spinning = 1;
2826
2827        trans = btrfs_start_transaction(root, 1);
2828        if (IS_ERR(trans)) {
2829                btrfs_free_path(path);
2830                return PTR_ERR(trans);
2831        }
2832
2833        dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
2834        di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
2835                                   dir_id, "default", 7, 1);
2836        if (IS_ERR_OR_NULL(di)) {
2837                btrfs_free_path(path);
2838                btrfs_end_transaction(trans, root);
2839                printk(KERN_ERR "Umm, you don't have the default dir item, "
2840                       "this isn't going to work\n");
2841                return -ENOENT;
2842        }
2843
2844        btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
2845        btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
2846        btrfs_mark_buffer_dirty(path->nodes[0]);
2847        btrfs_free_path(path);
2848
2849        btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
2850        btrfs_end_transaction(trans, root);
2851
2852        return 0;
2853}
2854
2855static void get_block_group_info(struct list_head *groups_list,
2856                                 struct btrfs_ioctl_space_info *space)
2857{
2858        struct btrfs_block_group_cache *block_group;
2859
2860        space->total_bytes = 0;
2861        space->used_bytes = 0;
2862        space->flags = 0;
2863        list_for_each_entry(block_group, groups_list, list) {
2864                space->flags = block_group->flags;
2865                space->total_bytes += block_group->key.offset;
2866                space->used_bytes +=
2867                        btrfs_block_group_used(&block_group->item);
2868        }
2869}
2870
2871long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2872{
2873        struct btrfs_ioctl_space_args space_args;
2874        struct btrfs_ioctl_space_info space;
2875        struct btrfs_ioctl_space_info *dest;
2876        struct btrfs_ioctl_space_info *dest_orig;
2877        struct btrfs_ioctl_space_info __user *user_dest;
2878        struct btrfs_space_info *info;
2879        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2880                       BTRFS_BLOCK_GROUP_SYSTEM,
2881                       BTRFS_BLOCK_GROUP_METADATA,
2882                       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2883        int num_types = 4;
2884        int alloc_size;
2885        int ret = 0;
2886        u64 slot_count = 0;
2887        int i, c;
2888
2889        if (copy_from_user(&space_args,
2890                           (struct btrfs_ioctl_space_args __user *)arg,
2891                           sizeof(space_args)))
2892                return -EFAULT;
2893
2894        for (i = 0; i < num_types; i++) {
2895                struct btrfs_space_info *tmp;
2896
2897                info = NULL;
2898                rcu_read_lock();
2899                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2900                                        list) {
2901                        if (tmp->flags == types[i]) {
2902                                info = tmp;
2903                                break;
2904                        }
2905                }
2906                rcu_read_unlock();
2907
2908                if (!info)
2909                        continue;
2910
2911                down_read(&info->groups_sem);
2912                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2913                        if (!list_empty(&info->block_groups[c]))
2914                                slot_count++;
2915                }
2916                up_read(&info->groups_sem);
2917        }
2918
2919        /* space_slots == 0 means they are asking for a count */
2920        if (space_args.space_slots == 0) {
2921                space_args.total_spaces = slot_count;
2922                goto out;
2923        }
2924
2925        slot_count = min_t(u64, space_args.space_slots, slot_count);
2926
2927        alloc_size = sizeof(*dest) * slot_count;
2928
2929        /* we generally have at most 6 or so space infos, one for each raid
2930         * level.  So, a whole page should be more than enough for everyone
2931         */
2932        if (alloc_size > PAGE_CACHE_SIZE)
2933                return -ENOMEM;
2934
2935        space_args.total_spaces = 0;
2936        dest = kmalloc(alloc_size, GFP_NOFS);
2937        if (!dest)
2938                return -ENOMEM;
2939        dest_orig = dest;
2940
2941        /* now we have a buffer to copy into */
2942        for (i = 0; i < num_types; i++) {
2943                struct btrfs_space_info *tmp;
2944
2945                if (!slot_count)
2946                        break;
2947
2948                info = NULL;
2949                rcu_read_lock();
2950                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2951                                        list) {
2952                        if (tmp->flags == types[i]) {
2953                                info = tmp;
2954                                break;
2955                        }
2956                }
2957                rcu_read_unlock();
2958
2959                if (!info)
2960                        continue;
2961                down_read(&info->groups_sem);
2962                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2963                        if (!list_empty(&info->block_groups[c])) {
2964                                get_block_group_info(&info->block_groups[c],
2965                                                     &space);
2966                                memcpy(dest, &space, sizeof(space));
2967                                dest++;
2968                                space_args.total_spaces++;
2969                                slot_count--;
2970                        }
2971                        if (!slot_count)
2972                                break;
2973                }
2974                up_read(&info->groups_sem);
2975        }
2976
2977        user_dest = (struct btrfs_ioctl_space_info __user *)
2978                (arg + sizeof(struct btrfs_ioctl_space_args));
2979
2980        if (copy_to_user(user_dest, dest_orig, alloc_size))
2981                ret = -EFAULT;
2982
2983        kfree(dest_orig);
2984out:
2985        if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
2986                ret = -EFAULT;
2987
2988        return ret;
2989}
2990
2991/*
2992 * there are many ways the trans_start and trans_end ioctls can lead
2993 * to deadlocks.  They should only be used by applications that
2994 * basically own the machine, and have a very in depth understanding
2995 * of all the possible deadlocks and enospc problems.
2996 */
2997long btrfs_ioctl_trans_end(struct file *file)
2998{
2999        struct inode *inode = fdentry(file)->d_inode;
3000        struct btrfs_root *root = BTRFS_I(inode)->root;
3001        struct btrfs_trans_handle *trans;
3002
3003        trans = file->private_data;
3004        if (!trans)
3005                return -EINVAL;
3006        file->private_data = NULL;
3007
3008        btrfs_end_transaction(trans, root);
3009
3010        atomic_dec(&root->fs_info->open_ioctl_trans);
3011
3012        mnt_drop_write_file(file);
3013        return 0;
3014}
3015
3016static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
3017{
3018        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3019        struct btrfs_trans_handle *trans;
3020        u64 transid;
3021        int ret;
3022
3023        trans = btrfs_start_transaction(root, 0);
3024        if (IS_ERR(trans))
3025                return PTR_ERR(trans);
3026        transid = trans->transid;
3027        ret = btrfs_commit_transaction_async(trans, root, 0);
3028        if (ret) {
3029                btrfs_end_transaction(trans, root);
3030                return ret;
3031        }
3032
3033        if (argp)
3034                if (copy_to_user(argp, &transid, sizeof(transid)))
3035                        return -EFAULT;
3036        return 0;
3037}
3038
3039static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
3040{
3041        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3042        u64 transid;
3043
3044        if (argp) {
3045                if (copy_from_user(&transid, argp, sizeof(transid)))
3046                        return -EFAULT;
3047        } else {
3048                transid = 0;  /* current trans */
3049        }
3050        return btrfs_wait_for_commit(root, transid);
3051}
3052
3053static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
3054{
3055        int ret;
3056        struct btrfs_ioctl_scrub_args *sa;
3057
3058        if (!capable(CAP_SYS_ADMIN))
3059                return -EPERM;
3060
3061        sa = memdup_user(arg, sizeof(*sa));
3062        if (IS_ERR(sa))
3063                return PTR_ERR(sa);
3064
3065        ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
3066                              &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
3067
3068        if (copy_to_user(arg, sa, sizeof(*sa)))
3069                ret = -EFAULT;
3070
3071        kfree(sa);
3072        return ret;
3073}
3074
3075static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
3076{
3077        if (!capable(CAP_SYS_ADMIN))
3078                return -EPERM;
3079
3080        return btrfs_scrub_cancel(root);
3081}
3082
3083static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
3084                                       void __user *arg)
3085{
3086        struct btrfs_ioctl_scrub_args *sa;
3087        int ret;
3088
3089        if (!capable(CAP_SYS_ADMIN))
3090                return -EPERM;
3091
3092        sa = memdup_user(arg, sizeof(*sa));
3093        if (IS_ERR(sa))
3094                return PTR_ERR(sa);
3095
3096        ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
3097
3098        if (copy_to_user(arg, sa, sizeof(*sa)))
3099                ret = -EFAULT;
3100
3101        kfree(sa);
3102        return ret;
3103}
3104
3105static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
3106                                      void __user *arg)
3107{
3108        struct btrfs_ioctl_get_dev_stats *sa;
3109        int ret;
3110
3111        sa = memdup_user(arg, sizeof(*sa));
3112        if (IS_ERR(sa))
3113                return PTR_ERR(sa);
3114
3115        if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3116                kfree(sa);
3117                return -EPERM;
3118        }
3119
3120        ret = btrfs_get_dev_stats(root, sa);
3121
3122        if (copy_to_user(arg, sa, sizeof(*sa)))
3123                ret = -EFAULT;
3124
3125        kfree(sa);
3126        return ret;
3127}
3128
3129static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3130{
3131        int ret = 0;
3132        int i;
3133        u64 rel_ptr;
3134        int size;
3135        struct btrfs_ioctl_ino_path_args *ipa = NULL;
3136        struct inode_fs_paths *ipath = NULL;
3137        struct btrfs_path *path;
3138
3139        if (!capable(CAP_SYS_ADMIN))
3140                return -EPERM;
3141
3142        path = btrfs_alloc_path();
3143        if (!path) {
3144                ret = -ENOMEM;
3145                goto out;
3146        }
3147
3148        ipa = memdup_user(arg, sizeof(*ipa));
3149        if (IS_ERR(ipa)) {
3150                ret = PTR_ERR(ipa);
3151                ipa = NULL;
3152                goto out;
3153        }
3154
3155        size = min_t(u32, ipa->size, 4096);
3156        ipath = init_ipath(size, root, path);
3157        if (IS_ERR(ipath)) {
3158                ret = PTR_ERR(ipath);
3159                ipath = NULL;
3160                goto out;
3161        }
3162
3163        ret = paths_from_inode(ipa->inum, ipath);
3164        if (ret < 0)
3165                goto out;
3166
3167        for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
3168                rel_ptr = ipath->fspath->val[i] -
3169                          (u64)(unsigned long)ipath->fspath->val;
3170                ipath->fspath->val[i] = rel_ptr;
3171        }
3172
3173        ret = copy_to_user((void *)(unsigned long)ipa->fspath,
3174                           (void *)(unsigned long)ipath->fspath, size);
3175        if (ret) {
3176                ret = -EFAULT;
3177                goto out;
3178        }
3179
3180out:
3181        btrfs_free_path(path);
3182        free_ipath(ipath);
3183        kfree(ipa);
3184
3185        return ret;
3186}
3187
3188static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
3189{
3190        struct btrfs_data_container *inodes = ctx;
3191        const size_t c = 3 * sizeof(u64);
3192
3193        if (inodes->bytes_left >= c) {
3194                inodes->bytes_left -= c;
3195                inodes->val[inodes->elem_cnt] = inum;
3196                inodes->val[inodes->elem_cnt + 1] = offset;
3197                inodes->val[inodes->elem_cnt + 2] = root;
3198                inodes->elem_cnt += 3;
3199        } else {
3200                inodes->bytes_missing += c - inodes->bytes_left;
3201                inodes->bytes_left = 0;
3202                inodes->elem_missed += 3;
3203        }
3204
3205        return 0;
3206}
3207
3208static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3209                                        void __user *arg)
3210{
3211        int ret = 0;
3212        int size;
3213        u64 extent_item_pos;
3214        struct btrfs_ioctl_logical_ino_args *loi;
3215        struct btrfs_data_container *inodes = NULL;
3216        struct btrfs_path *path = NULL;
3217        struct btrfs_key key;
3218
3219        if (!capable(CAP_SYS_ADMIN))
3220                return -EPERM;
3221
3222        loi = memdup_user(arg, sizeof(*loi));
3223        if (IS_ERR(loi)) {
3224                ret = PTR_ERR(loi);
3225                loi = NULL;
3226                goto out;
3227        }
3228
3229        path = btrfs_alloc_path();
3230        if (!path) {
3231                ret = -ENOMEM;
3232                goto out;
3233        }
3234
3235        size = min_t(u32, loi->size, 4096);
3236        inodes = init_data_container(size);
3237        if (IS_ERR(inodes)) {
3238                ret = PTR_ERR(inodes);
3239                inodes = NULL;
3240                goto out;
3241        }
3242
3243        ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
3244        btrfs_release_path(path);
3245
3246        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
3247                ret = -ENOENT;
3248        if (ret < 0)
3249                goto out;
3250
3251        extent_item_pos = loi->logical - key.objectid;
3252        ret = iterate_extent_inodes(root->fs_info, key.objectid,
3253                                        extent_item_pos, 0, build_ino_list,
3254                                        inodes);
3255
3256        if (ret < 0)
3257                goto out;
3258
3259        ret = copy_to_user((void *)(unsigned long)loi->inodes,
3260                           (void *)(unsigned long)inodes, size);
3261        if (ret)
3262                ret = -EFAULT;
3263
3264out:
3265        btrfs_free_path(path);
3266        kfree(inodes);
3267        kfree(loi);
3268
3269        return ret;
3270}
3271
3272void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3273                               struct btrfs_ioctl_balance_args *bargs)
3274{
3275        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3276
3277        bargs->flags = bctl->flags;
3278
3279        if (atomic_read(&fs_info->balance_running))
3280                bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
3281        if (atomic_read(&fs_info->balance_pause_req))
3282                bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
3283        if (atomic_read(&fs_info->balance_cancel_req))
3284                bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
3285
3286        memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3287        memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3288        memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3289
3290        if (lock) {
3291                spin_lock(&fs_info->balance_lock);
3292                memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3293                spin_unlock(&fs_info->balance_lock);
3294        } else {
3295                memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3296        }
3297}
3298
3299static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3300{
3301        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3302        struct btrfs_fs_info *fs_info = root->fs_info;
3303        struct btrfs_ioctl_balance_args *bargs;
3304        struct btrfs_balance_control *bctl;
3305        int ret;
3306
3307        if (!capable(CAP_SYS_ADMIN))
3308                return -EPERM;
3309
3310        ret = mnt_want_write_file(file);
3311        if (ret)
3312                return ret;
3313
3314        mutex_lock(&fs_info->volume_mutex);
3315        mutex_lock(&fs_info->balance_mutex);
3316
3317        if (arg) {
3318                bargs = memdup_user(arg, sizeof(*bargs));
3319                if (IS_ERR(bargs)) {
3320                        ret = PTR_ERR(bargs);
3321                        goto out;
3322                }
3323
3324                if (bargs->flags & BTRFS_BALANCE_RESUME) {
3325                        if (!fs_info->balance_ctl) {
3326                                ret = -ENOTCONN;
3327                                goto out_bargs;
3328                        }
3329
3330                        bctl = fs_info->balance_ctl;
3331                        spin_lock(&fs_info->balance_lock);
3332                        bctl->flags |= BTRFS_BALANCE_RESUME;
3333                        spin_unlock(&fs_info->balance_lock);
3334
3335                        goto do_balance;
3336                }
3337        } else {
3338                bargs = NULL;
3339        }
3340
3341        if (fs_info->balance_ctl) {
3342                ret = -EINPROGRESS;
3343                goto out_bargs;
3344        }
3345
3346        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3347        if (!bctl) {
3348                ret = -ENOMEM;
3349                goto out_bargs;
3350        }
3351
3352        bctl->fs_info = fs_info;
3353        if (arg) {
3354                memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3355                memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3356                memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3357
3358                bctl->flags = bargs->flags;
3359        } else {
3360                /* balance everything - no filters */
3361                bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
3362        }
3363
3364do_balance:
3365        ret = btrfs_balance(bctl, bargs);
3366        /*
3367         * bctl is freed in __cancel_balance or in free_fs_info if
3368         * restriper was paused all the way until unmount
3369         */
3370        if (arg) {
3371                if (copy_to_user(arg, bargs, sizeof(*bargs)))
3372                        ret = -EFAULT;
3373        }
3374
3375out_bargs:
3376        kfree(bargs);
3377out:
3378        mutex_unlock(&fs_info->balance_mutex);
3379        mutex_unlock(&fs_info->volume_mutex);
3380        mnt_drop_write_file(file);
3381        return ret;
3382}
3383
3384static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
3385{
3386        if (!capable(CAP_SYS_ADMIN))
3387                return -EPERM;
3388
3389        switch (cmd) {
3390        case BTRFS_BALANCE_CTL_PAUSE:
3391                return btrfs_pause_balance(root->fs_info);
3392        case BTRFS_BALANCE_CTL_CANCEL:
3393                return btrfs_cancel_balance(root->fs_info);
3394        }
3395
3396        return -EINVAL;
3397}
3398
3399static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
3400                                         void __user *arg)
3401{
3402        struct btrfs_fs_info *fs_info = root->fs_info;
3403        struct btrfs_ioctl_balance_args *bargs;
3404        int ret = 0;
3405
3406        if (!capable(CAP_SYS_ADMIN))
3407                return -EPERM;
3408
3409        mutex_lock(&fs_info->balance_mutex);
3410        if (!fs_info->balance_ctl) {
3411                ret = -ENOTCONN;
3412                goto out;
3413        }
3414
3415        bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
3416        if (!bargs) {
3417                ret = -ENOMEM;
3418                goto out;
3419        }
3420
3421        update_ioctl_balance_args(fs_info, 1, bargs);
3422
3423        if (copy_to_user(arg, bargs, sizeof(*bargs)))
3424                ret = -EFAULT;
3425
3426        kfree(bargs);
3427out:
3428        mutex_unlock(&fs_info->balance_mutex);
3429        return ret;
3430}
3431
3432static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
3433{
3434        struct btrfs_ioctl_quota_ctl_args *sa;
3435        struct btrfs_trans_handle *trans = NULL;
3436        int ret;
3437        int err;
3438
3439        if (!capable(CAP_SYS_ADMIN))
3440                return -EPERM;
3441
3442        if (root->fs_info->sb->s_flags & MS_RDONLY)
3443                return -EROFS;
3444
3445        sa = memdup_user(arg, sizeof(*sa));
3446        if (IS_ERR(sa))
3447                return PTR_ERR(sa);
3448
3449        if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
3450                trans = btrfs_start_transaction(root, 2);
3451                if (IS_ERR(trans)) {
3452                        ret = PTR_ERR(trans);
3453                        goto out;
3454                }
3455        }
3456
3457        switch (sa->cmd) {
3458        case BTRFS_QUOTA_CTL_ENABLE:
3459                ret = btrfs_quota_enable(trans, root->fs_info);
3460                break;
3461        case BTRFS_QUOTA_CTL_DISABLE:
3462                ret = btrfs_quota_disable(trans, root->fs_info);
3463                break;
3464        case BTRFS_QUOTA_CTL_RESCAN:
3465                ret = btrfs_quota_rescan(root->fs_info);
3466                break;
3467        default:
3468                ret = -EINVAL;
3469                break;
3470        }
3471
3472        if (copy_to_user(arg, sa, sizeof(*sa)))
3473                ret = -EFAULT;
3474
3475        if (trans) {
3476                err = btrfs_commit_transaction(trans, root);
3477                if (err && !ret)
3478                        ret = err;
3479        }
3480
3481out:
3482        kfree(sa);
3483        return ret;
3484}
3485
3486static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
3487{
3488        struct btrfs_ioctl_qgroup_assign_args *sa;
3489        struct btrfs_trans_handle *trans;
3490        int ret;
3491        int err;
3492
3493        if (!capable(CAP_SYS_ADMIN))
3494                return -EPERM;
3495
3496        if (root->fs_info->sb->s_flags & MS_RDONLY)
3497                return -EROFS;
3498
3499        sa = memdup_user(arg, sizeof(*sa));
3500        if (IS_ERR(sa))
3501                return PTR_ERR(sa);
3502
3503        trans = btrfs_join_transaction(root);
3504        if (IS_ERR(trans)) {
3505                ret = PTR_ERR(trans);
3506                goto out;
3507        }
3508
3509        /* FIXME: check if the IDs really exist */
3510        if (sa->assign) {
3511                ret = btrfs_add_qgroup_relation(trans, root->fs_info,
3512                                                sa->src, sa->dst);
3513        } else {
3514                ret = btrfs_del_qgroup_relation(trans, root->fs_info,
3515                                                sa->src, sa->dst);
3516        }
3517
3518        err = btrfs_end_transaction(trans, root);
3519        if (err && !ret)
3520                ret = err;
3521
3522out:
3523        kfree(sa);
3524        return ret;
3525}
3526
3527static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
3528{
3529        struct btrfs_ioctl_qgroup_create_args *sa;
3530        struct btrfs_trans_handle *trans;
3531        int ret;
3532        int err;
3533
3534        if (!capable(CAP_SYS_ADMIN))
3535                return -EPERM;
3536
3537        if (root->fs_info->sb->s_flags & MS_RDONLY)
3538                return -EROFS;
3539
3540        sa = memdup_user(arg, sizeof(*sa));
3541        if (IS_ERR(sa))
3542                return PTR_ERR(sa);
3543
3544        trans = btrfs_join_transaction(root);
3545        if (IS_ERR(trans)) {
3546                ret = PTR_ERR(trans);
3547                goto out;
3548        }
3549
3550        /* FIXME: check if the IDs really exist */
3551        if (sa->create) {
3552                ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
3553                                          NULL);
3554        } else {
3555                ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
3556        }
3557
3558        err = btrfs_end_transaction(trans, root);
3559        if (err && !ret)
3560                ret = err;
3561
3562out:
3563        kfree(sa);
3564        return ret;
3565}
3566
3567static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
3568{
3569        struct btrfs_ioctl_qgroup_limit_args *sa;
3570        struct btrfs_trans_handle *trans;
3571        int ret;
3572        int err;
3573        u64 qgroupid;
3574
3575        if (!capable(CAP_SYS_ADMIN))
3576                return -EPERM;
3577
3578        if (root->fs_info->sb->s_flags & MS_RDONLY)
3579                return -EROFS;
3580
3581        sa = memdup_user(arg, sizeof(*sa));
3582        if (IS_ERR(sa))
3583                return PTR_ERR(sa);
3584
3585        trans = btrfs_join_transaction(root);
3586        if (IS_ERR(trans)) {
3587                ret = PTR_ERR(trans);
3588                goto out;
3589        }
3590
3591        qgroupid = sa->qgroupid;
3592        if (!qgroupid) {
3593                /* take the current subvol as qgroup */
3594                qgroupid = root->root_key.objectid;
3595        }
3596
3597        /* FIXME: check if the IDs really exist */
3598        ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
3599
3600        err = btrfs_end_transaction(trans, root);
3601        if (err && !ret)
3602                ret = err;
3603
3604out:
3605        kfree(sa);
3606        return ret;
3607}
3608
3609static long btrfs_ioctl_set_received_subvol(struct file *file,
3610                                            void __user *arg)
3611{
3612        struct btrfs_ioctl_received_subvol_args *sa = NULL;
3613        struct inode *inode = fdentry(file)->d_inode;
3614        struct btrfs_root *root = BTRFS_I(inode)->root;
3615        struct btrfs_root_item *root_item = &root->root_item;
3616        struct btrfs_trans_handle *trans;
3617        struct timespec ct = CURRENT_TIME;
3618        int ret = 0;
3619
3620        ret = mnt_want_write_file(file);
3621        if (ret < 0)
3622                return ret;
3623
3624        down_write(&root->fs_info->subvol_sem);
3625
3626        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
3627                ret = -EINVAL;
3628                goto out;
3629        }
3630
3631        if (btrfs_root_readonly(root)) {
3632                ret = -EROFS;
3633                goto out;
3634        }
3635
3636        if (!inode_owner_or_capable(inode)) {
3637                ret = -EACCES;
3638                goto out;
3639        }
3640
3641        sa = memdup_user(arg, sizeof(*sa));
3642        if (IS_ERR(sa)) {
3643                ret = PTR_ERR(sa);
3644                sa = NULL;
3645                goto out;
3646        }
3647
3648        trans = btrfs_start_transaction(root, 1);
3649        if (IS_ERR(trans)) {
3650                ret = PTR_ERR(trans);
3651                trans = NULL;
3652                goto out;
3653        }
3654
3655        sa->rtransid = trans->transid;
3656        sa->rtime.sec = ct.tv_sec;
3657        sa->rtime.nsec = ct.tv_nsec;
3658
3659        memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
3660        btrfs_set_root_stransid(root_item, sa->stransid);
3661        btrfs_set_root_rtransid(root_item, sa->rtransid);
3662        root_item->stime.sec = cpu_to_le64(sa->stime.sec);
3663        root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
3664        root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
3665        root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
3666
3667        ret = btrfs_update_root(trans, root->fs_info->tree_root,
3668                                &root->root_key, &root->root_item);
3669        if (ret < 0) {
3670                btrfs_end_transaction(trans, root);
3671                trans = NULL;
3672                goto out;
3673        } else {
3674                ret = btrfs_commit_transaction(trans, root);
3675                if (ret < 0)
3676                        goto out;
3677        }
3678
3679        ret = copy_to_user(arg, sa, sizeof(*sa));
3680        if (ret)
3681                ret = -EFAULT;
3682
3683out:
3684        kfree(sa);
3685        up_write(&root->fs_info->subvol_sem);
3686        mnt_drop_write_file(file);
3687        return ret;
3688}
3689
3690long btrfs_ioctl(struct file *file, unsigned int
3691                cmd, unsigned long arg)
3692{
3693        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3694        void __user *argp = (void __user *)arg;
3695
3696        switch (cmd) {
3697        case FS_IOC_GETFLAGS:
3698                return btrfs_ioctl_getflags(file, argp);
3699        case FS_IOC_SETFLAGS:
3700                return btrfs_ioctl_setflags(file, argp);
3701        case FS_IOC_GETVERSION:
3702                return btrfs_ioctl_getversion(file, argp);
3703        case FITRIM:
3704                return btrfs_ioctl_fitrim(file, argp);
3705        case BTRFS_IOC_SNAP_CREATE:
3706                return btrfs_ioctl_snap_create(file, argp, 0);
3707        case BTRFS_IOC_SNAP_CREATE_V2:
3708                return btrfs_ioctl_snap_create_v2(file, argp, 0);
3709        case BTRFS_IOC_SUBVOL_CREATE:
3710                return btrfs_ioctl_snap_create(file, argp, 1);
3711        case BTRFS_IOC_SUBVOL_CREATE_V2:
3712                return btrfs_ioctl_snap_create_v2(file, argp, 1);
3713        case BTRFS_IOC_SNAP_DESTROY:
3714                return btrfs_ioctl_snap_destroy(file, argp);
3715        case BTRFS_IOC_SUBVOL_GETFLAGS:
3716                return btrfs_ioctl_subvol_getflags(file, argp);
3717        case BTRFS_IOC_SUBVOL_SETFLAGS:
3718                return btrfs_ioctl_subvol_setflags(file, argp);
3719        case BTRFS_IOC_DEFAULT_SUBVOL:
3720                return btrfs_ioctl_default_subvol(file, argp);
3721        case BTRFS_IOC_DEFRAG:
3722                return btrfs_ioctl_defrag(file, NULL);
3723        case BTRFS_IOC_DEFRAG_RANGE:
3724                return btrfs_ioctl_defrag(file, argp);
3725        case BTRFS_IOC_RESIZE:
3726                return btrfs_ioctl_resize(root, argp);
3727        case BTRFS_IOC_ADD_DEV:
3728                return btrfs_ioctl_add_dev(root, argp);
3729        case BTRFS_IOC_RM_DEV:
3730                return btrfs_ioctl_rm_dev(root, argp);
3731        case BTRFS_IOC_FS_INFO:
3732                return btrfs_ioctl_fs_info(root, argp);
3733        case BTRFS_IOC_DEV_INFO:
3734                return btrfs_ioctl_dev_info(root, argp);
3735        case BTRFS_IOC_BALANCE:
3736                return btrfs_ioctl_balance(file, NULL);
3737        case BTRFS_IOC_CLONE:
3738                return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3739        case BTRFS_IOC_CLONE_RANGE:
3740                return btrfs_ioctl_clone_range(file, argp);
3741        case BTRFS_IOC_TRANS_START:
3742                return btrfs_ioctl_trans_start(file);
3743        case BTRFS_IOC_TRANS_END:
3744                return btrfs_ioctl_trans_end(file);
3745        case BTRFS_IOC_TREE_SEARCH:
3746                return btrfs_ioctl_tree_search(file, argp);
3747        case BTRFS_IOC_INO_LOOKUP:
3748                return btrfs_ioctl_ino_lookup(file, argp);
3749        case BTRFS_IOC_INO_PATHS:
3750                return btrfs_ioctl_ino_to_path(root, argp);
3751        case BTRFS_IOC_LOGICAL_INO:
3752                return btrfs_ioctl_logical_to_ino(root, argp);
3753        case BTRFS_IOC_SPACE_INFO:
3754                return btrfs_ioctl_space_info(root, argp);
3755        case BTRFS_IOC_SYNC:
3756                btrfs_sync_fs(file->f_dentry->d_sb, 1);
3757                return 0;
3758        case BTRFS_IOC_START_SYNC:
3759                return btrfs_ioctl_start_sync(file, argp);
3760        case BTRFS_IOC_WAIT_SYNC:
3761                return btrfs_ioctl_wait_sync(file, argp);
3762        case BTRFS_IOC_SCRUB:
3763                return btrfs_ioctl_scrub(root, argp);
3764        case BTRFS_IOC_SCRUB_CANCEL:
3765                return btrfs_ioctl_scrub_cancel(root, argp);
3766        case BTRFS_IOC_SCRUB_PROGRESS:
3767                return btrfs_ioctl_scrub_progress(root, argp);
3768        case BTRFS_IOC_BALANCE_V2:
3769                return btrfs_ioctl_balance(file, argp);
3770        case BTRFS_IOC_BALANCE_CTL:
3771                return btrfs_ioctl_balance_ctl(root, arg);
3772        case BTRFS_IOC_BALANCE_PROGRESS:
3773                return btrfs_ioctl_balance_progress(root, argp);
3774        case BTRFS_IOC_SET_RECEIVED_SUBVOL:
3775                return btrfs_ioctl_set_received_subvol(file, argp);
3776        case BTRFS_IOC_SEND:
3777                return btrfs_ioctl_send(file, argp);
3778        case BTRFS_IOC_GET_DEV_STATS:
3779                return btrfs_ioctl_get_dev_stats(root, argp);
3780        case BTRFS_IOC_QUOTA_CTL:
3781                return btrfs_ioctl_quota_ctl(root, argp);
3782        case BTRFS_IOC_QGROUP_ASSIGN:
3783                return btrfs_ioctl_qgroup_assign(root, argp);
3784        case BTRFS_IOC_QGROUP_CREATE:
3785                return btrfs_ioctl_qgroup_create(root, argp);
3786        case BTRFS_IOC_QGROUP_LIMIT:
3787                return btrfs_ioctl_qgroup_limit(root, argp);
3788        }
3789
3790        return -ENOTTY;
3791}
3792
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.