linux/fs/btrfs/ioctl.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2007 Oracle.  All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public
   6 * License v2 as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope that it will be useful,
   9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11 * General Public License for more details.
  12 *
  13 * You should have received a copy of the GNU General Public
  14 * License along with this program; if not, write to the
  15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16 * Boston, MA 021110-1307, USA.
  17 */
  18
  19#include <linux/kernel.h>
  20#include <linux/bio.h>
  21#include <linux/buffer_head.h>
  22#include <linux/file.h>
  23#include <linux/fs.h>
  24#include <linux/fsnotify.h>
  25#include <linux/pagemap.h>
  26#include <linux/highmem.h>
  27#include <linux/time.h>
  28#include <linux/init.h>
  29#include <linux/string.h>
  30#include <linux/backing-dev.h>
  31#include <linux/mount.h>
  32#include <linux/mpage.h>
  33#include <linux/namei.h>
  34#include <linux/swap.h>
  35#include <linux/writeback.h>
  36#include <linux/statfs.h>
  37#include <linux/compat.h>
  38#include <linux/bit_spinlock.h>
  39#include <linux/security.h>
  40#include <linux/xattr.h>
  41#include <linux/vmalloc.h>
  42#include <linux/slab.h>
  43#include <linux/blkdev.h>
  44#include <linux/uuid.h>
  45#include "compat.h"
  46#include "ctree.h"
  47#include "disk-io.h"
  48#include "transaction.h"
  49#include "btrfs_inode.h"
  50#include "ioctl.h"
  51#include "print-tree.h"
  52#include "volumes.h"
  53#include "locking.h"
  54#include "inode-map.h"
  55#include "backref.h"
  56#include "rcu-string.h"
  57#include "send.h"
  58
  59/* Mask out flags that are inappropriate for the given type of inode. */
  60static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
  61{
  62        if (S_ISDIR(mode))
  63                return flags;
  64        else if (S_ISREG(mode))
  65                return flags & ~FS_DIRSYNC_FL;
  66        else
  67                return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
  68}
  69
  70/*
  71 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
  72 */
  73static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
  74{
  75        unsigned int iflags = 0;
  76
  77        if (flags & BTRFS_INODE_SYNC)
  78                iflags |= FS_SYNC_FL;
  79        if (flags & BTRFS_INODE_IMMUTABLE)
  80                iflags |= FS_IMMUTABLE_FL;
  81        if (flags & BTRFS_INODE_APPEND)
  82                iflags |= FS_APPEND_FL;
  83        if (flags & BTRFS_INODE_NODUMP)
  84                iflags |= FS_NODUMP_FL;
  85        if (flags & BTRFS_INODE_NOATIME)
  86                iflags |= FS_NOATIME_FL;
  87        if (flags & BTRFS_INODE_DIRSYNC)
  88                iflags |= FS_DIRSYNC_FL;
  89        if (flags & BTRFS_INODE_NODATACOW)
  90                iflags |= FS_NOCOW_FL;
  91
  92        if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
  93                iflags |= FS_COMPR_FL;
  94        else if (flags & BTRFS_INODE_NOCOMPRESS)
  95                iflags |= FS_NOCOMP_FL;
  96
  97        return iflags;
  98}
  99
 100/*
 101 * Update inode->i_flags based on the btrfs internal flags.
 102 */
 103void btrfs_update_iflags(struct inode *inode)
 104{
 105        struct btrfs_inode *ip = BTRFS_I(inode);
 106
 107        inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
 108
 109        if (ip->flags & BTRFS_INODE_SYNC)
 110                inode->i_flags |= S_SYNC;
 111        if (ip->flags & BTRFS_INODE_IMMUTABLE)
 112                inode->i_flags |= S_IMMUTABLE;
 113        if (ip->flags & BTRFS_INODE_APPEND)
 114                inode->i_flags |= S_APPEND;
 115        if (ip->flags & BTRFS_INODE_NOATIME)
 116                inode->i_flags |= S_NOATIME;
 117        if (ip->flags & BTRFS_INODE_DIRSYNC)
 118                inode->i_flags |= S_DIRSYNC;
 119}
 120
 121/*
 122 * Inherit flags from the parent inode.
 123 *
 124 * Currently only the compression flags and the cow flags are inherited.
 125 */
 126void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 127{
 128        unsigned int flags;
 129
 130        if (!dir)
 131                return;
 132
 133        flags = BTRFS_I(dir)->flags;
 134
 135        if (flags & BTRFS_INODE_NOCOMPRESS) {
 136                BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
 137                BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 138        } else if (flags & BTRFS_INODE_COMPRESS) {
 139                BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
 140                BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
 141        }
 142
 143        if (flags & BTRFS_INODE_NODATACOW)
 144                BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 145
 146        btrfs_update_iflags(inode);
 147}
 148
 149static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
 150{
 151        struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
 152        unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
 153
 154        if (copy_to_user(arg, &flags, sizeof(flags)))
 155                return -EFAULT;
 156        return 0;
 157}
 158
 159static int check_flags(unsigned int flags)
 160{
 161        if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 162                      FS_NOATIME_FL | FS_NODUMP_FL | \
 163                      FS_SYNC_FL | FS_DIRSYNC_FL | \
 164                      FS_NOCOMP_FL | FS_COMPR_FL |
 165                      FS_NOCOW_FL))
 166                return -EOPNOTSUPP;
 167
 168        if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
 169                return -EINVAL;
 170
 171        return 0;
 172}
 173
 174static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 175{
 176        struct inode *inode = file->f_path.dentry->d_inode;
 177        struct btrfs_inode *ip = BTRFS_I(inode);
 178        struct btrfs_root *root = ip->root;
 179        struct btrfs_trans_handle *trans;
 180        unsigned int flags, oldflags;
 181        int ret;
 182        u64 ip_oldflags;
 183        unsigned int i_oldflags;
 184        umode_t mode;
 185
 186        if (btrfs_root_readonly(root))
 187                return -EROFS;
 188
 189        if (copy_from_user(&flags, arg, sizeof(flags)))
 190                return -EFAULT;
 191
 192        ret = check_flags(flags);
 193        if (ret)
 194                return ret;
 195
 196        if (!inode_owner_or_capable(inode))
 197                return -EACCES;
 198
 199        ret = mnt_want_write_file(file);
 200        if (ret)
 201                return ret;
 202
 203        mutex_lock(&inode->i_mutex);
 204
 205        ip_oldflags = ip->flags;
 206        i_oldflags = inode->i_flags;
 207        mode = inode->i_mode;
 208
 209        flags = btrfs_mask_flags(inode->i_mode, flags);
 210        oldflags = btrfs_flags_to_ioctl(ip->flags);
 211        if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
 212                if (!capable(CAP_LINUX_IMMUTABLE)) {
 213                        ret = -EPERM;
 214                        goto out_unlock;
 215                }
 216        }
 217
 218        if (flags & FS_SYNC_FL)
 219                ip->flags |= BTRFS_INODE_SYNC;
 220        else
 221                ip->flags &= ~BTRFS_INODE_SYNC;
 222        if (flags & FS_IMMUTABLE_FL)
 223                ip->flags |= BTRFS_INODE_IMMUTABLE;
 224        else
 225                ip->flags &= ~BTRFS_INODE_IMMUTABLE;
 226        if (flags & FS_APPEND_FL)
 227                ip->flags |= BTRFS_INODE_APPEND;
 228        else
 229                ip->flags &= ~BTRFS_INODE_APPEND;
 230        if (flags & FS_NODUMP_FL)
 231                ip->flags |= BTRFS_INODE_NODUMP;
 232        else
 233                ip->flags &= ~BTRFS_INODE_NODUMP;
 234        if (flags & FS_NOATIME_FL)
 235                ip->flags |= BTRFS_INODE_NOATIME;
 236        else
 237                ip->flags &= ~BTRFS_INODE_NOATIME;
 238        if (flags & FS_DIRSYNC_FL)
 239                ip->flags |= BTRFS_INODE_DIRSYNC;
 240        else
 241                ip->flags &= ~BTRFS_INODE_DIRSYNC;
 242        if (flags & FS_NOCOW_FL) {
 243                if (S_ISREG(mode)) {
 244                        /*
 245                         * It's safe to turn csums off here, no extents exist.
 246                         * Otherwise we want the flag to reflect the real COW
 247                         * status of the file and will not set it.
 248                         */
 249                        if (inode->i_size == 0)
 250                                ip->flags |= BTRFS_INODE_NODATACOW
 251                                           | BTRFS_INODE_NODATASUM;
 252                } else {
 253                        ip->flags |= BTRFS_INODE_NODATACOW;
 254                }
 255        } else {
 256                /*
 257                 * Revert back under same assuptions as above
 258                 */
 259                if (S_ISREG(mode)) {
 260                        if (inode->i_size == 0)
 261                                ip->flags &= ~(BTRFS_INODE_NODATACOW
 262                                             | BTRFS_INODE_NODATASUM);
 263                } else {
 264                        ip->flags &= ~BTRFS_INODE_NODATACOW;
 265                }
 266        }
 267
 268        /*
 269         * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
 270         * flag may be changed automatically if compression code won't make
 271         * things smaller.
 272         */
 273        if (flags & FS_NOCOMP_FL) {
 274                ip->flags &= ~BTRFS_INODE_COMPRESS;
 275                ip->flags |= BTRFS_INODE_NOCOMPRESS;
 276        } else if (flags & FS_COMPR_FL) {
 277                ip->flags |= BTRFS_INODE_COMPRESS;
 278                ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
 279        } else {
 280                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
 281        }
 282
 283        trans = btrfs_start_transaction(root, 1);
 284        if (IS_ERR(trans)) {
 285                ret = PTR_ERR(trans);
 286                goto out_drop;
 287        }
 288
 289        btrfs_update_iflags(inode);
 290        inode_inc_iversion(inode);
 291        inode->i_ctime = CURRENT_TIME;
 292        ret = btrfs_update_inode(trans, root, inode);
 293
 294        btrfs_end_transaction(trans, root);
 295 out_drop:
 296        if (ret) {
 297                ip->flags = ip_oldflags;
 298                inode->i_flags = i_oldflags;
 299        }
 300
 301 out_unlock:
 302        mutex_unlock(&inode->i_mutex);
 303        mnt_drop_write_file(file);
 304        return ret;
 305}
 306
 307static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 308{
 309        struct inode *inode = file->f_path.dentry->d_inode;
 310
 311        return put_user(inode->i_generation, arg);
 312}
 313
 314static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
 315{
 316        struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
 317        struct btrfs_device *device;
 318        struct request_queue *q;
 319        struct fstrim_range range;
 320        u64 minlen = ULLONG_MAX;
 321        u64 num_devices = 0;
 322        u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 323        int ret;
 324
 325        if (!capable(CAP_SYS_ADMIN))
 326                return -EPERM;
 327
 328        rcu_read_lock();
 329        list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
 330                                dev_list) {
 331                if (!device->bdev)
 332                        continue;
 333                q = bdev_get_queue(device->bdev);
 334                if (blk_queue_discard(q)) {
 335                        num_devices++;
 336                        minlen = min((u64)q->limits.discard_granularity,
 337                                     minlen);
 338                }
 339        }
 340        rcu_read_unlock();
 341
 342        if (!num_devices)
 343                return -EOPNOTSUPP;
 344        if (copy_from_user(&range, arg, sizeof(range)))
 345                return -EFAULT;
 346        if (range.start > total_bytes ||
 347            range.len < fs_info->sb->s_blocksize)
 348                return -EINVAL;
 349
 350        range.len = min(range.len, total_bytes - range.start);
 351        range.minlen = max(range.minlen, minlen);
 352        ret = btrfs_trim_fs(fs_info->tree_root, &range);
 353        if (ret < 0)
 354                return ret;
 355
 356        if (copy_to_user(arg, &range, sizeof(range)))
 357                return -EFAULT;
 358
 359        return 0;
 360}
 361
 362static noinline int create_subvol(struct btrfs_root *root,
 363                                  struct dentry *dentry,
 364                                  char *name, int namelen,
 365                                  u64 *async_transid,
 366                                  struct btrfs_qgroup_inherit **inherit)
 367{
 368        struct btrfs_trans_handle *trans;
 369        struct btrfs_key key;
 370        struct btrfs_root_item root_item;
 371        struct btrfs_inode_item *inode_item;
 372        struct extent_buffer *leaf;
 373        struct btrfs_root *new_root;
 374        struct dentry *parent = dentry->d_parent;
 375        struct inode *dir;
 376        struct timespec cur_time = CURRENT_TIME;
 377        int ret;
 378        int err;
 379        u64 objectid;
 380        u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
 381        u64 index = 0;
 382        uuid_le new_uuid;
 383
 384        ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
 385        if (ret)
 386                return ret;
 387
 388        dir = parent->d_inode;
 389
 390        /*
 391         * 1 - inode item
 392         * 2 - refs
 393         * 1 - root item
 394         * 2 - dir items
 395         */
 396        trans = btrfs_start_transaction(root, 6);
 397        if (IS_ERR(trans))
 398                return PTR_ERR(trans);
 399
 400        ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
 401                                   inherit ? *inherit : NULL);
 402        if (ret)
 403                goto fail;
 404
 405        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
 406                                      0, objectid, NULL, 0, 0, 0);
 407        if (IS_ERR(leaf)) {
 408                ret = PTR_ERR(leaf);
 409                goto fail;
 410        }
 411
 412        memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
 413        btrfs_set_header_bytenr(leaf, leaf->start);
 414        btrfs_set_header_generation(leaf, trans->transid);
 415        btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
 416        btrfs_set_header_owner(leaf, objectid);
 417
 418        write_extent_buffer(leaf, root->fs_info->fsid,
 419                            (unsigned long)btrfs_header_fsid(leaf),
 420                            BTRFS_FSID_SIZE);
 421        write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
 422                            (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
 423                            BTRFS_UUID_SIZE);
 424        btrfs_mark_buffer_dirty(leaf);
 425
 426        memset(&root_item, 0, sizeof(root_item));
 427
 428        inode_item = &root_item.inode;
 429        inode_item->generation = cpu_to_le64(1);
 430        inode_item->size = cpu_to_le64(3);
 431        inode_item->nlink = cpu_to_le32(1);
 432        inode_item->nbytes = cpu_to_le64(root->leafsize);
 433        inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
 434
 435        root_item.flags = 0;
 436        root_item.byte_limit = 0;
 437        inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
 438
 439        btrfs_set_root_bytenr(&root_item, leaf->start);
 440        btrfs_set_root_generation(&root_item, trans->transid);
 441        btrfs_set_root_level(&root_item, 0);
 442        btrfs_set_root_refs(&root_item, 1);
 443        btrfs_set_root_used(&root_item, leaf->len);
 444        btrfs_set_root_last_snapshot(&root_item, 0);
 445
 446        btrfs_set_root_generation_v2(&root_item,
 447                        btrfs_root_generation(&root_item));
 448        uuid_le_gen(&new_uuid);
 449        memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
 450        root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
 451        root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
 452        root_item.ctime = root_item.otime;
 453        btrfs_set_root_ctransid(&root_item, trans->transid);
 454        btrfs_set_root_otransid(&root_item, trans->transid);
 455
 456        btrfs_tree_unlock(leaf);
 457        free_extent_buffer(leaf);
 458        leaf = NULL;
 459
 460        btrfs_set_root_dirid(&root_item, new_dirid);
 461
 462        key.objectid = objectid;
 463        key.offset = 0;
 464        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 465        ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 466                                &root_item);
 467        if (ret)
 468                goto fail;
 469
 470        key.offset = (u64)-1;
 471        new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
 472        if (IS_ERR(new_root)) {
 473                btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
 474                ret = PTR_ERR(new_root);
 475                goto fail;
 476        }
 477
 478        btrfs_record_root_in_trans(trans, new_root);
 479
 480        ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
 481        if (ret) {
 482                /* We potentially lose an unused inode item here */
 483                btrfs_abort_transaction(trans, root, ret);
 484                goto fail;
 485        }
 486
 487        /*
 488         * insert the directory item
 489         */
 490        ret = btrfs_set_inode_index(dir, &index);
 491        if (ret) {
 492                btrfs_abort_transaction(trans, root, ret);
 493                goto fail;
 494        }
 495
 496        ret = btrfs_insert_dir_item(trans, root,
 497                                    name, namelen, dir, &key,
 498                                    BTRFS_FT_DIR, index);
 499        if (ret) {
 500                btrfs_abort_transaction(trans, root, ret);
 501                goto fail;
 502        }
 503
 504        btrfs_i_size_write(dir, dir->i_size + namelen * 2);
 505        ret = btrfs_update_inode(trans, root, dir);
 506        BUG_ON(ret);
 507
 508        ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
 509                                 objectid, root->root_key.objectid,
 510                                 btrfs_ino(dir), index, name, namelen);
 511
 512        BUG_ON(ret);
 513
 514        d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 515fail:
 516        if (async_transid) {
 517                *async_transid = trans->transid;
 518                err = btrfs_commit_transaction_async(trans, root, 1);
 519        } else {
 520                err = btrfs_commit_transaction(trans, root);
 521        }
 522        if (err && !ret)
 523                ret = err;
 524        return ret;
 525}
 526
 527static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 528                           char *name, int namelen, u64 *async_transid,
 529                           bool readonly, struct btrfs_qgroup_inherit **inherit)
 530{
 531        struct inode *inode;
 532        struct btrfs_pending_snapshot *pending_snapshot;
 533        struct btrfs_trans_handle *trans;
 534        int ret;
 535
 536        if (!root->ref_cows)
 537                return -EINVAL;
 538
 539        pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
 540        if (!pending_snapshot)
 541                return -ENOMEM;
 542
 543        btrfs_init_block_rsv(&pending_snapshot->block_rsv,
 544                             BTRFS_BLOCK_RSV_TEMP);
 545        pending_snapshot->dentry = dentry;
 546        pending_snapshot->root = root;
 547        pending_snapshot->readonly = readonly;
 548        if (inherit) {
 549                pending_snapshot->inherit = *inherit;
 550                *inherit = NULL;        /* take responsibility to free it */
 551        }
 552
 553        trans = btrfs_start_transaction(root->fs_info->extent_root, 6);
 554        if (IS_ERR(trans)) {
 555                ret = PTR_ERR(trans);
 556                goto fail;
 557        }
 558
 559        ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
 560        BUG_ON(ret);
 561
 562        spin_lock(&root->fs_info->trans_lock);
 563        list_add(&pending_snapshot->list,
 564                 &trans->transaction->pending_snapshots);
 565        spin_unlock(&root->fs_info->trans_lock);
 566        if (async_transid) {
 567                *async_transid = trans->transid;
 568                ret = btrfs_commit_transaction_async(trans,
 569                                     root->fs_info->extent_root, 1);
 570        } else {
 571                ret = btrfs_commit_transaction(trans,
 572                                               root->fs_info->extent_root);
 573        }
 574        if (ret)
 575                goto fail;
 576
 577        ret = pending_snapshot->error;
 578        if (ret)
 579                goto fail;
 580
 581        ret = btrfs_orphan_cleanup(pending_snapshot->snap);
 582        if (ret)
 583                goto fail;
 584
 585        inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
 586        if (IS_ERR(inode)) {
 587                ret = PTR_ERR(inode);
 588                goto fail;
 589        }
 590        BUG_ON(!inode);
 591        d_instantiate(dentry, inode);
 592        ret = 0;
 593fail:
 594        kfree(pending_snapshot);
 595        return ret;
 596}
 597
 598/*  copy of check_sticky in fs/namei.c()
 599* It's inline, so penalty for filesystems that don't use sticky bit is
 600* minimal.
 601*/
 602static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
 603{
 604        kuid_t fsuid = current_fsuid();
 605
 606        if (!(dir->i_mode & S_ISVTX))
 607                return 0;
 608        if (uid_eq(inode->i_uid, fsuid))
 609                return 0;
 610        if (uid_eq(dir->i_uid, fsuid))
 611                return 0;
 612        return !capable(CAP_FOWNER);
 613}
 614
 615/*  copy of may_delete in fs/namei.c()
 616 *      Check whether we can remove a link victim from directory dir, check
 617 *  whether the type of victim is right.
 618 *  1. We can't do it if dir is read-only (done in permission())
 619 *  2. We should have write and exec permissions on dir
 620 *  3. We can't remove anything from append-only dir
 621 *  4. We can't do anything with immutable dir (done in permission())
 622 *  5. If the sticky bit on dir is set we should either
 623 *      a. be owner of dir, or
 624 *      b. be owner of victim, or
 625 *      c. have CAP_FOWNER capability
 626 *  6. If the victim is append-only or immutable we can't do antyhing with
 627 *     links pointing to it.
 628 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 629 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 630 *  9. We can't remove a root or mountpoint.
 631 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 632 *     nfs_async_unlink().
 633 */
 634
 635static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
 636{
 637        int error;
 638
 639        if (!victim->d_inode)
 640                return -ENOENT;
 641
 642        BUG_ON(victim->d_parent->d_inode != dir);
 643        audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 644
 645        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
 646        if (error)
 647                return error;
 648        if (IS_APPEND(dir))
 649                return -EPERM;
 650        if (btrfs_check_sticky(dir, victim->d_inode)||
 651                IS_APPEND(victim->d_inode)||
 652            IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
 653                return -EPERM;
 654        if (isdir) {
 655                if (!S_ISDIR(victim->d_inode->i_mode))
 656                        return -ENOTDIR;
 657                if (IS_ROOT(victim))
 658                        return -EBUSY;
 659        } else if (S_ISDIR(victim->d_inode->i_mode))
 660                return -EISDIR;
 661        if (IS_DEADDIR(dir))
 662                return -ENOENT;
 663        if (victim->d_flags & DCACHE_NFSFS_RENAMED)
 664                return -EBUSY;
 665        return 0;
 666}
 667
 668/* copy of may_create in fs/namei.c() */
 669static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 670{
 671        if (child->d_inode)
 672                return -EEXIST;
 673        if (IS_DEADDIR(dir))
 674                return -ENOENT;
 675        return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 676}
 677
 678/*
 679 * Create a new subvolume below @parent.  This is largely modeled after
 680 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 681 * inside this filesystem so it's quite a bit simpler.
 682 */
 683static noinline int btrfs_mksubvol(struct path *parent,
 684                                   char *name, int namelen,
 685                                   struct btrfs_root *snap_src,
 686                                   u64 *async_transid, bool readonly,
 687                                   struct btrfs_qgroup_inherit **inherit)
 688{
 689        struct inode *dir  = parent->dentry->d_inode;
 690        struct dentry *dentry;
 691        int error;
 692
 693        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 694
 695        dentry = lookup_one_len(name, parent->dentry, namelen);
 696        error = PTR_ERR(dentry);
 697        if (IS_ERR(dentry))
 698                goto out_unlock;
 699
 700        error = -EEXIST;
 701        if (dentry->d_inode)
 702                goto out_dput;
 703
 704        error = btrfs_may_create(dir, dentry);
 705        if (error)
 706                goto out_dput;
 707
 708        down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 709
 710        if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
 711                goto out_up_read;
 712
 713        if (snap_src) {
 714                error = create_snapshot(snap_src, dentry, name, namelen,
 715                                        async_transid, readonly, inherit);
 716        } else {
 717                error = create_subvol(BTRFS_I(dir)->root, dentry,
 718                                      name, namelen, async_transid, inherit);
 719        }
 720        if (!error)
 721                fsnotify_mkdir(dir, dentry);
 722out_up_read:
 723        up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 724out_dput:
 725        dput(dentry);
 726out_unlock:
 727        mutex_unlock(&dir->i_mutex);
 728        return error;
 729}
 730
 731/*
 732 * When we're defragging a range, we don't want to kick it off again
 733 * if it is really just waiting for delalloc to send it down.
 734 * If we find a nice big extent or delalloc range for the bytes in the
 735 * file you want to defrag, we return 0 to let you know to skip this
 736 * part of the file
 737 */
 738static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
 739{
 740        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 741        struct extent_map *em = NULL;
 742        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 743        u64 end;
 744
 745        read_lock(&em_tree->lock);
 746        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
 747        read_unlock(&em_tree->lock);
 748
 749        if (em) {
 750                end = extent_map_end(em);
 751                free_extent_map(em);
 752                if (end - offset > thresh)
 753                        return 0;
 754        }
 755        /* if we already have a nice delalloc here, just stop */
 756        thresh /= 2;
 757        end = count_range_bits(io_tree, &offset, offset + thresh,
 758                               thresh, EXTENT_DELALLOC, 1);
 759        if (end >= thresh)
 760                return 0;
 761        return 1;
 762}
 763
 764/*
 765 * helper function to walk through a file and find extents
 766 * newer than a specific transid, and smaller than thresh.
 767 *
 768 * This is used by the defragging code to find new and small
 769 * extents
 770 */
 771static int find_new_extents(struct btrfs_root *root,
 772                            struct inode *inode, u64 newer_than,
 773                            u64 *off, int thresh)
 774{
 775        struct btrfs_path *path;
 776        struct btrfs_key min_key;
 777        struct btrfs_key max_key;
 778        struct extent_buffer *leaf;
 779        struct btrfs_file_extent_item *extent;
 780        int type;
 781        int ret;
 782        u64 ino = btrfs_ino(inode);
 783
 784        path = btrfs_alloc_path();
 785        if (!path)
 786                return -ENOMEM;
 787
 788        min_key.objectid = ino;
 789        min_key.type = BTRFS_EXTENT_DATA_KEY;
 790        min_key.offset = *off;
 791
 792        max_key.objectid = ino;
 793        max_key.type = (u8)-1;
 794        max_key.offset = (u64)-1;
 795
 796        path->keep_locks = 1;
 797
 798        while(1) {
 799                ret = btrfs_search_forward(root, &min_key, &max_key,
 800                                           path, 0, newer_than);
 801                if (ret != 0)
 802                        goto none;
 803                if (min_key.objectid != ino)
 804                        goto none;
 805                if (min_key.type != BTRFS_EXTENT_DATA_KEY)
 806                        goto none;
 807
 808                leaf = path->nodes[0];
 809                extent = btrfs_item_ptr(leaf, path->slots[0],
 810                                        struct btrfs_file_extent_item);
 811
 812                type = btrfs_file_extent_type(leaf, extent);
 813                if (type == BTRFS_FILE_EXTENT_REG &&
 814                    btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
 815                    check_defrag_in_cache(inode, min_key.offset, thresh)) {
 816                        *off = min_key.offset;
 817                        btrfs_free_path(path);
 818                        return 0;
 819                }
 820
 821                if (min_key.offset == (u64)-1)
 822                        goto none;
 823
 824                min_key.offset++;
 825                btrfs_release_path(path);
 826        }
 827none:
 828        btrfs_free_path(path);
 829        return -ENOENT;
 830}
 831
 832static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
 833{
 834        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 835        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 836        struct extent_map *em;
 837        u64 len = PAGE_CACHE_SIZE;
 838
 839        /*
 840         * hopefully we have this extent in the tree already, try without
 841         * the full extent lock
 842         */
 843        read_lock(&em_tree->lock);
 844        em = lookup_extent_mapping(em_tree, start, len);
 845        read_unlock(&em_tree->lock);
 846
 847        if (!em) {
 848                /* get the big lock and read metadata off disk */
 849                lock_extent(io_tree, start, start + len - 1);
 850                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
 851                unlock_extent(io_tree, start, start + len - 1);
 852
 853                if (IS_ERR(em))
 854                        return NULL;
 855        }
 856
 857        return em;
 858}
 859
 860static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
 861{
 862        struct extent_map *next;
 863        bool ret = true;
 864
 865        /* this is the last extent */
 866        if (em->start + em->len >= i_size_read(inode))
 867                return false;
 868
 869        next = defrag_lookup_extent(inode, em->start + em->len);
 870        if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
 871                ret = false;
 872
 873        free_extent_map(next);
 874        return ret;
 875}
 876
 877static int should_defrag_range(struct inode *inode, u64 start, int thresh,
 878                               u64 *last_len, u64 *skip, u64 *defrag_end,
 879                               int compress)
 880{
 881        struct extent_map *em;
 882        int ret = 1;
 883        bool next_mergeable = true;
 884
 885        /*
 886         * make sure that once we start defragging an extent, we keep on
 887         * defragging it
 888         */
 889        if (start < *defrag_end)
 890                return 1;
 891
 892        *skip = 0;
 893
 894        em = defrag_lookup_extent(inode, start);
 895        if (!em)
 896                return 0;
 897
 898        /* this will cover holes, and inline extents */
 899        if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
 900                ret = 0;
 901                goto out;
 902        }
 903
 904        next_mergeable = defrag_check_next_extent(inode, em);
 905
 906        /*
 907         * we hit a real extent, if it is big or the next extent is not a
 908         * real extent, don't bother defragging it
 909         */
 910        if (!compress && (*last_len == 0 || *last_len >= thresh) &&
 911            (em->len >= thresh || !next_mergeable))
 912                ret = 0;
 913out:
 914        /*
 915         * last_len ends up being a counter of how many bytes we've defragged.
 916         * every time we choose not to defrag an extent, we reset *last_len
 917         * so that the next tiny extent will force a defrag.
 918         *
 919         * The end result of this is that tiny extents before a single big
 920         * extent will force at least part of that big extent to be defragged.
 921         */
 922        if (ret) {
 923                *defrag_end = extent_map_end(em);
 924        } else {
 925                *last_len = 0;
 926                *skip = extent_map_end(em);
 927                *defrag_end = 0;
 928        }
 929
 930        free_extent_map(em);
 931        return ret;
 932}
 933
 934/*
 935 * it doesn't do much good to defrag one or two pages
 936 * at a time.  This pulls in a nice chunk of pages
 937 * to COW and defrag.
 938 *
 939 * It also makes sure the delalloc code has enough
 940 * dirty data to avoid making new small extents as part
 941 * of the defrag
 942 *
 943 * It's a good idea to start RA on this range
 944 * before calling this.
 945 */
 946static int cluster_pages_for_defrag(struct inode *inode,
 947                                    struct page **pages,
 948                                    unsigned long start_index,
 949                                    int num_pages)
 950{
 951        unsigned long file_end;
 952        u64 isize = i_size_read(inode);
 953        u64 page_start;
 954        u64 page_end;
 955        u64 page_cnt;
 956        int ret;
 957        int i;
 958        int i_done;
 959        struct btrfs_ordered_extent *ordered;
 960        struct extent_state *cached_state = NULL;
 961        struct extent_io_tree *tree;
 962        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 963
 964        file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
 965        if (!isize || start_index > file_end)
 966                return 0;
 967
 968        page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
 969
 970        ret = btrfs_delalloc_reserve_space(inode,
 971                                           page_cnt << PAGE_CACHE_SHIFT);
 972        if (ret)
 973                return ret;
 974        i_done = 0;
 975        tree = &BTRFS_I(inode)->io_tree;
 976
 977        /* step one, lock all the pages */
 978        for (i = 0; i < page_cnt; i++) {
 979                struct page *page;
 980again:
 981                page = find_or_create_page(inode->i_mapping,
 982                                           start_index + i, mask);
 983                if (!page)
 984                        break;
 985
 986                page_start = page_offset(page);
 987                page_end = page_start + PAGE_CACHE_SIZE - 1;
 988                while (1) {
 989                        lock_extent(tree, page_start, page_end);
 990                        ordered = btrfs_lookup_ordered_extent(inode,
 991                                                              page_start);
 992                        unlock_extent(tree, page_start, page_end);
 993                        if (!ordered)
 994                                break;
 995
 996                        unlock_page(page);
 997                        btrfs_start_ordered_extent(inode, ordered, 1);
 998                        btrfs_put_ordered_extent(ordered);
 999                        lock_page(page);
1000                        /*
1001                         * we unlocked the page above, so we need check if
1002                         * it was released or not.
1003                         */
1004                        if (page->mapping != inode->i_mapping) {
1005                                unlock_page(page);
1006                                page_cache_release(page);
1007                                goto again;
1008                        }
1009                }
1010
1011                if (!PageUptodate(page)) {
1012                        btrfs_readpage(NULL, page);
1013                        lock_page(page);
1014                        if (!PageUptodate(page)) {
1015                                unlock_page(page);
1016                                page_cache_release(page);
1017                                ret = -EIO;
1018                                break;
1019                        }
1020                }
1021
1022                if (page->mapping != inode->i_mapping) {
1023                        unlock_page(page);
1024                        page_cache_release(page);
1025                        goto again;
1026                }
1027
1028                pages[i] = page;
1029                i_done++;
1030        }
1031        if (!i_done || ret)
1032                goto out;
1033
1034        if (!(inode->i_sb->s_flags & MS_ACTIVE))
1035                goto out;
1036
1037        /*
1038         * so now we have a nice long stream of locked
1039         * and up to date pages, lets wait on them
1040         */
1041        for (i = 0; i < i_done; i++)
1042                wait_on_page_writeback(pages[i]);
1043
1044        page_start = page_offset(pages[0]);
1045        page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
1046
1047        lock_extent_bits(&BTRFS_I(inode)->io_tree,
1048                         page_start, page_end - 1, 0, &cached_state);
1049        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
1050                          page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
1051                          EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
1052                          &cached_state, GFP_NOFS);
1053
1054        if (i_done != page_cnt) {
1055                spin_lock(&BTRFS_I(inode)->lock);
1056                BTRFS_I(inode)->outstanding_extents++;
1057                spin_unlock(&BTRFS_I(inode)->lock);
1058                btrfs_delalloc_release_space(inode,
1059                                     (page_cnt - i_done) << PAGE_CACHE_SHIFT);
1060        }
1061
1062
1063        set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
1064                          &cached_state, GFP_NOFS);
1065
1066        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1067                             page_start, page_end - 1, &cached_state,
1068                             GFP_NOFS);
1069
1070        for (i = 0; i < i_done; i++) {
1071                clear_page_dirty_for_io(pages[i]);
1072                ClearPageChecked(pages[i]);
1073                set_page_extent_mapped(pages[i]);
1074                set_page_dirty(pages[i]);
1075                unlock_page(pages[i]);
1076                page_cache_release(pages[i]);
1077        }
1078        return i_done;
1079out:
1080        for (i = 0; i < i_done; i++) {
1081                unlock_page(pages[i]);
1082                page_cache_release(pages[i]);
1083        }
1084        btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
1085        return ret;
1086
1087}
1088
1089int btrfs_defrag_file(struct inode *inode, struct file *file,
1090                      struct btrfs_ioctl_defrag_range_args *range,
1091                      u64 newer_than, unsigned long max_to_defrag)
1092{
1093        struct btrfs_root *root = BTRFS_I(inode)->root;
1094        struct file_ra_state *ra = NULL;
1095        unsigned long last_index;
1096        u64 isize = i_size_read(inode);
1097        u64 last_len = 0;
1098        u64 skip = 0;
1099        u64 defrag_end = 0;
1100        u64 newer_off = range->start;
1101        unsigned long i;
1102        unsigned long ra_index = 0;
1103        int ret;
1104        int defrag_count = 0;
1105        int compress_type = BTRFS_COMPRESS_ZLIB;
1106        int extent_thresh = range->extent_thresh;
1107        int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
1108        int cluster = max_cluster;
1109        u64 new_align = ~((u64)128 * 1024 - 1);
1110        struct page **pages = NULL;
1111
1112        if (extent_thresh == 0)
1113                extent_thresh = 256 * 1024;
1114
1115        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1116                if (range->compress_type > BTRFS_COMPRESS_TYPES)
1117                        return -EINVAL;
1118                if (range->compress_type)
1119                        compress_type = range->compress_type;
1120        }
1121
1122        if (isize == 0)
1123                return 0;
1124
1125        /*
1126         * if we were not given a file, allocate a readahead
1127         * context
1128         */
1129        if (!file) {
1130                ra = kzalloc(sizeof(*ra), GFP_NOFS);
1131                if (!ra)
1132                        return -ENOMEM;
1133                file_ra_state_init(ra, inode->i_mapping);
1134        } else {
1135                ra = &file->f_ra;
1136        }
1137
1138        pages = kmalloc(sizeof(struct page *) * max_cluster,
1139                        GFP_NOFS);
1140        if (!pages) {
1141                ret = -ENOMEM;
1142                goto out_ra;
1143        }
1144
1145        /* find the last page to defrag */
1146        if (range->start + range->len > range->start) {
1147                last_index = min_t(u64, isize - 1,
1148                         range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
1149        } else {
1150                last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
1151        }
1152
1153        if (newer_than) {
1154                ret = find_new_extents(root, inode, newer_than,
1155                                       &newer_off, 64 * 1024);
1156                if (!ret) {
1157                        range->start = newer_off;
1158                        /*
1159                         * we always align our defrag to help keep
1160                         * the extents in the file evenly spaced
1161                         */
1162                        i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1163                } else
1164                        goto out_ra;
1165        } else {
1166                i = range->start >> PAGE_CACHE_SHIFT;
1167        }
1168        if (!max_to_defrag)
1169                max_to_defrag = last_index + 1;
1170
1171        /*
1172         * make writeback starts from i, so the defrag range can be
1173         * written sequentially.
1174         */
1175        if (i < inode->i_mapping->writeback_index)
1176                inode->i_mapping->writeback_index = i;
1177
1178        while (i <= last_index && defrag_count < max_to_defrag &&
1179               (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
1180                PAGE_CACHE_SHIFT)) {
1181                /*
1182                 * make sure we stop running if someone unmounts
1183                 * the FS
1184                 */
1185                if (!(inode->i_sb->s_flags & MS_ACTIVE))
1186                        break;
1187
1188                if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1189                                         extent_thresh, &last_len, &skip,
1190                                         &defrag_end, range->flags &
1191                                         BTRFS_DEFRAG_RANGE_COMPRESS)) {
1192                        unsigned long next;
1193                        /*
1194                         * the should_defrag function tells us how much to skip
1195                         * bump our counter by the suggested amount
1196                         */
1197                        next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1198                        i = max(i + 1, next);
1199                        continue;
1200                }
1201
1202                if (!newer_than) {
1203                        cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
1204                                   PAGE_CACHE_SHIFT) - i;
1205                        cluster = min(cluster, max_cluster);
1206                } else {
1207                        cluster = max_cluster;
1208                }
1209
1210                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1211                        BTRFS_I(inode)->force_compress = compress_type;
1212
1213                if (i + cluster > ra_index) {
1214                        ra_index = max(i, ra_index);
1215                        btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
1216                                       cluster);
1217                        ra_index += max_cluster;
1218                }
1219
1220                mutex_lock(&inode->i_mutex);
1221                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1222                if (ret < 0) {
1223                        mutex_unlock(&inode->i_mutex);
1224                        goto out_ra;
1225                }
1226
1227                defrag_count += ret;
1228                balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
1229                mutex_unlock(&inode->i_mutex);
1230
1231                if (newer_than) {
1232                        if (newer_off == (u64)-1)
1233                                break;
1234
1235                        if (ret > 0)
1236                                i += ret;
1237
1238                        newer_off = max(newer_off + 1,
1239                                        (u64)i << PAGE_CACHE_SHIFT);
1240
1241                        ret = find_new_extents(root, inode,
1242                                               newer_than, &newer_off,
1243                                               64 * 1024);
1244                        if (!ret) {
1245                                range->start = newer_off;
1246                                i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
1247                        } else {
1248                                break;
1249                        }
1250                } else {
1251                        if (ret > 0) {
1252                                i += ret;
1253                                last_len += ret << PAGE_CACHE_SHIFT;
1254                        } else {
1255                                i++;
1256                                last_len = 0;
1257                        }
1258                }
1259        }
1260
1261        if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
1262                filemap_flush(inode->i_mapping);
1263
1264        if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1265                /* the filemap_flush will queue IO into the worker threads, but
1266                 * we have to make sure the IO is actually started and that
1267                 * ordered extents get created before we return
1268                 */
1269                atomic_inc(&root->fs_info->async_submit_draining);
1270                while (atomic_read(&root->fs_info->nr_async_submits) ||
1271                      atomic_read(&root->fs_info->async_delalloc_pages)) {
1272                        wait_event(root->fs_info->async_submit_wait,
1273                           (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
1274                            atomic_read(&root->fs_info->async_delalloc_pages) == 0));
1275                }
1276                atomic_dec(&root->fs_info->async_submit_draining);
1277
1278                mutex_lock(&inode->i_mutex);
1279                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1280                mutex_unlock(&inode->i_mutex);
1281        }
1282
1283        if (range->compress_type == BTRFS_COMPRESS_LZO) {
1284                btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
1285        }
1286
1287        ret = defrag_count;
1288
1289out_ra:
1290        if (!file)
1291                kfree(ra);
1292        kfree(pages);
1293        return ret;
1294}
1295
1296static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1297                                        void __user *arg)
1298{
1299        u64 new_size;
1300        u64 old_size;
1301        u64 devid = 1;
1302        struct btrfs_ioctl_vol_args *vol_args;
1303        struct btrfs_trans_handle *trans;
1304        struct btrfs_device *device = NULL;
1305        char *sizestr;
1306        char *devstr = NULL;
1307        int ret = 0;
1308        int mod = 0;
1309
1310        if (root->fs_info->sb->s_flags & MS_RDONLY)
1311                return -EROFS;
1312
1313        if (!capable(CAP_SYS_ADMIN))
1314                return -EPERM;
1315
1316        mutex_lock(&root->fs_info->volume_mutex);
1317        if (root->fs_info->balance_ctl) {
1318                printk(KERN_INFO "btrfs: balance in progress\n");
1319                ret = -EINVAL;
1320                goto out;
1321        }
1322
1323        vol_args = memdup_user(arg, sizeof(*vol_args));
1324        if (IS_ERR(vol_args)) {
1325                ret = PTR_ERR(vol_args);
1326                goto out;
1327        }
1328
1329        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1330
1331        sizestr = vol_args->name;
1332        devstr = strchr(sizestr, ':');
1333        if (devstr) {
1334                char *end;
1335                sizestr = devstr + 1;
1336                *devstr = '\0';
1337                devstr = vol_args->name;
1338                devid = simple_strtoull(devstr, &end, 10);
1339                printk(KERN_INFO "btrfs: resizing devid %llu\n",
1340                       (unsigned long long)devid);
1341        }
1342        device = btrfs_find_device(root, devid, NULL, NULL);
1343        if (!device) {
1344                printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1345                       (unsigned long long)devid);
1346                ret = -EINVAL;
1347                goto out_free;
1348        }
1349        if (device->fs_devices && device->fs_devices->seeding) {
1350                printk(KERN_INFO "btrfs: resizer unable to apply on "
1351                       "seeding device %llu\n",
1352                       (unsigned long long)devid);
1353                ret = -EINVAL;
1354                goto out_free;
1355        }
1356
1357        if (!strcmp(sizestr, "max"))
1358                new_size = device->bdev->bd_inode->i_size;
1359        else {
1360                if (sizestr[0] == '-') {
1361                        mod = -1;
1362                        sizestr++;
1363                } else if (sizestr[0] == '+') {
1364                        mod = 1;
1365                        sizestr++;
1366                }
1367                new_size = memparse(sizestr, NULL);
1368                if (new_size == 0) {
1369                        ret = -EINVAL;
1370                        goto out_free;
1371                }
1372        }
1373
1374        old_size = device->total_bytes;
1375
1376        if (mod < 0) {
1377                if (new_size > old_size) {
1378                        ret = -EINVAL;
1379                        goto out_free;
1380                }
1381                new_size = old_size - new_size;
1382        } else if (mod > 0) {
1383                new_size = old_size + new_size;
1384        }
1385
1386        if (new_size < 256 * 1024 * 1024) {
1387                ret = -EINVAL;
1388                goto out_free;
1389        }
1390        if (new_size > device->bdev->bd_inode->i_size) {
1391                ret = -EFBIG;
1392                goto out_free;
1393        }
1394
1395        do_div(new_size, root->sectorsize);
1396        new_size *= root->sectorsize;
1397
1398        printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1399                      rcu_str_deref(device->name),
1400                      (unsigned long long)new_size);
1401
1402        if (new_size > old_size) {
1403                trans = btrfs_start_transaction(root, 0);
1404                if (IS_ERR(trans)) {
1405                        ret = PTR_ERR(trans);
1406                        goto out_free;
1407                }
1408                ret = btrfs_grow_device(trans, device, new_size);
1409                btrfs_commit_transaction(trans, root);
1410        } else if (new_size < old_size) {
1411                ret = btrfs_shrink_device(device, new_size);
1412        }
1413
1414out_free:
1415        kfree(vol_args);
1416out:
1417        mutex_unlock(&root->fs_info->volume_mutex);
1418        return ret;
1419}
1420
1421static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1422                                char *name, unsigned long fd, int subvol,
1423                                u64 *transid, bool readonly,
1424                                struct btrfs_qgroup_inherit **inherit)
1425{
1426        int namelen;
1427        int ret = 0;
1428
1429        ret = mnt_want_write_file(file);
1430        if (ret)
1431                goto out;
1432
1433        namelen = strlen(name);
1434        if (strchr(name, '/')) {
1435                ret = -EINVAL;
1436                goto out_drop_write;
1437        }
1438
1439        if (name[0] == '.' &&
1440           (namelen == 1 || (name[1] == '.' && namelen == 2))) {
1441                ret = -EEXIST;
1442                goto out_drop_write;
1443        }
1444
1445        if (subvol) {
1446                ret = btrfs_mksubvol(&file->f_path, name, namelen,
1447                                     NULL, transid, readonly, inherit);
1448        } else {
1449                struct fd src = fdget(fd);
1450                struct inode *src_inode;
1451                if (!src.file) {
1452                        ret = -EINVAL;
1453                        goto out_drop_write;
1454                }
1455
1456                src_inode = src.file->f_path.dentry->d_inode;
1457                if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1458                        printk(KERN_INFO "btrfs: Snapshot src from "
1459                               "another FS\n");
1460                        ret = -EINVAL;
1461                } else {
1462                        ret = btrfs_mksubvol(&file->f_path, name, namelen,
1463                                             BTRFS_I(src_inode)->root,
1464                                             transid, readonly, inherit);
1465                }
1466                fdput(src);
1467        }
1468out_drop_write:
1469        mnt_drop_write_file(file);
1470out:
1471        return ret;
1472}
1473
1474static noinline int btrfs_ioctl_snap_create(struct file *file,
1475                                            void __user *arg, int subvol)
1476{
1477        struct btrfs_ioctl_vol_args *vol_args;
1478        int ret;
1479
1480        vol_args = memdup_user(arg, sizeof(*vol_args));
1481        if (IS_ERR(vol_args))
1482                return PTR_ERR(vol_args);
1483        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1484
1485        ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1486                                              vol_args->fd, subvol,
1487                                              NULL, false, NULL);
1488
1489        kfree(vol_args);
1490        return ret;
1491}
1492
1493static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1494                                               void __user *arg, int subvol)
1495{
1496        struct btrfs_ioctl_vol_args_v2 *vol_args;
1497        int ret;
1498        u64 transid = 0;
1499        u64 *ptr = NULL;
1500        bool readonly = false;
1501        struct btrfs_qgroup_inherit *inherit = NULL;
1502
1503        vol_args = memdup_user(arg, sizeof(*vol_args));
1504        if (IS_ERR(vol_args))
1505                return PTR_ERR(vol_args);
1506        vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
1507
1508        if (vol_args->flags &
1509            ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
1510              BTRFS_SUBVOL_QGROUP_INHERIT)) {
1511                ret = -EOPNOTSUPP;
1512                goto out;
1513        }
1514
1515        if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
1516                ptr = &transid;
1517        if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
1518                readonly = true;
1519        if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
1520                if (vol_args->size > PAGE_CACHE_SIZE) {
1521                        ret = -EINVAL;
1522                        goto out;
1523                }
1524                inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
1525                if (IS_ERR(inherit)) {
1526                        ret = PTR_ERR(inherit);
1527                        goto out;
1528                }
1529        }
1530
1531        ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1532                                              vol_args->fd, subvol, ptr,
1533                                              readonly, &inherit);
1534
1535        if (ret == 0 && ptr &&
1536            copy_to_user(arg +
1537                         offsetof(struct btrfs_ioctl_vol_args_v2,
1538                                  transid), ptr, sizeof(*ptr)))
1539                ret = -EFAULT;
1540out:
1541        kfree(vol_args);
1542        kfree(inherit);
1543        return ret;
1544}
1545
1546static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1547                                                void __user *arg)
1548{
1549        struct inode *inode = fdentry(file)->d_inode;
1550        struct btrfs_root *root = BTRFS_I(inode)->root;
1551        int ret = 0;
1552        u64 flags = 0;
1553
1554        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
1555                return -EINVAL;
1556
1557        down_read(&root->fs_info->subvol_sem);
1558        if (btrfs_root_readonly(root))
1559                flags |= BTRFS_SUBVOL_RDONLY;
1560        up_read(&root->fs_info->subvol_sem);
1561
1562        if (copy_to_user(arg, &flags, sizeof(flags)))
1563                ret = -EFAULT;
1564
1565        return ret;
1566}
1567
1568static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1569                                              void __user *arg)
1570{
1571        struct inode *inode = fdentry(file)->d_inode;
1572        struct btrfs_root *root = BTRFS_I(inode)->root;
1573        struct btrfs_trans_handle *trans;
1574        u64 root_flags;
1575        u64 flags;
1576        int ret = 0;
1577
1578        ret = mnt_want_write_file(file);
1579        if (ret)
1580                goto out;
1581
1582        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
1583                ret = -EINVAL;
1584                goto out_drop_write;
1585        }
1586
1587        if (copy_from_user(&flags, arg, sizeof(flags))) {
1588                ret = -EFAULT;
1589                goto out_drop_write;
1590        }
1591
1592        if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
1593                ret = -EINVAL;
1594                goto out_drop_write;
1595        }
1596
1597        if (flags & ~BTRFS_SUBVOL_RDONLY) {
1598                ret = -EOPNOTSUPP;
1599                goto out_drop_write;
1600        }
1601
1602        if (!inode_owner_or_capable(inode)) {
1603                ret = -EACCES;
1604                goto out_drop_write;
1605        }
1606
1607        down_write(&root->fs_info->subvol_sem);
1608
1609        /* nothing to do */
1610        if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
1611                goto out_drop_sem;
1612
1613        root_flags = btrfs_root_flags(&root->root_item);
1614        if (flags & BTRFS_SUBVOL_RDONLY)
1615                btrfs_set_root_flags(&root->root_item,
1616                                     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
1617        else
1618                btrfs_set_root_flags(&root->root_item,
1619                                     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1620
1621        trans = btrfs_start_transaction(root, 1);
1622        if (IS_ERR(trans)) {
1623                ret = PTR_ERR(trans);
1624                goto out_reset;
1625        }
1626
1627        ret = btrfs_update_root(trans, root->fs_info->tree_root,
1628                                &root->root_key, &root->root_item);
1629
1630        btrfs_commit_transaction(trans, root);
1631out_reset:
1632        if (ret)
1633                btrfs_set_root_flags(&root->root_item, root_flags);
1634out_drop_sem:
1635        up_write(&root->fs_info->subvol_sem);
1636out_drop_write:
1637        mnt_drop_write_file(file);
1638out:
1639        return ret;
1640}
1641
1642/*
1643 * helper to check if the subvolume references other subvolumes
1644 */
1645static noinline int may_destroy_subvol(struct btrfs_root *root)
1646{
1647        struct btrfs_path *path;
1648        struct btrfs_key key;
1649        int ret;
1650
1651        path = btrfs_alloc_path();
1652        if (!path)
1653                return -ENOMEM;
1654
1655        key.objectid = root->root_key.objectid;
1656        key.type = BTRFS_ROOT_REF_KEY;
1657        key.offset = (u64)-1;
1658
1659        ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
1660                                &key, path, 0, 0);
1661        if (ret < 0)
1662                goto out;
1663        BUG_ON(ret == 0);
1664
1665        ret = 0;
1666        if (path->slots[0] > 0) {
1667                path->slots[0]--;
1668                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1669                if (key.objectid == root->root_key.objectid &&
1670                    key.type == BTRFS_ROOT_REF_KEY)
1671                        ret = -ENOTEMPTY;
1672        }
1673out:
1674        btrfs_free_path(path);
1675        return ret;
1676}
1677
1678static noinline int key_in_sk(struct btrfs_key *key,
1679                              struct btrfs_ioctl_search_key *sk)
1680{
1681        struct btrfs_key test;
1682        int ret;
1683
1684        test.objectid = sk->min_objectid;
1685        test.type = sk->min_type;
1686        test.offset = sk->min_offset;
1687
1688        ret = btrfs_comp_cpu_keys(key, &test);
1689        if (ret < 0)
1690                return 0;
1691
1692        test.objectid = sk->max_objectid;
1693        test.type = sk->max_type;
1694        test.offset = sk->max_offset;
1695
1696        ret = btrfs_comp_cpu_keys(key, &test);
1697        if (ret > 0)
1698                return 0;
1699        return 1;
1700}
1701
1702static noinline int copy_to_sk(struct btrfs_root *root,
1703                               struct btrfs_path *path,
1704                               struct btrfs_key *key,
1705                               struct btrfs_ioctl_search_key *sk,
1706                               char *buf,
1707                               unsigned long *sk_offset,
1708                               int *num_found)
1709{
1710        u64 found_transid;
1711        struct extent_buffer *leaf;
1712        struct btrfs_ioctl_search_header sh;
1713        unsigned long item_off;
1714        unsigned long item_len;
1715        int nritems;
1716        int i;
1717        int slot;
1718        int ret = 0;
1719
1720        leaf = path->nodes[0];
1721        slot = path->slots[0];
1722        nritems = btrfs_header_nritems(leaf);
1723
1724        if (btrfs_header_generation(leaf) > sk->max_transid) {
1725                i = nritems;
1726                goto advance_key;
1727        }
1728        found_transid = btrfs_header_generation(leaf);
1729
1730        for (i = slot; i < nritems; i++) {
1731                item_off = btrfs_item_ptr_offset(leaf, i);
1732                item_len = btrfs_item_size_nr(leaf, i);
1733
1734                if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
1735                        item_len = 0;
1736
1737                if (sizeof(sh) + item_len + *sk_offset >
1738                    BTRFS_SEARCH_ARGS_BUFSIZE) {
1739                        ret = 1;
1740                        goto overflow;
1741                }
1742
1743                btrfs_item_key_to_cpu(leaf, key, i);
1744                if (!key_in_sk(key, sk))
1745                        continue;
1746
1747                sh.objectid = key->objectid;
1748                sh.offset = key->offset;
1749                sh.type = key->type;
1750                sh.len = item_len;
1751                sh.transid = found_transid;
1752
1753                /* copy search result header */
1754                memcpy(buf + *sk_offset, &sh, sizeof(sh));
1755                *sk_offset += sizeof(sh);
1756
1757                if (item_len) {
1758                        char *p = buf + *sk_offset;
1759                        /* copy the item */
1760                        read_extent_buffer(leaf, p,
1761                                           item_off, item_len);
1762                        *sk_offset += item_len;
1763                }
1764                (*num_found)++;
1765
1766                if (*num_found >= sk->nr_items)
1767                        break;
1768        }
1769advance_key:
1770        ret = 0;
1771        if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1772                key->offset++;
1773        else if (key->type < (u8)-1 && key->type < sk->max_type) {
1774                key->offset = 0;
1775                key->type++;
1776        } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1777                key->offset = 0;
1778                key->type = 0;
1779                key->objectid++;
1780        } else
1781                ret = 1;
1782overflow:
1783        return ret;
1784}
1785
1786static noinline int search_ioctl(struct inode *inode,
1787                                 struct btrfs_ioctl_search_args *args)
1788{
1789        struct btrfs_root *root;
1790        struct btrfs_key key;
1791        struct btrfs_key max_key;
1792        struct btrfs_path *path;
1793        struct btrfs_ioctl_search_key *sk = &args->key;
1794        struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1795        int ret;
1796        int num_found = 0;
1797        unsigned long sk_offset = 0;
1798
1799        path = btrfs_alloc_path();
1800        if (!path)
1801                return -ENOMEM;
1802
1803        if (sk->tree_id == 0) {
1804                /* search the root of the inode that was passed */
1805                root = BTRFS_I(inode)->root;
1806        } else {
1807                key.objectid = sk->tree_id;
1808                key.type = BTRFS_ROOT_ITEM_KEY;
1809                key.offset = (u64)-1;
1810                root = btrfs_read_fs_root_no_name(info, &key);
1811                if (IS_ERR(root)) {
1812                        printk(KERN_ERR "could not find root %llu\n",
1813                               sk->tree_id);
1814                        btrfs_free_path(path);
1815                        return -ENOENT;
1816                }
1817        }
1818
1819        key.objectid = sk->min_objectid;
1820        key.type = sk->min_type;
1821        key.offset = sk->min_offset;
1822
1823        max_key.objectid = sk->max_objectid;
1824        max_key.type = sk->max_type;
1825        max_key.offset = sk->max_offset;
1826
1827        path->keep_locks = 1;
1828
1829        while(1) {
1830                ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1831                                           sk->min_transid);
1832                if (ret != 0) {
1833                        if (ret > 0)
1834                                ret = 0;
1835                        goto err;
1836                }
1837                ret = copy_to_sk(root, path, &key, sk, args->buf,
1838                                 &sk_offset, &num_found);
1839                btrfs_release_path(path);
1840                if (ret || num_found >= sk->nr_items)
1841                        break;
1842
1843        }
1844        ret = 0;
1845err:
1846        sk->nr_items = num_found;
1847        btrfs_free_path(path);
1848        return ret;
1849}
1850
1851static noinline int btrfs_ioctl_tree_search(struct file *file,
1852                                           void __user *argp)
1853{
1854         struct btrfs_ioctl_search_args *args;
1855         struct inode *inode;
1856         int ret;
1857
1858        if (!capable(CAP_SYS_ADMIN))
1859                return -EPERM;
1860
1861        args = memdup_user(argp, sizeof(*args));
1862        if (IS_ERR(args))
1863                return PTR_ERR(args);
1864
1865        inode = fdentry(file)->d_inode;
1866        ret = search_ioctl(inode, args);
1867        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1868                ret = -EFAULT;
1869        kfree(args);
1870        return ret;
1871}
1872
1873/*
1874 * Search INODE_REFs to identify path name of 'dirid' directory
1875 * in a 'tree_id' tree. and sets path name to 'name'.
1876 */
1877static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1878                                u64 tree_id, u64 dirid, char *name)
1879{
1880        struct btrfs_root *root;
1881        struct btrfs_key key;
1882        char *ptr;
1883        int ret = -1;
1884        int slot;
1885        int len;
1886        int total_len = 0;
1887        struct btrfs_inode_ref *iref;
1888        struct extent_buffer *l;
1889        struct btrfs_path *path;
1890
1891        if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1892                name[0]='\0';
1893                return 0;
1894        }
1895
1896        path = btrfs_alloc_path();
1897        if (!path)
1898                return -ENOMEM;
1899
1900        ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1901
1902        key.objectid = tree_id;
1903        key.type = BTRFS_ROOT_ITEM_KEY;
1904        key.offset = (u64)-1;
1905        root = btrfs_read_fs_root_no_name(info, &key);
1906        if (IS_ERR(root)) {
1907                printk(KERN_ERR "could not find root %llu\n", tree_id);
1908                ret = -ENOENT;
1909                goto out;
1910        }
1911
1912        key.objectid = dirid;
1913        key.type = BTRFS_INODE_REF_KEY;
1914        key.offset = (u64)-1;
1915
1916        while(1) {
1917                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1918                if (ret < 0)
1919                        goto out;
1920
1921                l = path->nodes[0];
1922                slot = path->slots[0];
1923                if (ret > 0 && slot > 0)
1924                        slot--;
1925                btrfs_item_key_to_cpu(l, &key, slot);
1926
1927                if (ret > 0 && (key.objectid != dirid ||
1928                                key.type != BTRFS_INODE_REF_KEY)) {
1929                        ret = -ENOENT;
1930                        goto out;
1931                }
1932
1933                iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1934                len = btrfs_inode_ref_name_len(l, iref);
1935                ptr -= len + 1;
1936                total_len += len + 1;
1937                if (ptr < name)
1938                        goto out;
1939
1940                *(ptr + len) = '/';
1941                read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1942
1943                if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1944                        break;
1945
1946                btrfs_release_path(path);
1947                key.objectid = key.offset;
1948                key.offset = (u64)-1;
1949                dirid = key.objectid;
1950        }
1951        if (ptr < name)
1952                goto out;
1953        memmove(name, ptr, total_len);
1954        name[total_len]='\0';
1955        ret = 0;
1956out:
1957        btrfs_free_path(path);
1958        return ret;
1959}
1960
1961static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1962                                           void __user *argp)
1963{
1964         struct btrfs_ioctl_ino_lookup_args *args;
1965         struct inode *inode;
1966         int ret;
1967
1968        if (!capable(CAP_SYS_ADMIN))
1969                return -EPERM;
1970
1971        args = memdup_user(argp, sizeof(*args));
1972        if (IS_ERR(args))
1973                return PTR_ERR(args);
1974
1975        inode = fdentry(file)->d_inode;
1976
1977        if (args->treeid == 0)
1978                args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1979
1980        ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1981                                        args->treeid, args->objectid,
1982                                        args->name);
1983
1984        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1985                ret = -EFAULT;
1986
1987        kfree(args);
1988        return ret;
1989}
1990
1991static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1992                                             void __user *arg)
1993{
1994        struct dentry *parent = fdentry(file);
1995        struct dentry *dentry;
1996        struct inode *dir = parent->d_inode;
1997        struct inode *inode;
1998        struct btrfs_root *root = BTRFS_I(dir)->root;
1999        struct btrfs_root *dest = NULL;
2000        struct btrfs_ioctl_vol_args *vol_args;
2001        struct btrfs_trans_handle *trans;
2002        int namelen;
2003        int ret;
2004        int err = 0;
2005
2006        vol_args = memdup_user(arg, sizeof(*vol_args));
2007        if (IS_ERR(vol_args))
2008                return PTR_ERR(vol_args);
2009
2010        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2011        namelen = strlen(vol_args->name);
2012        if (strchr(vol_args->name, '/') ||
2013            strncmp(vol_args->name, "..", namelen) == 0) {
2014                err = -EINVAL;
2015                goto out;
2016        }
2017
2018        err = mnt_want_write_file(file);
2019        if (err)
2020                goto out;
2021
2022        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
2023        dentry = lookup_one_len(vol_args->name, parent, namelen);
2024        if (IS_ERR(dentry)) {
2025                err = PTR_ERR(dentry);
2026                goto out_unlock_dir;
2027        }
2028
2029        if (!dentry->d_inode) {
2030                err = -ENOENT;
2031                goto out_dput;
2032        }
2033
2034        inode = dentry->d_inode;
2035        dest = BTRFS_I(inode)->root;
2036        if (!capable(CAP_SYS_ADMIN)){
2037                /*
2038                 * Regular user.  Only allow this with a special mount
2039                 * option, when the user has write+exec access to the
2040                 * subvol root, and when rmdir(2) would have been
2041                 * allowed.
2042                 *
2043                 * Note that this is _not_ check that the subvol is
2044                 * empty or doesn't contain data that we wouldn't
2045                 * otherwise be able to delete.
2046                 *
2047                 * Users who want to delete empty subvols should try
2048                 * rmdir(2).
2049                 */
2050                err = -EPERM;
2051                if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
2052                        goto out_dput;
2053
2054                /*
2055                 * Do not allow deletion if the parent dir is the same
2056                 * as the dir to be deleted.  That means the ioctl
2057                 * must be called on the dentry referencing the root
2058                 * of the subvol, not a random directory contained
2059                 * within it.
2060                 */
2061                err = -EINVAL;
2062                if (root == dest)
2063                        goto out_dput;
2064
2065                err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2066                if (err)
2067                        goto out_dput;
2068
2069                /* check if subvolume may be deleted by a non-root user */
2070                err = btrfs_may_delete(dir, dentry, 1);
2071                if (err)
2072                        goto out_dput;
2073        }
2074
2075        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2076                err = -EINVAL;
2077                goto out_dput;
2078        }
2079
2080        mutex_lock(&inode->i_mutex);
2081        err = d_invalidate(dentry);
2082        if (err)
2083                goto out_unlock;
2084
2085        down_write(&root->fs_info->subvol_sem);
2086
2087        err = may_destroy_subvol(dest);
2088        if (err)
2089                goto out_up_write;
2090
2091        trans = btrfs_start_transaction(root, 0);
2092        if (IS_ERR(trans)) {
2093                err = PTR_ERR(trans);
2094                goto out_up_write;
2095        }
2096        trans->block_rsv = &root->fs_info->global_block_rsv;
2097
2098        ret = btrfs_unlink_subvol(trans, root, dir,
2099                                dest->root_key.objectid,
2100                                dentry->d_name.name,
2101                                dentry->d_name.len);
2102        if (ret) {
2103                err = ret;
2104                btrfs_abort_transaction(trans, root, ret);
2105                goto out_end_trans;
2106        }
2107
2108        btrfs_record_root_in_trans(trans, dest);
2109
2110        memset(&dest->root_item.drop_progress, 0,
2111                sizeof(dest->root_item.drop_progress));
2112        dest->root_item.drop_level = 0;
2113        btrfs_set_root_refs(&dest->root_item, 0);
2114
2115        if (!xchg(&dest->orphan_item_inserted, 1)) {
2116                ret = btrfs_insert_orphan_item(trans,
2117                                        root->fs_info->tree_root,
2118                                        dest->root_key.objectid);
2119                if (ret) {
2120                        btrfs_abort_transaction(trans, root, ret);
2121                        err = ret;
2122                        goto out_end_trans;
2123                }
2124        }
2125out_end_trans:
2126        ret = btrfs_end_transaction(trans, root);
2127        if (ret && !err)
2128                err = ret;
2129        inode->i_flags |= S_DEAD;
2130out_up_write:
2131        up_write(&root->fs_info->subvol_sem);
2132out_unlock:
2133        mutex_unlock(&inode->i_mutex);
2134        if (!err) {
2135                shrink_dcache_sb(root->fs_info->sb);
2136                btrfs_invalidate_inodes(dest);
2137                d_delete(dentry);
2138        }
2139out_dput:
2140        dput(dentry);
2141out_unlock_dir:
2142        mutex_unlock(&dir->i_mutex);
2143        mnt_drop_write_file(file);
2144out:
2145        kfree(vol_args);
2146        return err;
2147}
2148
2149static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2150{
2151        struct inode *inode = fdentry(file)->d_inode;
2152        struct btrfs_root *root = BTRFS_I(inode)->root;
2153        struct btrfs_ioctl_defrag_range_args *range;
2154        int ret;
2155
2156        if (btrfs_root_readonly(root))
2157                return -EROFS;
2158
2159        ret = mnt_want_write_file(file);
2160        if (ret)
2161                return ret;
2162
2163        switch (inode->i_mode & S_IFMT) {
2164        case S_IFDIR:
2165                if (!capable(CAP_SYS_ADMIN)) {
2166                        ret = -EPERM;
2167                        goto out;
2168                }
2169                ret = btrfs_defrag_root(root, 0);
2170                if (ret)
2171                        goto out;
2172                ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
2173                break;
2174        case S_IFREG:
2175                if (!(file->f_mode & FMODE_WRITE)) {
2176                        ret = -EINVAL;
2177                        goto out;
2178                }
2179
2180                range = kzalloc(sizeof(*range), GFP_KERNEL);
2181                if (!range) {
2182                        ret = -ENOMEM;
2183                        goto out;
2184                }
2185
2186                if (argp) {
2187                        if (copy_from_user(range, argp,
2188                                           sizeof(*range))) {
2189                                ret = -EFAULT;
2190                                kfree(range);
2191                                goto out;
2192                        }
2193                        /* compression requires us to start the IO */
2194                        if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
2195                                range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
2196                                range->extent_thresh = (u32)-1;
2197                        }
2198                } else {
2199                        /* the rest are all set to zero by kzalloc */
2200                        range->len = (u64)-1;
2201                }
2202                ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
2203                                        range, 0, 0);
2204                if (ret > 0)
2205                        ret = 0;
2206                kfree(range);
2207                break;
2208        default:
2209                ret = -EINVAL;
2210        }
2211out:
2212        mnt_drop_write_file(file);
2213        return ret;
2214}
2215
2216static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2217{
2218        struct btrfs_ioctl_vol_args *vol_args;
2219        int ret;
2220
2221        if (!capable(CAP_SYS_ADMIN))
2222                return -EPERM;
2223
2224        mutex_lock(&root->fs_info->volume_mutex);
2225        if (root->fs_info->balance_ctl) {
2226                printk(KERN_INFO "btrfs: balance in progress\n");
2227                ret = -EINVAL;
2228                goto out;
2229        }
2230
2231        vol_args = memdup_user(arg, sizeof(*vol_args));
2232        if (IS_ERR(vol_args)) {
2233                ret = PTR_ERR(vol_args);
2234                goto out;
2235        }
2236
2237        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2238        ret = btrfs_init_new_device(root, vol_args->name);
2239
2240        kfree(vol_args);
2241out:
2242        mutex_unlock(&root->fs_info->volume_mutex);
2243        return ret;
2244}
2245
2246static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2247{
2248        struct btrfs_ioctl_vol_args *vol_args;
2249        int ret;
2250
2251        if (!capable(CAP_SYS_ADMIN))
2252                return -EPERM;
2253
2254        if (root->fs_info->sb->s_flags & MS_RDONLY)
2255                return -EROFS;
2256
2257        mutex_lock(&root->fs_info->volume_mutex);
2258        if (root->fs_info->balance_ctl) {
2259                printk(KERN_INFO "btrfs: balance in progress\n");
2260                ret = -EINVAL;
2261                goto out;
2262        }
2263
2264        vol_args = memdup_user(arg, sizeof(*vol_args));
2265        if (IS_ERR(vol_args)) {
2266                ret = PTR_ERR(vol_args);
2267                goto out;
2268        }
2269
2270        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
2271        ret = btrfs_rm_device(root, vol_args->name);
2272
2273        kfree(vol_args);
2274out:
2275        mutex_unlock(&root->fs_info->volume_mutex);
2276        return ret;
2277}
2278
2279static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2280{
2281        struct btrfs_ioctl_fs_info_args *fi_args;
2282        struct btrfs_device *device;
2283        struct btrfs_device *next;
2284        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2285        int ret = 0;
2286
2287        if (!capable(CAP_SYS_ADMIN))
2288                return -EPERM;
2289
2290        fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2291        if (!fi_args)
2292                return -ENOMEM;
2293
2294        fi_args->num_devices = fs_devices->num_devices;
2295        memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2296
2297        mutex_lock(&fs_devices->device_list_mutex);
2298        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2299                if (device->devid > fi_args->max_id)
2300                        fi_args->max_id = device->devid;
2301        }
2302        mutex_unlock(&fs_devices->device_list_mutex);
2303
2304        if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2305                ret = -EFAULT;
2306
2307        kfree(fi_args);
2308        return ret;
2309}
2310
2311static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2312{
2313        struct btrfs_ioctl_dev_info_args *di_args;
2314        struct btrfs_device *dev;
2315        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2316        int ret = 0;
2317        char *s_uuid = NULL;
2318        char empty_uuid[BTRFS_UUID_SIZE] = {0};
2319
2320        if (!capable(CAP_SYS_ADMIN))
2321                return -EPERM;
2322
2323        di_args = memdup_user(arg, sizeof(*di_args));
2324        if (IS_ERR(di_args))
2325                return PTR_ERR(di_args);
2326
2327        if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
2328                s_uuid = di_args->uuid;
2329
2330        mutex_lock(&fs_devices->device_list_mutex);
2331        dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
2332        mutex_unlock(&fs_devices->device_list_mutex);
2333
2334        if (!dev) {
2335                ret = -ENODEV;
2336                goto out;
2337        }
2338
2339        di_args->devid = dev->devid;
2340        di_args->bytes_used = dev->bytes_used;
2341        di_args->total_bytes = dev->total_bytes;
2342        memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
2343        if (dev->name) {
2344                struct rcu_string *name;
2345
2346                rcu_read_lock();
2347                name = rcu_dereference(dev->name);
2348                strncpy(di_args->path, name->str, sizeof(di_args->path));
2349                rcu_read_unlock();
2350                di_args->path[sizeof(di_args->path) - 1] = 0;
2351        } else {
2352                di_args->path[0] = '\0';
2353        }
2354
2355out:
2356        if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2357                ret = -EFAULT;
2358
2359        kfree(di_args);
2360        return ret;
2361}
2362
2363static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2364                                       u64 off, u64 olen, u64 destoff)
2365{
2366        struct inode *inode = fdentry(file)->d_inode;
2367        struct btrfs_root *root = BTRFS_I(inode)->root;
2368        struct fd src_file;
2369        struct inode *src;
2370        struct btrfs_trans_handle *trans;
2371        struct btrfs_path *path;
2372        struct extent_buffer *leaf;
2373        char *buf;
2374        struct btrfs_key key;
2375        u32 nritems;
2376        int slot;
2377        int ret;
2378        u64 len = olen;
2379        u64 bs = root->fs_info->sb->s_blocksize;
2380
2381        /*
2382         * TODO:
2383         * - split compressed inline extents.  annoying: we need to
2384         *   decompress into destination's address_space (the file offset
2385         *   may change, so source mapping won't do), then recompress (or
2386         *   otherwise reinsert) a subrange.
2387         * - allow ranges within the same file to be cloned (provided
2388         *   they don't overlap)?
2389         */
2390
2391        /* the destination must be opened for writing */
2392        if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
2393                return -EINVAL;
2394
2395        if (btrfs_root_readonly(root))
2396                return -EROFS;
2397
2398        ret = mnt_want_write_file(file);
2399        if (ret)
2400                return ret;
2401
2402        src_file = fdget(srcfd);
2403        if (!src_file.file) {
2404                ret = -EBADF;
2405                goto out_drop_write;
2406        }
2407
2408        ret = -EXDEV;
2409        if (src_file.file->f_path.mnt != file->f_path.mnt)
2410                goto out_fput;
2411
2412        src = src_file.file->f_dentry->d_inode;
2413
2414        ret = -EINVAL;
2415        if (src == inode)
2416                goto out_fput;
2417
2418        /* the src must be open for reading */
2419        if (!(src_file.file->f_mode & FMODE_READ))
2420                goto out_fput;
2421
2422        /* don't make the dst file partly checksummed */
2423        if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2424            (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
2425                goto out_fput;
2426
2427        ret = -EISDIR;
2428        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
2429                goto out_fput;
2430
2431        ret = -EXDEV;
2432        if (src->i_sb != inode->i_sb)
2433                goto out_fput;
2434
2435        ret = -ENOMEM;
2436        buf = vmalloc(btrfs_level_size(root, 0));
2437        if (!buf)
2438                goto out_fput;
2439
2440        path = btrfs_alloc_path();
2441        if (!path) {
2442                vfree(buf);
2443                goto out_fput;
2444        }
2445        path->reada = 2;
2446
2447        if (inode < src) {
2448                mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
2449                mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
2450        } else {
2451                mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
2452                mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
2453        }
2454
2455        /* determine range to clone */
2456        ret = -EINVAL;
2457        if (off + len > src->i_size || off + len < off)
2458                goto out_unlock;
2459        if (len == 0)
2460                olen = len = src->i_size - off;
2461        /* if we extend to eof, continue to block boundary */
2462        if (off + len == src->i_size)
2463                len = ALIGN(src->i_size, bs) - off;
2464
2465        /* verify the end result is block aligned */
2466        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
2467            !IS_ALIGNED(destoff, bs))
2468                goto out_unlock;
2469
2470        if (destoff > inode->i_size) {
2471                ret = btrfs_cont_expand(inode, inode->i_size, destoff);
2472                if (ret)
2473                        goto out_unlock;
2474        }
2475
2476        /* truncate page cache pages from target inode range */
2477        truncate_inode_pages_range(&inode->i_data, destoff,
2478                                   PAGE_CACHE_ALIGN(destoff + len) - 1);
2479
2480        /* do any pending delalloc/csum calc on src, one way or
2481           another, and lock file content */
2482        while (1) {
2483                struct btrfs_ordered_extent *ordered;
2484                lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2485                ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1);
2486                if (!ordered &&
2487                    !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1,
2488                                    EXTENT_DELALLOC, 0, NULL))
2489                        break;
2490                unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2491                if (ordered)
2492                        btrfs_put_ordered_extent(ordered);
2493                btrfs_wait_ordered_range(src, off, len);
2494        }
2495
2496        /* clone data */
2497        key.objectid = btrfs_ino(src);
2498        key.type = BTRFS_EXTENT_DATA_KEY;
2499        key.offset = 0;
2500
2501        while (1) {
2502                /*
2503                 * note the key will change type as we walk through the
2504                 * tree.
2505                 */
2506                ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
2507                                0, 0);
2508                if (ret < 0)
2509                        goto out;
2510
2511                nritems = btrfs_header_nritems(path->nodes[0]);
2512                if (path->slots[0] >= nritems) {
2513                        ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
2514                        if (ret < 0)
2515                                goto out;
2516                        if (ret > 0)
2517                                break;
2518                        nritems = btrfs_header_nritems(path->nodes[0]);
2519                }
2520                leaf = path->nodes[0];
2521                slot = path->slots[0];
2522
2523                btrfs_item_key_to_cpu(leaf, &key, slot);
2524                if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
2525                    key.objectid != btrfs_ino(src))
2526                        break;
2527
2528                if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
2529                        struct btrfs_file_extent_item *extent;
2530                        int type;
2531                        u32 size;
2532                        struct btrfs_key new_key;
2533                        u64 disko = 0, diskl = 0;
2534                        u64 datao = 0, datal = 0;
2535                        u8 comp;
2536                        u64 endoff;
2537
2538                        size = btrfs_item_size_nr(leaf, slot);
2539                        read_extent_buffer(leaf, buf,
2540                                           btrfs_item_ptr_offset(leaf, slot),
2541                                           size);
2542
2543                        extent = btrfs_item_ptr(leaf, slot,
2544                                                struct btrfs_file_extent_item);
2545                        comp = btrfs_file_extent_compression(leaf, extent);
2546                        type = btrfs_file_extent_type(leaf, extent);
2547                        if (type == BTRFS_FILE_EXTENT_REG ||
2548                            type == BTRFS_FILE_EXTENT_PREALLOC) {
2549                                disko = btrfs_file_extent_disk_bytenr(leaf,
2550                                                                      extent);
2551                                diskl = btrfs_file_extent_disk_num_bytes(leaf,
2552                                                                 extent);
2553                                datao = btrfs_file_extent_offset(leaf, extent);
2554                                datal = btrfs_file_extent_num_bytes(leaf,
2555                                                                    extent);
2556                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2557                                /* take upper bound, may be compressed */
2558                                datal = btrfs_file_extent_ram_bytes(leaf,
2559                                                                    extent);
2560                        }
2561                        btrfs_release_path(path);
2562
2563                        if (key.offset + datal <= off ||
2564                            key.offset >= off + len - 1)
2565                                goto next;
2566
2567                        memcpy(&new_key, &key, sizeof(new_key));
2568                        new_key.objectid = btrfs_ino(inode);
2569                        if (off <= key.offset)
2570                                new_key.offset = key.offset + destoff - off;
2571                        else
2572                                new_key.offset = destoff;
2573
2574                        /*
2575                         * 1 - adjusting old extent (we may have to split it)
2576                         * 1 - add new extent
2577                         * 1 - inode update
2578                         */
2579                        trans = btrfs_start_transaction(root, 3);
2580                        if (IS_ERR(trans)) {
2581                                ret = PTR_ERR(trans);
2582                                goto out;
2583                        }
2584
2585                        if (type == BTRFS_FILE_EXTENT_REG ||
2586                            type == BTRFS_FILE_EXTENT_PREALLOC) {
2587                                /*
2588                                 *    a  | --- range to clone ---|  b
2589                                 * | ------------- extent ------------- |
2590                                 */
2591
2592                                /* substract range b */
2593                                if (key.offset + datal > off + len)
2594                                        datal = off + len - key.offset;
2595
2596                                /* substract range a */
2597                                if (off > key.offset) {
2598                                        datao += off - key.offset;
2599                                        datal -= off - key.offset;
2600                                }
2601
2602                                ret = btrfs_drop_extents(trans, root, inode,
2603                                                         new_key.offset,
2604                                                         new_key.offset + datal,
2605                                                         1);
2606                                if (ret) {
2607                                        btrfs_abort_transaction(trans, root,
2608                                                                ret);
2609                                        btrfs_end_transaction(trans, root);
2610                                        goto out;
2611                                }
2612
2613                                ret = btrfs_insert_empty_item(trans, root, path,
2614                                                              &new_key, size);
2615                                if (ret) {
2616                                        btrfs_abort_transaction(trans, root,
2617                                                                ret);
2618                                        btrfs_end_transaction(trans, root);
2619                                        goto out;
2620                                }
2621
2622                                leaf = path->nodes[0];
2623                                slot = path->slots[0];
2624                                write_extent_buffer(leaf, buf,
2625                                            btrfs_item_ptr_offset(leaf, slot),
2626                                            size);
2627
2628                                extent = btrfs_item_ptr(leaf, slot,
2629                                                struct btrfs_file_extent_item);
2630
2631                                /* disko == 0 means it's a hole */
2632                                if (!disko)
2633                                        datao = 0;
2634
2635                                btrfs_set_file_extent_offset(leaf, extent,
2636                                                             datao);
2637                                btrfs_set_file_extent_num_bytes(leaf, extent,
2638                                                                datal);
2639                                if (disko) {
2640                                        inode_add_bytes(inode, datal);
2641                                        ret = btrfs_inc_extent_ref(trans, root,
2642                                                        disko, diskl, 0,
2643                                                        root->root_key.objectid,
2644                                                        btrfs_ino(inode),
2645                                                        new_key.offset - datao,
2646                                                        0);
2647                                        if (ret) {
2648                                                btrfs_abort_transaction(trans,
2649                                                                        root,
2650                                                                        ret);
2651                                                btrfs_end_transaction(trans,
2652                                                                      root);
2653                                                goto out;
2654
2655                                        }
2656                                }
2657                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
2658                                u64 skip = 0;
2659                                u64 trim = 0;
2660                                if (off > key.offset) {
2661                                        skip = off - key.offset;
2662                                        new_key.offset += skip;
2663                                }
2664
2665                                if (key.offset + datal > off + len)
2666                                        trim = key.offset + datal - (off + len);
2667
2668                                if (comp && (skip || trim)) {
2669                                        ret = -EINVAL;
2670                                        btrfs_end_transaction(trans, root);
2671                                        goto out;
2672                                }
2673                                size -= skip + trim;
2674                                datal -= skip + trim;
2675
2676                                ret = btrfs_drop_extents(trans, root, inode,
2677                                                         new_key.offset,
2678                                                         new_key.offset + datal,
2679                                                         1);
2680                                if (ret) {
2681                                        btrfs_abort_transaction(trans, root,
2682                                                                ret);
2683                                        btrfs_end_transaction(trans, root);
2684                                        goto out;
2685                                }
2686
2687                                ret = btrfs_insert_empty_item(trans, root, path,
2688                                                              &new_key, size);
2689                                if (ret) {
2690                                        btrfs_abort_transaction(trans, root,
2691                                                                ret);
2692                                        btrfs_end_transaction(trans, root);
2693                                        goto out;
2694                                }
2695
2696                                if (skip) {
2697                                        u32 start =
2698                                          btrfs_file_extent_calc_inline_size(0);
2699                                        memmove(buf+start, buf+start+skip,
2700                                                datal);
2701                                }
2702
2703                                leaf = path->nodes[0];
2704                                slot = path->slots[0];
2705                                write_extent_buffer(leaf, buf,
2706                                            btrfs_item_ptr_offset(leaf, slot),
2707                                            size);
2708                                inode_add_bytes(inode, datal);
2709                        }
2710
2711                        btrfs_mark_buffer_dirty(leaf);
2712                        btrfs_release_path(path);
2713
2714                        inode_inc_iversion(inode);
2715                        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2716
2717                        /*
2718                         * we round up to the block size at eof when
2719                         * determining which extents to clone above,
2720                         * but shouldn't round up the file size
2721                         */
2722                        endoff = new_key.offset + datal;
2723                        if (endoff > destoff+olen)
2724                                endoff = destoff+olen;
2725                        if (endoff > inode->i_size)
2726                                btrfs_i_size_write(inode, endoff);
2727
2728                        ret = btrfs_update_inode(trans, root, inode);
2729                        if (ret) {
2730                                btrfs_abort_transaction(trans, root, ret);
2731                                btrfs_end_transaction(trans, root);
2732                                goto out;
2733                        }
2734                        ret = btrfs_end_transaction(trans, root);
2735                }
2736next:
2737                btrfs_release_path(path);
2738                key.offset++;
2739        }
2740        ret = 0;
2741out:
2742        btrfs_release_path(path);
2743        unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2744out_unlock:
2745        mutex_unlock(&src->i_mutex);
2746        mutex_unlock(&inode->i_mutex);
2747        vfree(buf);
2748        btrfs_free_path(path);
2749out_fput:
2750        fdput(src_file);
2751out_drop_write:
2752        mnt_drop_write_file(file);
2753        return ret;
2754}
2755
2756static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
2757{
2758        struct btrfs_ioctl_clone_range_args args;
2759
2760        if (copy_from_user(&args, argp, sizeof(args)))
2761                return -EFAULT;
2762        return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
2763                                 args.src_length, args.dest_offset);
2764}
2765
2766/*
2767 * there are many ways the trans_start and trans_end ioctls can lead
2768 * to deadlocks.  They should only be used by applications that
2769 * basically own the machine, and have a very in depth understanding
2770 * of all the possible deadlocks and enospc problems.
2771 */
2772static long btrfs_ioctl_trans_start(struct file *file)
2773{
2774        struct inode *inode = fdentry(file)->d_inode;
2775        struct btrfs_root *root = BTRFS_I(inode)->root;
2776        struct btrfs_trans_handle *trans;
2777        int ret;
2778
2779        ret = -EPERM;
2780        if (!capable(CAP_SYS_ADMIN))
2781                goto out;
2782
2783        ret = -EINPROGRESS;
2784        if (file->private_data)
2785                goto out;
2786
2787        ret = -EROFS;
2788        if (btrfs_root_readonly(root))
2789                goto out;
2790
2791        ret = mnt_want_write_file(file);
2792        if (ret)
2793                goto out;
2794
2795        atomic_inc(&root->fs_info->open_ioctl_trans);
2796
2797        ret = -ENOMEM;
2798        trans = btrfs_start_ioctl_transaction(root);
2799        if (IS_ERR(trans))
2800                goto out_drop;
2801
2802        file->private_data = trans;
2803        return 0;
2804
2805out_drop:
2806        atomic_dec(&root->fs_info->open_ioctl_trans);
2807        mnt_drop_write_file(file);
2808out:
2809        return ret;
2810}
2811
2812static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2813{
2814        struct inode *inode = fdentry(file)->d_inode;
2815        struct btrfs_root *root = BTRFS_I(inode)->root;
2816        struct btrfs_root *new_root;
2817        struct btrfs_dir_item *di;
2818        struct btrfs_trans_handle *trans;
2819        struct btrfs_path *path;
2820        struct btrfs_key location;
2821        struct btrfs_disk_key disk_key;
2822        u64 objectid = 0;
2823        u64 dir_id;
2824
2825        if (!capable(CAP_SYS_ADMIN))
2826                return -EPERM;
2827
2828        if (copy_from_user(&objectid, argp, sizeof(objectid)))
2829                return -EFAULT;
2830
2831        if (!objectid)
2832                objectid = root->root_key.objectid;
2833
2834        location.objectid = objectid;
2835        location.type = BTRFS_ROOT_ITEM_KEY;
2836        location.offset = (u64)-1;
2837
2838        new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
2839        if (IS_ERR(new_root))
2840                return PTR_ERR(new_root);
2841
2842        if (btrfs_root_refs(&new_root->root_item) == 0)
2843                return -ENOENT;
2844
2845        path = btrfs_alloc_path();
2846        if (!path)
2847                return -ENOMEM;
2848        path->leave_spinning = 1;
2849
2850        trans = btrfs_start_transaction(root, 1);
2851        if (IS_ERR(trans)) {
2852                btrfs_free_path(path);
2853                return PTR_ERR(trans);
2854        }
2855
2856        dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
2857        di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
2858                                   dir_id, "default", 7, 1);
2859        if (IS_ERR_OR_NULL(di)) {
2860                btrfs_free_path(path);
2861                btrfs_end_transaction(trans, root);
2862                printk(KERN_ERR "Umm, you don't have the default dir item, "
2863                       "this isn't going to work\n");
2864                return -ENOENT;
2865        }
2866
2867        btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
2868        btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
2869        btrfs_mark_buffer_dirty(path->nodes[0]);
2870        btrfs_free_path(path);
2871
2872        btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
2873        btrfs_end_transaction(trans, root);
2874
2875        return 0;
2876}
2877
2878void btrfs_get_block_group_info(struct list_head *groups_list,
2879                                struct btrfs_ioctl_space_info *space)
2880{
2881        struct btrfs_block_group_cache *block_group;
2882
2883        space->total_bytes = 0;
2884        space->used_bytes = 0;
2885        space->flags = 0;
2886        list_for_each_entry(block_group, groups_list, list) {
2887                space->flags = block_group->flags;
2888                space->total_bytes += block_group->key.offset;
2889                space->used_bytes +=
2890                        btrfs_block_group_used(&block_group->item);
2891        }
2892}
2893
2894long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2895{
2896        struct btrfs_ioctl_space_args space_args;
2897        struct btrfs_ioctl_space_info space;
2898        struct btrfs_ioctl_space_info *dest;
2899        struct btrfs_ioctl_space_info *dest_orig;
2900        struct btrfs_ioctl_space_info __user *user_dest;
2901        struct btrfs_space_info *info;
2902        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2903                       BTRFS_BLOCK_GROUP_SYSTEM,
2904                       BTRFS_BLOCK_GROUP_METADATA,
2905                       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2906        int num_types = 4;
2907        int alloc_size;
2908        int ret = 0;
2909        u64 slot_count = 0;
2910        int i, c;
2911
2912        if (copy_from_user(&space_args,
2913                           (struct btrfs_ioctl_space_args __user *)arg,
2914                           sizeof(space_args)))
2915                return -EFAULT;
2916
2917        for (i = 0; i < num_types; i++) {
2918                struct btrfs_space_info *tmp;
2919
2920                info = NULL;
2921                rcu_read_lock();
2922                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2923                                        list) {
2924                        if (tmp->flags == types[i]) {
2925                                info = tmp;
2926                                break;
2927                        }
2928                }
2929                rcu_read_unlock();
2930
2931                if (!info)
2932                        continue;
2933
2934                down_read(&info->groups_sem);
2935                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2936                        if (!list_empty(&info->block_groups[c]))
2937                                slot_count++;
2938                }
2939                up_read(&info->groups_sem);
2940        }
2941
2942        /* space_slots == 0 means they are asking for a count */
2943        if (space_args.space_slots == 0) {
2944                space_args.total_spaces = slot_count;
2945                goto out;
2946        }
2947
2948        slot_count = min_t(u64, space_args.space_slots, slot_count);
2949
2950        alloc_size = sizeof(*dest) * slot_count;
2951
2952        /* we generally have at most 6 or so space infos, one for each raid
2953         * level.  So, a whole page should be more than enough for everyone
2954         */
2955        if (alloc_size > PAGE_CACHE_SIZE)
2956                return -ENOMEM;
2957
2958        space_args.total_spaces = 0;
2959        dest = kmalloc(alloc_size, GFP_NOFS);
2960        if (!dest)
2961                return -ENOMEM;
2962        dest_orig = dest;
2963
2964        /* now we have a buffer to copy into */
2965        for (i = 0; i < num_types; i++) {
2966                struct btrfs_space_info *tmp;
2967
2968                if (!slot_count)
2969                        break;
2970
2971                info = NULL;
2972                rcu_read_lock();
2973                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2974                                        list) {
2975                        if (tmp->flags == types[i]) {
2976                                info = tmp;
2977                                break;
2978                        }
2979                }
2980                rcu_read_unlock();
2981
2982                if (!info)
2983                        continue;
2984                down_read(&info->groups_sem);
2985                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2986                        if (!list_empty(&info->block_groups[c])) {
2987                                btrfs_get_block_group_info(
2988                                        &info->block_groups[c], &space);
2989                                memcpy(dest, &space, sizeof(space));
2990                                dest++;
2991                                space_args.total_spaces++;
2992                                slot_count--;
2993                        }
2994                        if (!slot_count)
2995                                break;
2996                }
2997                up_read(&info->groups_sem);
2998        }
2999
3000        user_dest = (struct btrfs_ioctl_space_info __user *)
3001                (arg + sizeof(struct btrfs_ioctl_space_args));
3002
3003        if (copy_to_user(user_dest, dest_orig, alloc_size))
3004                ret = -EFAULT;
3005
3006        kfree(dest_orig);
3007out:
3008        if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
3009                ret = -EFAULT;
3010
3011        return ret;
3012}
3013
3014/*
3015 * there are many ways the trans_start and trans_end ioctls can lead
3016 * to deadlocks.  They should only be used by applications that
3017 * basically own the machine, and have a very in depth understanding
3018 * of all the possible deadlocks and enospc problems.
3019 */
3020long btrfs_ioctl_trans_end(struct file *file)
3021{
3022        struct inode *inode = fdentry(file)->d_inode;
3023        struct btrfs_root *root = BTRFS_I(inode)->root;
3024        struct btrfs_trans_handle *trans;
3025
3026        trans = file->private_data;
3027        if (!trans)
3028                return -EINVAL;
3029        file->private_data = NULL;
3030
3031        btrfs_end_transaction(trans, root);
3032
3033        atomic_dec(&root->fs_info->open_ioctl_trans);
3034
3035        mnt_drop_write_file(file);
3036        return 0;
3037}
3038
3039static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
3040{
3041        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3042        struct btrfs_trans_handle *trans;
3043        u64 transid;
3044        int ret;
3045
3046        trans = btrfs_start_transaction(root, 0);
3047        if (IS_ERR(trans))
3048                return PTR_ERR(trans);
3049        transid = trans->transid;
3050        ret = btrfs_commit_transaction_async(trans, root, 0);
3051        if (ret) {
3052                btrfs_end_transaction(trans, root);
3053                return ret;
3054        }
3055
3056        if (argp)
3057                if (copy_to_user(argp, &transid, sizeof(transid)))
3058                        return -EFAULT;
3059        return 0;
3060}
3061
3062static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
3063{
3064        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
3065        u64 transid;
3066
3067        if (argp) {
3068                if (copy_from_user(&transid, argp, sizeof(transid)))
3069                        return -EFAULT;
3070        } else {
3071                transid = 0;  /* current trans */
3072        }
3073        return btrfs_wait_for_commit(root, transid);
3074}
3075
3076static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
3077{
3078        int ret;
3079        struct btrfs_ioctl_scrub_args *sa;
3080
3081        if (!capable(CAP_SYS_ADMIN))
3082                return -EPERM;
3083
3084        sa = memdup_user(arg, sizeof(*sa));
3085        if (IS_ERR(sa))
3086                return PTR_ERR(sa);
3087
3088        ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
3089                              &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
3090
3091        if (copy_to_user(arg, sa, sizeof(*sa)))
3092                ret = -EFAULT;
3093
3094        kfree(sa);
3095        return ret;
3096}
3097
3098static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
3099{
3100        if (!capable(CAP_SYS_ADMIN))
3101                return -EPERM;
3102
3103        return btrfs_scrub_cancel(root);
3104}
3105
3106static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
3107                                       void __user *arg)
3108{
3109        struct btrfs_ioctl_scrub_args *sa;
3110        int ret;
3111
3112        if (!capable(CAP_SYS_ADMIN))
3113                return -EPERM;
3114
3115        sa = memdup_user(arg, sizeof(*sa));
3116        if (IS_ERR(sa))
3117                return PTR_ERR(sa);
3118
3119        ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
3120
3121        if (copy_to_user(arg, sa, sizeof(*sa)))
3122                ret = -EFAULT;
3123
3124        kfree(sa);
3125        return ret;
3126}
3127
3128static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
3129                                      void __user *arg)
3130{
3131        struct btrfs_ioctl_get_dev_stats *sa;
3132        int ret;
3133
3134        sa = memdup_user(arg, sizeof(*sa));
3135        if (IS_ERR(sa))
3136                return PTR_ERR(sa);
3137
3138        if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
3139                kfree(sa);
3140                return -EPERM;
3141        }
3142
3143        ret = btrfs_get_dev_stats(root, sa);
3144
3145        if (copy_to_user(arg, sa, sizeof(*sa)))
3146                ret = -EFAULT;
3147
3148        kfree(sa);
3149        return ret;
3150}
3151
3152static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3153{
3154        int ret = 0;
3155        int i;
3156        u64 rel_ptr;
3157        int size;
3158        struct btrfs_ioctl_ino_path_args *ipa = NULL;
3159        struct inode_fs_paths *ipath = NULL;
3160        struct btrfs_path *path;
3161
3162        if (!capable(CAP_SYS_ADMIN))
3163                return -EPERM;
3164
3165        path = btrfs_alloc_path();
3166        if (!path) {
3167                ret = -ENOMEM;
3168                goto out;
3169        }
3170
3171        ipa = memdup_user(arg, sizeof(*ipa));
3172        if (IS_ERR(ipa)) {
3173                ret = PTR_ERR(ipa);
3174                ipa = NULL;
3175                goto out;
3176        }
3177
3178        size = min_t(u32, ipa->size, 4096);
3179        ipath = init_ipath(size, root, path);
3180        if (IS_ERR(ipath)) {
3181                ret = PTR_ERR(ipath);
3182                ipath = NULL;
3183                goto out;
3184        }
3185
3186        ret = paths_from_inode(ipa->inum, ipath);
3187        if (ret < 0)
3188                goto out;
3189
3190        for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
3191                rel_ptr = ipath->fspath->val[i] -
3192                          (u64)(unsigned long)ipath->fspath->val;
3193                ipath->fspath->val[i] = rel_ptr;
3194        }
3195
3196        ret = copy_to_user((void *)(unsigned long)ipa->fspath,
3197                           (void *)(unsigned long)ipath->fspath, size);
3198        if (ret) {
3199                ret = -EFAULT;
3200                goto out;
3201        }
3202
3203out:
3204        btrfs_free_path(path);
3205        free_ipath(ipath);
3206        kfree(ipa);
3207
3208        return ret;
3209}
3210
3211static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
3212{
3213        struct btrfs_data_container *inodes = ctx;
3214        const size_t c = 3 * sizeof(u64);
3215
3216        if (inodes->bytes_left >= c) {
3217                inodes->bytes_left -= c;
3218                inodes->val[inodes->elem_cnt] = inum;
3219                inodes->val[inodes->elem_cnt + 1] = offset;
3220                inodes->val[inodes->elem_cnt + 2] = root;
3221                inodes->elem_cnt += 3;
3222        } else {
3223                inodes->bytes_missing += c - inodes->bytes_left;
3224                inodes->bytes_left = 0;
3225                inodes->elem_missed += 3;
3226        }
3227
3228        return 0;
3229}
3230
3231static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3232                                        void __user *arg)
3233{
3234        int ret = 0;
3235        int size;
3236        struct btrfs_ioctl_logical_ino_args *loi;
3237        struct btrfs_data_container *inodes = NULL;
3238        struct btrfs_path *path = NULL;
3239
3240        if (!capable(CAP_SYS_ADMIN))
3241                return -EPERM;
3242
3243        loi = memdup_user(arg, sizeof(*loi));
3244        if (IS_ERR(loi)) {
3245                ret = PTR_ERR(loi);
3246                loi = NULL;
3247                goto out;
3248        }
3249
3250        path = btrfs_alloc_path();
3251        if (!path) {
3252                ret = -ENOMEM;
3253                goto out;
3254        }
3255
3256        size = min_t(u32, loi->size, 64 * 1024);
3257        inodes = init_data_container(size);
3258        if (IS_ERR(inodes)) {
3259                ret = PTR_ERR(inodes);
3260                inodes = NULL;
3261                goto out;
3262        }
3263
3264        ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path,
3265                                          build_ino_list, inodes);
3266        if (ret == -EINVAL)
3267                ret = -ENOENT;
3268        if (ret < 0)
3269                goto out;
3270
3271        ret = copy_to_user((void *)(unsigned long)loi->inodes,
3272                           (void *)(unsigned long)inodes, size);
3273        if (ret)
3274                ret = -EFAULT;
3275
3276out:
3277        btrfs_free_path(path);
3278        vfree(inodes);
3279        kfree(loi);
3280
3281        return ret;
3282}
3283
3284void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3285                               struct btrfs_ioctl_balance_args *bargs)
3286{
3287        struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3288
3289        bargs->flags = bctl->flags;
3290
3291        if (atomic_read(&fs_info->balance_running))
3292                bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
3293        if (atomic_read(&fs_info->balance_pause_req))
3294                bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
3295        if (atomic_read(&fs_info->balance_cancel_req))
3296                bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
3297
3298        memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
3299        memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
3300        memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
3301
3302        if (lock) {
3303                spin_lock(&fs_info->balance_lock);
3304                memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3305                spin_unlock(&fs_info->balance_lock);
3306        } else {
3307                memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
3308        }
3309}
3310
3311static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3312{
3313        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3314        struct btrfs_fs_info *fs_info = root->fs_info;
3315        struct btrfs_ioctl_balance_args *bargs;
3316        struct btrfs_balance_control *bctl;
3317        int ret;
3318
3319        if (!capable(CAP_SYS_ADMIN))
3320                return -EPERM;
3321
3322        ret = mnt_want_write_file(file);
3323        if (ret)
3324                return ret;
3325
3326        mutex_lock(&fs_info->volume_mutex);
3327        mutex_lock(&fs_info->balance_mutex);
3328
3329        if (arg) {
3330                bargs = memdup_user(arg, sizeof(*bargs));
3331                if (IS_ERR(bargs)) {
3332                        ret = PTR_ERR(bargs);
3333                        goto out;
3334                }
3335
3336                if (bargs->flags & BTRFS_BALANCE_RESUME) {
3337                        if (!fs_info->balance_ctl) {
3338                                ret = -ENOTCONN;
3339                                goto out_bargs;
3340                        }
3341
3342                        bctl = fs_info->balance_ctl;
3343                        spin_lock(&fs_info->balance_lock);
3344                        bctl->flags |= BTRFS_BALANCE_RESUME;
3345                        spin_unlock(&fs_info->balance_lock);
3346
3347                        goto do_balance;
3348                }
3349        } else {
3350                bargs = NULL;
3351        }
3352
3353        if (fs_info->balance_ctl) {
3354                ret = -EINPROGRESS;
3355                goto out_bargs;
3356        }
3357
3358        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3359        if (!bctl) {
3360                ret = -ENOMEM;
3361                goto out_bargs;
3362        }
3363
3364        bctl->fs_info = fs_info;
3365        if (arg) {
3366                memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
3367                memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
3368                memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
3369
3370                bctl->flags = bargs->flags;
3371        } else {
3372                /* balance everything - no filters */
3373                bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
3374        }
3375
3376do_balance:
3377        ret = btrfs_balance(bctl, bargs);
3378        /*
3379         * bctl is freed in __cancel_balance or in free_fs_info if
3380         * restriper was paused all the way until unmount
3381         */
3382        if (arg) {
3383                if (copy_to_user(arg, bargs, sizeof(*bargs)))
3384                        ret = -EFAULT;
3385        }
3386
3387out_bargs:
3388        kfree(bargs);
3389out:
3390        mutex_unlock(&fs_info->balance_mutex);
3391        mutex_unlock(&fs_info->volume_mutex);
3392        mnt_drop_write_file(file);
3393        return ret;
3394}
3395
3396static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
3397{
3398        if (!capable(CAP_SYS_ADMIN))
3399                return -EPERM;
3400
3401        switch (cmd) {
3402        case BTRFS_BALANCE_CTL_PAUSE:
3403                return btrfs_pause_balance(root->fs_info);
3404        case BTRFS_BALANCE_CTL_CANCEL:
3405                return btrfs_cancel_balance(root->fs_info);
3406        }
3407
3408        return -EINVAL;
3409}
3410
3411static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
3412                                         void __user *arg)
3413{
3414        struct btrfs_fs_info *fs_info = root->fs_info;
3415        struct btrfs_ioctl_balance_args *bargs;
3416        int ret = 0;
3417
3418        if (!capable(CAP_SYS_ADMIN))
3419                return -EPERM;
3420
3421        mutex_lock(&fs_info->balance_mutex);
3422        if (!fs_info->balance_ctl) {
3423                ret = -ENOTCONN;
3424                goto out;
3425        }
3426
3427        bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
3428        if (!bargs) {
3429                ret = -ENOMEM;
3430                goto out;
3431        }
3432
3433        update_ioctl_balance_args(fs_info, 1, bargs);
3434
3435        if (copy_to_user(arg, bargs, sizeof(*bargs)))
3436                ret = -EFAULT;
3437
3438        kfree(bargs);
3439out:
3440        mutex_unlock(&fs_info->balance_mutex);
3441        return ret;
3442}
3443
3444static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
3445{
3446        struct btrfs_ioctl_quota_ctl_args *sa;
3447        struct btrfs_trans_handle *trans = NULL;
3448        int ret;
3449        int err;
3450
3451        if (!capable(CAP_SYS_ADMIN))
3452                return -EPERM;
3453
3454        if (root->fs_info->sb->s_flags & MS_RDONLY)
3455                return -EROFS;
3456
3457        sa = memdup_user(arg, sizeof(*sa));
3458        if (IS_ERR(sa))
3459                return PTR_ERR(sa);
3460
3461        if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
3462                trans = btrfs_start_transaction(root, 2);
3463                if (IS_ERR(trans)) {
3464                        ret = PTR_ERR(trans);
3465                        goto out;
3466                }
3467        }
3468
3469        switch (sa->cmd) {
3470        case BTRFS_QUOTA_CTL_ENABLE:
3471                ret = btrfs_quota_enable(trans, root->fs_info);
3472                break;
3473        case BTRFS_QUOTA_CTL_DISABLE:
3474                ret = btrfs_quota_disable(trans, root->fs_info);
3475                break;
3476        case BTRFS_QUOTA_CTL_RESCAN:
3477                ret = btrfs_quota_rescan(root->fs_info);
3478                break;
3479        default:
3480                ret = -EINVAL;
3481                break;
3482        }
3483
3484        if (copy_to_user(arg, sa, sizeof(*sa)))
3485                ret = -EFAULT;
3486
3487        if (trans) {
3488                err = btrfs_commit_transaction(trans, root);
3489                if (err && !ret)
3490                        ret = err;
3491        }
3492
3493out:
3494        kfree(sa);
3495        return ret;
3496}
3497
3498static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
3499{
3500        struct btrfs_ioctl_qgroup_assign_args *sa;
3501        struct btrfs_trans_handle *trans;
3502        int ret;
3503        int err;
3504
3505        if (!capable(CAP_SYS_ADMIN))
3506                return -EPERM;
3507
3508        if (root->fs_info->sb->s_flags & MS_RDONLY)
3509                return -EROFS;
3510
3511        sa = memdup_user(arg, sizeof(*sa));
3512        if (IS_ERR(sa))
3513                return PTR_ERR(sa);
3514
3515        trans = btrfs_join_transaction(root);
3516        if (IS_ERR(trans)) {
3517                ret = PTR_ERR(trans);
3518                goto out;
3519        }
3520
3521        /* FIXME: check if the IDs really exist */
3522        if (sa->assign) {
3523                ret = btrfs_add_qgroup_relation(trans, root->fs_info,
3524                                                sa->src, sa->dst);
3525        } else {
3526                ret = btrfs_del_qgroup_relation(trans, root->fs_info,
3527                                                sa->src, sa->dst);
3528        }
3529
3530        err = btrfs_end_transaction(trans, root);
3531        if (err && !ret)
3532                ret = err;
3533
3534out:
3535        kfree(sa);
3536        return ret;
3537}
3538
3539static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
3540{
3541        struct btrfs_ioctl_qgroup_create_args *sa;
3542        struct btrfs_trans_handle *trans;
3543        int ret;
3544        int err;
3545
3546        if (!capable(CAP_SYS_ADMIN))
3547                return -EPERM;
3548
3549        if (root->fs_info->sb->s_flags & MS_RDONLY)
3550                return -EROFS;
3551
3552        sa = memdup_user(arg, sizeof(*sa));
3553        if (IS_ERR(sa))
3554                return PTR_ERR(sa);
3555
3556        trans = btrfs_join_transaction(root);
3557        if (IS_ERR(trans)) {
3558                ret = PTR_ERR(trans);
3559                goto out;
3560        }
3561
3562        /* FIXME: check if the IDs really exist */
3563        if (sa->create) {
3564                ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
3565                                          NULL);
3566        } else {
3567                ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
3568        }
3569
3570        err = btrfs_end_transaction(trans, root);
3571        if (err && !ret)
3572                ret = err;
3573
3574out:
3575        kfree(sa);
3576        return ret;
3577}
3578
3579static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
3580{
3581        struct btrfs_ioctl_qgroup_limit_args *sa;
3582        struct btrfs_trans_handle *trans;
3583        int ret;
3584        int err;
3585        u64 qgroupid;
3586
3587        if (!capable(CAP_SYS_ADMIN))
3588                return -EPERM;
3589
3590        if (root->fs_info->sb->s_flags & MS_RDONLY)
3591                return -EROFS;
3592
3593        sa = memdup_user(arg, sizeof(*sa));
3594        if (IS_ERR(sa))
3595                return PTR_ERR(sa);
3596
3597        trans = btrfs_join_transaction(root);
3598        if (IS_ERR(trans)) {
3599                ret = PTR_ERR(trans);
3600                goto out;
3601        }
3602
3603        qgroupid = sa->qgroupid;
3604        if (!qgroupid) {
3605                /* take the current subvol as qgroup */
3606                qgroupid = root->root_key.objectid;
3607        }
3608
3609        /* FIXME: check if the IDs really exist */
3610        ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);
3611
3612        err = btrfs_end_transaction(trans, root);
3613        if (err && !ret)
3614                ret = err;
3615
3616out:
3617        kfree(sa);
3618        return ret;
3619}
3620
3621static long btrfs_ioctl_set_received_subvol(struct file *file,
3622                                            void __user *arg)
3623{
3624        struct btrfs_ioctl_received_subvol_args *sa = NULL;
3625        struct inode *inode = fdentry(file)->d_inode;
3626        struct btrfs_root *root = BTRFS_I(inode)->root;
3627        struct btrfs_root_item *root_item = &root->root_item;
3628        struct btrfs_trans_handle *trans;
3629        struct timespec ct = CURRENT_TIME;
3630        int ret = 0;
3631
3632        ret = mnt_want_write_file(file);
3633        if (ret < 0)
3634                return ret;
3635
3636        down_write(&root->fs_info->subvol_sem);
3637
3638        if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
3639                ret = -EINVAL;
3640                goto out;
3641        }
3642
3643        if (btrfs_root_readonly(root)) {
3644                ret = -EROFS;
3645                goto out;
3646        }
3647
3648        if (!inode_owner_or_capable(inode)) {
3649                ret = -EACCES;
3650                goto out;
3651        }
3652
3653        sa = memdup_user(arg, sizeof(*sa));
3654        if (IS_ERR(sa)) {
3655                ret = PTR_ERR(sa);
3656                sa = NULL;
3657                goto out;
3658        }
3659
3660        trans = btrfs_start_transaction(root, 1);
3661        if (IS_ERR(trans)) {
3662                ret = PTR_ERR(trans);
3663                trans = NULL;
3664                goto out;
3665        }
3666
3667        sa->rtransid = trans->transid;
3668        sa->rtime.sec = ct.tv_sec;
3669        sa->rtime.nsec = ct.tv_nsec;
3670
3671        memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
3672        btrfs_set_root_stransid(root_item, sa->stransid);
3673        btrfs_set_root_rtransid(root_item, sa->rtransid);
3674        root_item->stime.sec = cpu_to_le64(sa->stime.sec);
3675        root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
3676        root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
3677        root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
3678
3679        ret = btrfs_update_root(trans, root->fs_info->tree_root,
3680                                &root->root_key, &root->root_item);
3681        if (ret < 0) {
3682                btrfs_end_transaction(trans, root);
3683                trans = NULL;
3684                goto out;
3685        } else {
3686                ret = btrfs_commit_transaction(trans, root);
3687                if (ret < 0)
3688                        goto out;
3689        }
3690
3691        ret = copy_to_user(arg, sa, sizeof(*sa));
3692        if (ret)
3693                ret = -EFAULT;
3694
3695out:
3696        kfree(sa);
3697        up_write(&root->fs_info->subvol_sem);
3698        mnt_drop_write_file(file);
3699        return ret;
3700}
3701
3702long btrfs_ioctl(struct file *file, unsigned int
3703                cmd, unsigned long arg)
3704{
3705        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3706        void __user *argp = (void __user *)arg;
3707
3708        switch (cmd) {
3709        case FS_IOC_GETFLAGS:
3710                return btrfs_ioctl_getflags(file, argp);
3711        case FS_IOC_SETFLAGS:
3712                return btrfs_ioctl_setflags(file, argp);
3713        case FS_IOC_GETVERSION:
3714                return btrfs_ioctl_getversion(file, argp);
3715        case FITRIM:
3716                return btrfs_ioctl_fitrim(file, argp);
3717        case BTRFS_IOC_SNAP_CREATE:
3718                return btrfs_ioctl_snap_create(file, argp, 0);
3719        case BTRFS_IOC_SNAP_CREATE_V2:
3720                return btrfs_ioctl_snap_create_v2(file, argp, 0);
3721        case BTRFS_IOC_SUBVOL_CREATE:
3722                return btrfs_ioctl_snap_create(file, argp, 1);
3723        case BTRFS_IOC_SUBVOL_CREATE_V2:
3724                return btrfs_ioctl_snap_create_v2(file, argp, 1);
3725        case BTRFS_IOC_SNAP_DESTROY:
3726                return btrfs_ioctl_snap_destroy(file, argp);
3727        case BTRFS_IOC_SUBVOL_GETFLAGS:
3728                return btrfs_ioctl_subvol_getflags(file, argp);
3729        case BTRFS_IOC_SUBVOL_SETFLAGS:
3730                return btrfs_ioctl_subvol_setflags(file, argp);
3731        case BTRFS_IOC_DEFAULT_SUBVOL:
3732                return btrfs_ioctl_default_subvol(file, argp);
3733        case BTRFS_IOC_DEFRAG:
3734                return btrfs_ioctl_defrag(file, NULL);
3735        case BTRFS_IOC_DEFRAG_RANGE:
3736                return btrfs_ioctl_defrag(file, argp);
3737        case BTRFS_IOC_RESIZE:
3738                return btrfs_ioctl_resize(root, argp);
3739        case BTRFS_IOC_ADD_DEV:
3740                return btrfs_ioctl_add_dev(root, argp);
3741        case BTRFS_IOC_RM_DEV:
3742                return btrfs_ioctl_rm_dev(root, argp);
3743        case BTRFS_IOC_FS_INFO:
3744                return btrfs_ioctl_fs_info(root, argp);
3745        case BTRFS_IOC_DEV_INFO:
3746                return btrfs_ioctl_dev_info(root, argp);
3747        case BTRFS_IOC_BALANCE:
3748                return btrfs_ioctl_balance(file, NULL);
3749        case BTRFS_IOC_CLONE:
3750                return btrfs_ioctl_clone(file, arg, 0, 0, 0);
3751        case BTRFS_IOC_CLONE_RANGE:
3752                return btrfs_ioctl_clone_range(file, argp);
3753        case BTRFS_IOC_TRANS_START:
3754                return btrfs_ioctl_trans_start(file);
3755        case BTRFS_IOC_TRANS_END:
3756                return btrfs_ioctl_trans_end(file);
3757        case BTRFS_IOC_TREE_SEARCH:
3758                return btrfs_ioctl_tree_search(file, argp);
3759        case BTRFS_IOC_INO_LOOKUP:
3760                return btrfs_ioctl_ino_lookup(file, argp);
3761        case BTRFS_IOC_INO_PATHS:
3762                return btrfs_ioctl_ino_to_path(root, argp);
3763        case BTRFS_IOC_LOGICAL_INO:
3764                return btrfs_ioctl_logical_to_ino(root, argp);
3765        case BTRFS_IOC_SPACE_INFO:
3766                return btrfs_ioctl_space_info(root, argp);
3767        case BTRFS_IOC_SYNC:
3768                btrfs_sync_fs(file->f_dentry->d_sb, 1);
3769                return 0;
3770        case BTRFS_IOC_START_SYNC:
3771                return btrfs_ioctl_start_sync(file, argp);
3772        case BTRFS_IOC_WAIT_SYNC:
3773                return btrfs_ioctl_wait_sync(file, argp);
3774        case BTRFS_IOC_SCRUB:
3775                return btrfs_ioctl_scrub(root, argp);
3776        case BTRFS_IOC_SCRUB_CANCEL:
3777                return btrfs_ioctl_scrub_cancel(root, argp);
3778        case BTRFS_IOC_SCRUB_PROGRESS:
3779                return btrfs_ioctl_scrub_progress(root, argp);
3780        case BTRFS_IOC_BALANCE_V2:
3781                return btrfs_ioctl_balance(file, argp);
3782        case BTRFS_IOC_BALANCE_CTL:
3783                return btrfs_ioctl_balance_ctl(root, arg);
3784        case BTRFS_IOC_BALANCE_PROGRESS:
3785                return btrfs_ioctl_balance_progress(root, argp);
3786        case BTRFS_IOC_SET_RECEIVED_SUBVOL:
3787                return btrfs_ioctl_set_received_subvol(file, argp);
3788        case BTRFS_IOC_SEND:
3789                return btrfs_ioctl_send(file, argp);
3790        case BTRFS_IOC_GET_DEV_STATS:
3791                return btrfs_ioctl_get_dev_stats(root, argp);
3792        case BTRFS_IOC_QUOTA_CTL:
3793                return btrfs_ioctl_quota_ctl(root, argp);
3794        case BTRFS_IOC_QGROUP_ASSIGN:
3795                return btrfs_ioctl_qgroup_assign(root, argp);
3796        case BTRFS_IOC_QGROUP_CREATE:
3797                return btrfs_ioctl_qgroup_create(root, argp);
3798        case BTRFS_IOC_QGROUP_LIMIT:
3799                return btrfs_ioctl_qgroup_limit(root, argp);
3800        }
3801
3802        return -ENOTTY;
3803}
3804
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.