linux/fs/overlayfs/copy_up.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *
   4 * Copyright (C) 2011 Novell Inc.
   5 */
   6
   7#include <linux/module.h>
   8#include <linux/fs.h>
   9#include <linux/slab.h>
  10#include <linux/file.h>
  11#include <linux/splice.h>
  12#include <linux/xattr.h>
  13#include <linux/security.h>
  14#include <linux/uaccess.h>
  15#include <linux/sched/signal.h>
  16#include <linux/cred.h>
  17#include <linux/namei.h>
  18#include <linux/fdtable.h>
  19#include <linux/ratelimit.h>
  20#include <linux/exportfs.h>
  21#include "overlayfs.h"
  22
/* Upper bound, in bytes (1 MiB), on each splice issued by ovl_copy_up_data() */
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  24
  25static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
  26{
  27        pr_warn("\"check_copy_up\" module option is obsolete\n");
  28        return 0;
  29}
  30
  31static int ovl_ccup_get(char *buf, const struct kernel_param *param)
  32{
  33        return sprintf(buf, "N\n");
  34}
  35
/*
 * Register "check_copy_up" with the custom set/get callbacks above
 * (mode 0644) and describe it as obsolete.
 */
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing");
  38
  39static bool ovl_must_copy_xattr(const char *name)
  40{
  41        return !strcmp(name, XATTR_POSIX_ACL_ACCESS) ||
  42               !strcmp(name, XATTR_POSIX_ACL_DEFAULT) ||
  43               !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN);
  44}
  45
  46int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
  47                   struct dentry *new)
  48{
  49        ssize_t list_size, size, value_size = 0;
  50        char *buf, *name, *value = NULL;
  51        int error = 0;
  52        size_t slen;
  53
  54        if (!(old->d_inode->i_opflags & IOP_XATTR) ||
  55            !(new->d_inode->i_opflags & IOP_XATTR))
  56                return 0;
  57
  58        list_size = vfs_listxattr(old, NULL, 0);
  59        if (list_size <= 0) {
  60                if (list_size == -EOPNOTSUPP)
  61                        return 0;
  62                return list_size;
  63        }
  64
  65        buf = kzalloc(list_size, GFP_KERNEL);
  66        if (!buf)
  67                return -ENOMEM;
  68
  69        list_size = vfs_listxattr(old, buf, list_size);
  70        if (list_size <= 0) {
  71                error = list_size;
  72                goto out;
  73        }
  74
  75        for (name = buf; list_size; name += slen) {
  76                slen = strnlen(name, list_size) + 1;
  77
  78                /* underlying fs providing us with an broken xattr list? */
  79                if (WARN_ON(slen > list_size)) {
  80                        error = -EIO;
  81                        break;
  82                }
  83                list_size -= slen;
  84
  85                if (ovl_is_private_xattr(sb, name))
  86                        continue;
  87
  88                error = security_inode_copy_up_xattr(name);
  89                if (error < 0 && error != -EOPNOTSUPP)
  90                        break;
  91                if (error == 1) {
  92                        error = 0;
  93                        continue; /* Discard */
  94                }
  95retry:
  96                size = vfs_getxattr(&init_user_ns, old, name, value, value_size);
  97                if (size == -ERANGE)
  98                        size = vfs_getxattr(&init_user_ns, old, name, NULL, 0);
  99
 100                if (size < 0) {
 101                        error = size;
 102                        break;
 103                }
 104
 105                if (size > value_size) {
 106                        void *new;
 107
 108                        new = krealloc(value, size, GFP_KERNEL);
 109                        if (!new) {
 110                                error = -ENOMEM;
 111                                break;
 112                        }
 113                        value = new;
 114                        value_size = size;
 115                        goto retry;
 116                }
 117
 118                error = vfs_setxattr(&init_user_ns, new, name, value, size, 0);
 119                if (error) {
 120                        if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
 121                                break;
 122
 123                        /* Ignore failure to copy unknown xattrs */
 124                        error = 0;
 125                }
 126        }
 127        kfree(value);
 128out:
 129        kfree(buf);
 130        return error;
 131}
 132
 133static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old,
 134                            struct path *new, loff_t len)
 135{
 136        struct file *old_file;
 137        struct file *new_file;
 138        loff_t old_pos = 0;
 139        loff_t new_pos = 0;
 140        loff_t cloned;
 141        loff_t data_pos = -1;
 142        loff_t hole_len;
 143        bool skip_hole = false;
 144        int error = 0;
 145
 146        if (len == 0)
 147                return 0;
 148
 149        old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
 150        if (IS_ERR(old_file))
 151                return PTR_ERR(old_file);
 152
 153        new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
 154        if (IS_ERR(new_file)) {
 155                error = PTR_ERR(new_file);
 156                goto out_fput;
 157        }
 158
 159        /* Try to use clone_file_range to clone up within the same fs */
 160        cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
 161        if (cloned == len)
 162                goto out;
 163        /* Couldn't clone, so now we try to copy the data */
 164
 165        /* Check if lower fs supports seek operation */
 166        if (old_file->f_mode & FMODE_LSEEK &&
 167            old_file->f_op->llseek)
 168                skip_hole = true;
 169
 170        while (len) {
 171                size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
 172                long bytes;
 173
 174                if (len < this_len)
 175                        this_len = len;
 176
 177                if (signal_pending_state(TASK_KILLABLE, current)) {
 178                        error = -EINTR;
 179                        break;
 180                }
 181
 182                /*
 183                 * Fill zero for hole will cost unnecessary disk space
 184                 * and meanwhile slow down the copy-up speed, so we do
 185                 * an optimization for hole during copy-up, it relies
 186                 * on SEEK_DATA implementation in lower fs so if lower
 187                 * fs does not support it, copy-up will behave as before.
 188                 *
 189                 * Detail logic of hole detection as below:
 190                 * When we detect next data position is larger than current
 191                 * position we will skip that hole, otherwise we copy
 192                 * data in the size of OVL_COPY_UP_CHUNK_SIZE. Actually,
 193                 * it may not recognize all kind of holes and sometimes
 194                 * only skips partial of hole area. However, it will be
 195                 * enough for most of the use cases.
 196                 */
 197
 198                if (skip_hole && data_pos < old_pos) {
 199                        data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
 200                        if (data_pos > old_pos) {
 201                                hole_len = data_pos - old_pos;
 202                                len -= hole_len;
 203                                old_pos = new_pos = data_pos;
 204                                continue;
 205                        } else if (data_pos == -ENXIO) {
 206                                break;
 207                        } else if (data_pos < 0) {
 208                                skip_hole = false;
 209                        }
 210                }
 211
 212                bytes = do_splice_direct(old_file, &old_pos,
 213                                         new_file, &new_pos,
 214                                         this_len, SPLICE_F_MOVE);
 215                if (bytes <= 0) {
 216                        error = bytes;
 217                        break;
 218                }
 219                WARN_ON(old_pos != new_pos);
 220
 221                len -= bytes;
 222        }
 223out:
 224        if (!error && ovl_should_sync(ofs))
 225                error = vfs_fsync(new_file, 0);
 226        fput(new_file);
 227out_fput:
 228        fput(old_file);
 229        return error;
 230}
 231
 232static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
 233{
 234        struct iattr attr = {
 235                .ia_valid = ATTR_SIZE,
 236                .ia_size = stat->size,
 237        };
 238
 239        return notify_change(&init_user_ns, upperdentry, &attr, NULL);
 240}
 241
 242static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
 243{
 244        struct iattr attr = {
 245                .ia_valid =
 246                     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
 247                .ia_atime = stat->atime,
 248                .ia_mtime = stat->mtime,
 249        };
 250
 251        return notify_change(&init_user_ns, upperdentry, &attr, NULL);
 252}
 253
 254int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 255{
 256        int err = 0;
 257
 258        if (!S_ISLNK(stat->mode)) {
 259                struct iattr attr = {
 260                        .ia_valid = ATTR_MODE,
 261                        .ia_mode = stat->mode,
 262                };
 263                err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
 264        }
 265        if (!err) {
 266                struct iattr attr = {
 267                        .ia_valid = ATTR_UID | ATTR_GID,
 268                        .ia_uid = stat->uid,
 269                        .ia_gid = stat->gid,
 270                };
 271                err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
 272        }
 273        if (!err)
 274                ovl_set_timestamps(upperdentry, stat);
 275
 276        return err;
 277}
 278
 279struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
 280                                  bool is_upper)
 281{
 282        struct ovl_fh *fh;
 283        int fh_type, dwords;
 284        int buflen = MAX_HANDLE_SZ;
 285        uuid_t *uuid = &real->d_sb->s_uuid;
 286        int err;
 287
 288        /* Make sure the real fid stays 32bit aligned */
 289        BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
 290        BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
 291
 292        fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
 293        if (!fh)
 294                return ERR_PTR(-ENOMEM);
 295
 296        /*
 297         * We encode a non-connectable file handle for non-dir, because we
 298         * only need to find the lower inode number and we don't want to pay
 299         * the price or reconnecting the dentry.
 300         */
 301        dwords = buflen >> 2;
 302        fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
 303        buflen = (dwords << 2);
 304
 305        err = -EIO;
 306        if (WARN_ON(fh_type < 0) ||
 307            WARN_ON(buflen > MAX_HANDLE_SZ) ||
 308            WARN_ON(fh_type == FILEID_INVALID))
 309                goto out_err;
 310
 311        fh->fb.version = OVL_FH_VERSION;
 312        fh->fb.magic = OVL_FH_MAGIC;
 313        fh->fb.type = fh_type;
 314        fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
 315        /*
 316         * When we will want to decode an overlay dentry from this handle
 317         * and all layers are on the same fs, if we get a disconncted real
 318         * dentry when we decode fid, the only way to tell if we should assign
 319         * it to upperdentry or to lowerstack is by checking this flag.
 320         */
 321        if (is_upper)
 322                fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
 323        fh->fb.len = sizeof(fh->fb) + buflen;
 324        if (ofs->config.uuid)
 325                fh->fb.uuid = *uuid;
 326
 327        return fh;
 328
 329out_err:
 330        kfree(fh);
 331        return ERR_PTR(err);
 332}
 333
 334int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
 335                   struct dentry *lower, struct dentry *upper)
 336{
 337        const struct ovl_fh *fh = NULL;
 338        int err;
 339
 340        /*
 341         * When lower layer doesn't support export operations store a 'null' fh,
 342         * so we can use the overlay.origin xattr to distignuish between a copy
 343         * up and a pure upper inode.
 344         */
 345        if (ovl_can_decode_fh(lower->d_sb)) {
 346                fh = ovl_encode_real_fh(ofs, lower, false);
 347                if (IS_ERR(fh))
 348                        return PTR_ERR(fh);
 349        }
 350
 351        /*
 352         * Do not fail when upper doesn't support xattrs.
 353         */
 354        err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
 355                                 fh ? fh->fb.len : 0, 0);
 356        kfree(fh);
 357
 358        /* Ignore -EPERM from setting "user.*" on symlink/special */
 359        return err == -EPERM ? 0 : err;
 360}
 361
 362/* Store file handle of @upper dir in @index dir entry */
 363static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
 364                            struct dentry *index)
 365{
 366        const struct ovl_fh *fh;
 367        int err;
 368
 369        fh = ovl_encode_real_fh(ofs, upper, true);
 370        if (IS_ERR(fh))
 371                return PTR_ERR(fh);
 372
 373        err = ovl_do_setxattr(ofs, index, OVL_XATTR_UPPER, fh->buf, fh->fb.len);
 374
 375        kfree(fh);
 376        return err;
 377}
 378
 379/*
 380 * Create and install index entry.
 381 *
 382 * Caller must hold i_mutex on indexdir.
 383 */
 384static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
 385                            struct dentry *upper)
 386{
 387        struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 388        struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
 389        struct inode *dir = d_inode(indexdir);
 390        struct dentry *index = NULL;
 391        struct dentry *temp = NULL;
 392        struct qstr name = { };
 393        int err;
 394
 395        /*
 396         * For now this is only used for creating index entry for directories,
 397         * because non-dir are copied up directly to index and then hardlinked
 398         * to upper dir.
 399         *
 400         * TODO: implement create index for non-dir, so we can call it when
 401         * encoding file handle for non-dir in case index does not exist.
 402         */
 403        if (WARN_ON(!d_is_dir(dentry)))
 404                return -EIO;
 405
 406        /* Directory not expected to be indexed before copy up */
 407        if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
 408                return -EIO;
 409
 410        err = ovl_get_index_name(ofs, origin, &name);
 411        if (err)
 412                return err;
 413
 414        temp = ovl_create_temp(indexdir, OVL_CATTR(S_IFDIR | 0));
 415        err = PTR_ERR(temp);
 416        if (IS_ERR(temp))
 417                goto free_name;
 418
 419        err = ovl_set_upper_fh(ofs, upper, temp);
 420        if (err)
 421                goto out;
 422
 423        index = lookup_one_len(name.name, indexdir, name.len);
 424        if (IS_ERR(index)) {
 425                err = PTR_ERR(index);
 426        } else {
 427                err = ovl_do_rename(dir, temp, dir, index, 0);
 428                dput(index);
 429        }
 430out:
 431        if (err)
 432                ovl_cleanup(dir, temp);
 433        dput(temp);
 434free_name:
 435        kfree(name.name);
 436        return err;
 437}
 438
/* State shared by the helpers that perform one copy up (see ovl_copy_up_one) */
struct ovl_copy_up_ctx {
	/* Overlay parent of @dentry; NULL when copying up a disconnected dentry */
	struct dentry *parent;
	/* Overlay dentry being copied up */
	struct dentry *dentry;
	/* Lower path of @dentry, as filled in by ovl_path_lower() */
	struct path lowerpath;
	/* Attributes of @lowerpath; size is zeroed when O_TRUNC was requested */
	struct kstat stat;
	/* atime/mtime of the parent's upper dir, used to restore them afterwards */
	struct kstat pstat;
	/* Symlink target from vfs_get_link(); only set for symlinks */
	const char *link;
	/* Dir the copy is installed into: parent's upper dir or the index dir */
	struct dentry *destdir;
	/* Name of the copy in @destdir: the dentry name or the index name */
	struct qstr destname;
	/* Dir used to prepare the temp file: workdir, or index dir for indexed dirs */
	struct dentry *workdir;
	/* Store the origin xattr on the copy */
	bool origin;
	/* ovl_need_index() was true for @dentry */
	bool indexed;
	/* Copy up metadata only and leave the data in the lower layer */
	bool metacopy;
};
 453
 454static int ovl_link_up(struct ovl_copy_up_ctx *c)
 455{
 456        int err;
 457        struct dentry *upper;
 458        struct dentry *upperdir = ovl_dentry_upper(c->parent);
 459        struct inode *udir = d_inode(upperdir);
 460
 461        /* Mark parent "impure" because it may now contain non-pure upper */
 462        err = ovl_set_impure(c->parent, upperdir);
 463        if (err)
 464                return err;
 465
 466        err = ovl_set_nlink_lower(c->dentry);
 467        if (err)
 468                return err;
 469
 470        inode_lock_nested(udir, I_MUTEX_PARENT);
 471        upper = lookup_one_len(c->dentry->d_name.name, upperdir,
 472                               c->dentry->d_name.len);
 473        err = PTR_ERR(upper);
 474        if (!IS_ERR(upper)) {
 475                err = ovl_do_link(ovl_dentry_upper(c->dentry), udir, upper);
 476                dput(upper);
 477
 478                if (!err) {
 479                        /* Restore timestamps on parent (best effort) */
 480                        ovl_set_timestamps(upperdir, &c->pstat);
 481                        ovl_dentry_set_upper_alias(c->dentry);
 482                }
 483        }
 484        inode_unlock(udir);
 485        if (err)
 486                return err;
 487
 488        err = ovl_set_nlink_upper(c->dentry);
 489
 490        return err;
 491}
 492
 493static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 494{
 495        struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 496        int err;
 497
 498        /*
 499         * Copy up data first and then xattrs. Writing data after
 500         * xattrs will remove security.capability xattr automatically.
 501         */
 502        if (S_ISREG(c->stat.mode) && !c->metacopy) {
 503                struct path upperpath, datapath;
 504
 505                ovl_path_upper(c->dentry, &upperpath);
 506                if (WARN_ON(upperpath.dentry != NULL))
 507                        return -EIO;
 508                upperpath.dentry = temp;
 509
 510                ovl_path_lowerdata(c->dentry, &datapath);
 511                err = ovl_copy_up_data(ofs, &datapath, &upperpath,
 512                                       c->stat.size);
 513                if (err)
 514                        return err;
 515        }
 516
 517        err = ovl_copy_xattr(c->dentry->d_sb, c->lowerpath.dentry, temp);
 518        if (err)
 519                return err;
 520
 521        /*
 522         * Store identifier of lower inode in upper inode xattr to
 523         * allow lookup of the copy up origin inode.
 524         *
 525         * Don't set origin when we are breaking the association with a lower
 526         * hard link.
 527         */
 528        if (c->origin) {
 529                err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp);
 530                if (err)
 531                        return err;
 532        }
 533
 534        if (c->metacopy) {
 535                err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
 536                                         NULL, 0, -EOPNOTSUPP);
 537                if (err)
 538                        return err;
 539        }
 540
 541        inode_lock(temp->d_inode);
 542        if (S_ISREG(c->stat.mode))
 543                err = ovl_set_size(temp, &c->stat);
 544        if (!err)
 545                err = ovl_set_attr(temp, &c->stat);
 546        inode_unlock(temp->d_inode);
 547
 548        return err;
 549}
 550
/* Credential override state kept between ovl_prep_cu_creds() and ovl_revert_cu_creds() */
struct ovl_cu_creds {
	/* Creds returned by override_creds(); restored on revert */
	const struct cred *old;
	/* Creds provided by security_inode_copy_up(); NULL if none were provided */
	struct cred *new;
};
 555
 556static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
 557{
 558        int err;
 559
 560        cc->old = cc->new = NULL;
 561        err = security_inode_copy_up(dentry, &cc->new);
 562        if (err < 0)
 563                return err;
 564
 565        if (cc->new)
 566                cc->old = override_creds(cc->new);
 567
 568        return 0;
 569}
 570
 571static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
 572{
 573        if (cc->new) {
 574                revert_creds(cc->old);
 575                put_cred(cc->new);
 576        }
 577}
 578
 579/*
 580 * Copyup using workdir to prepare temp file.  Used when copying up directories,
 581 * special files or when upper fs doesn't support O_TMPFILE.
 582 */
 583static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 584{
 585        struct inode *inode;
 586        struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
 587        struct dentry *temp, *upper;
 588        struct ovl_cu_creds cc;
 589        int err;
 590        struct ovl_cattr cattr = {
 591                /* Can't properly set mode on creation because of the umask */
 592                .mode = c->stat.mode & S_IFMT,
 593                .rdev = c->stat.rdev,
 594                .link = c->link
 595        };
 596
 597        /* workdir and destdir could be the same when copying up to indexdir */
 598        err = -EIO;
 599        if (lock_rename(c->workdir, c->destdir) != NULL)
 600                goto unlock;
 601
 602        err = ovl_prep_cu_creds(c->dentry, &cc);
 603        if (err)
 604                goto unlock;
 605
 606        temp = ovl_create_temp(c->workdir, &cattr);
 607        ovl_revert_cu_creds(&cc);
 608
 609        err = PTR_ERR(temp);
 610        if (IS_ERR(temp))
 611                goto unlock;
 612
 613        err = ovl_copy_up_inode(c, temp);
 614        if (err)
 615                goto cleanup;
 616
 617        if (S_ISDIR(c->stat.mode) && c->indexed) {
 618                err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
 619                if (err)
 620                        goto cleanup;
 621        }
 622
 623        upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
 624        err = PTR_ERR(upper);
 625        if (IS_ERR(upper))
 626                goto cleanup;
 627
 628        err = ovl_do_rename(wdir, temp, udir, upper, 0);
 629        dput(upper);
 630        if (err)
 631                goto cleanup;
 632
 633        if (!c->metacopy)
 634                ovl_set_upperdata(d_inode(c->dentry));
 635        inode = d_inode(c->dentry);
 636        ovl_inode_update(inode, temp);
 637        if (S_ISDIR(inode->i_mode))
 638                ovl_set_flag(OVL_WHITEOUTS, inode);
 639unlock:
 640        unlock_rename(c->workdir, c->destdir);
 641
 642        return err;
 643
 644cleanup:
 645        ovl_cleanup(wdir, temp);
 646        dput(temp);
 647        goto unlock;
 648}
 649
 650/* Copyup using O_TMPFILE which does not require cross dir locking */
 651static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 652{
 653        struct inode *udir = d_inode(c->destdir);
 654        struct dentry *temp, *upper;
 655        struct ovl_cu_creds cc;
 656        int err;
 657
 658        err = ovl_prep_cu_creds(c->dentry, &cc);
 659        if (err)
 660                return err;
 661
 662        temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
 663        ovl_revert_cu_creds(&cc);
 664
 665        if (IS_ERR(temp))
 666                return PTR_ERR(temp);
 667
 668        err = ovl_copy_up_inode(c, temp);
 669        if (err)
 670                goto out_dput;
 671
 672        inode_lock_nested(udir, I_MUTEX_PARENT);
 673
 674        upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
 675        err = PTR_ERR(upper);
 676        if (!IS_ERR(upper)) {
 677                err = ovl_do_link(temp, udir, upper);
 678                dput(upper);
 679        }
 680        inode_unlock(udir);
 681
 682        if (err)
 683                goto out_dput;
 684
 685        if (!c->metacopy)
 686                ovl_set_upperdata(d_inode(c->dentry));
 687        ovl_inode_update(d_inode(c->dentry), temp);
 688
 689        return 0;
 690
 691out_dput:
 692        dput(temp);
 693        return err;
 694}
 695
 696/*
 697 * Copy up a single dentry
 698 *
 699 * All renames start with copy up of source if necessary.  The actual
 700 * rename will only proceed once the copy up was successful.  Copy up uses
 701 * upper parent i_mutex for exclusion.  Since rename can change d_parent it
 702 * is possible that the copy up will lock the old parent.  At that point
 703 * the file will have already been copied up anyway.
 704 */
 705static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 706{
 707        int err;
 708        struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 709        bool to_index = false;
 710
 711        /*
 712         * Indexed non-dir is copied up directly to the index entry and then
 713         * hardlinked to upper dir. Indexed dir is copied up to indexdir,
 714         * then index entry is created and then copied up dir installed.
 715         * Copying dir up to indexdir instead of workdir simplifies locking.
 716         */
 717        if (ovl_need_index(c->dentry)) {
 718                c->indexed = true;
 719                if (S_ISDIR(c->stat.mode))
 720                        c->workdir = ovl_indexdir(c->dentry->d_sb);
 721                else
 722                        to_index = true;
 723        }
 724
 725        if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
 726                c->origin = true;
 727
 728        if (to_index) {
 729                c->destdir = ovl_indexdir(c->dentry->d_sb);
 730                err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
 731                if (err)
 732                        return err;
 733        } else if (WARN_ON(!c->parent)) {
 734                /* Disconnected dentry must be copied up to index dir */
 735                return -EIO;
 736        } else {
 737                /*
 738                 * Mark parent "impure" because it may now contain non-pure
 739                 * upper
 740                 */
 741                err = ovl_set_impure(c->parent, c->destdir);
 742                if (err)
 743                        return err;
 744        }
 745
 746        /* Should we copyup with O_TMPFILE or with workdir? */
 747        if (S_ISREG(c->stat.mode) && ofs->tmpfile)
 748                err = ovl_copy_up_tmpfile(c);
 749        else
 750                err = ovl_copy_up_workdir(c);
 751        if (err)
 752                goto out;
 753
 754        if (c->indexed)
 755                ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
 756
 757        if (to_index) {
 758                /* Initialize nlink for copy up of disconnected dentry */
 759                err = ovl_set_nlink_upper(c->dentry);
 760        } else {
 761                struct inode *udir = d_inode(c->destdir);
 762
 763                /* Restore timestamps on parent (best effort) */
 764                inode_lock(udir);
 765                ovl_set_timestamps(c->destdir, &c->pstat);
 766                inode_unlock(udir);
 767
 768                ovl_dentry_set_upper_alias(c->dentry);
 769        }
 770
 771out:
 772        if (to_index)
 773                kfree(c->destname.name);
 774        return err;
 775}
 776
 777static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
 778                                  int flags)
 779{
 780        struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 781
 782        if (!ofs->config.metacopy)
 783                return false;
 784
 785        if (!S_ISREG(mode))
 786                return false;
 787
 788        if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
 789                return false;
 790
 791        return true;
 792}
 793
 794static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
 795{
 796        ssize_t res;
 797        char *buf;
 798
 799        res = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
 800        if (res == -ENODATA || res == -EOPNOTSUPP)
 801                res = 0;
 802
 803        if (res > 0) {
 804                buf = kzalloc(res, GFP_KERNEL);
 805                if (!buf)
 806                        return -ENOMEM;
 807
 808                res = vfs_getxattr(&init_user_ns, dentry, name, buf, res);
 809                if (res < 0)
 810                        kfree(buf);
 811                else
 812                        *value = buf;
 813        }
 814        return res;
 815}
 816
 817/* Copy up data of an inode which was copied up metadata only in the past. */
 818static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 819{
 820        struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 821        struct path upperpath, datapath;
 822        int err;
 823        char *capability = NULL;
 824        ssize_t cap_size;
 825
 826        ovl_path_upper(c->dentry, &upperpath);
 827        if (WARN_ON(upperpath.dentry == NULL))
 828                return -EIO;
 829
 830        ovl_path_lowerdata(c->dentry, &datapath);
 831        if (WARN_ON(datapath.dentry == NULL))
 832                return -EIO;
 833
 834        if (c->stat.size) {
 835                err = cap_size = ovl_getxattr(upperpath.dentry, XATTR_NAME_CAPS,
 836                                              &capability);
 837                if (cap_size < 0)
 838                        goto out;
 839        }
 840
 841        err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size);
 842        if (err)
 843                goto out_free;
 844
 845        /*
 846         * Writing to upper file will clear security.capability xattr. We
 847         * don't want that to happen for normal copy-up operation.
 848         */
 849        if (capability) {
 850                err = vfs_setxattr(&init_user_ns, upperpath.dentry,
 851                                   XATTR_NAME_CAPS, capability, cap_size, 0);
 852                if (err)
 853                        goto out_free;
 854        }
 855
 856
 857        err = ovl_do_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
 858        if (err)
 859                goto out_free;
 860
 861        ovl_set_upperdata(d_inode(c->dentry));
 862out_free:
 863        kfree(capability);
 864out:
 865        return err;
 866}
 867
 868static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 869                           int flags)
 870{
 871        int err;
 872        DEFINE_DELAYED_CALL(done);
 873        struct path parentpath;
 874        struct ovl_copy_up_ctx ctx = {
 875                .parent = parent,
 876                .dentry = dentry,
 877                .workdir = ovl_workdir(dentry),
 878        };
 879
 880        if (WARN_ON(!ctx.workdir))
 881                return -EROFS;
 882
 883        ovl_path_lower(dentry, &ctx.lowerpath);
 884        err = vfs_getattr(&ctx.lowerpath, &ctx.stat,
 885                          STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
 886        if (err)
 887                return err;
 888
 889        ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
 890
 891        if (parent) {
 892                ovl_path_upper(parent, &parentpath);
 893                ctx.destdir = parentpath.dentry;
 894                ctx.destname = dentry->d_name;
 895
 896                err = vfs_getattr(&parentpath, &ctx.pstat,
 897                                  STATX_ATIME | STATX_MTIME,
 898                                  AT_STATX_SYNC_AS_STAT);
 899                if (err)
 900                        return err;
 901        }
 902
 903        /* maybe truncate regular file. this has no effect on dirs */
 904        if (flags & O_TRUNC)
 905                ctx.stat.size = 0;
 906
 907        if (S_ISLNK(ctx.stat.mode)) {
 908                ctx.link = vfs_get_link(ctx.lowerpath.dentry, &done);
 909                if (IS_ERR(ctx.link))
 910                        return PTR_ERR(ctx.link);
 911        }
 912
 913        err = ovl_copy_up_start(dentry, flags);
 914        /* err < 0: interrupted, err > 0: raced with another copy-up */
 915        if (unlikely(err)) {
 916                if (err > 0)
 917                        err = 0;
 918        } else {
 919                if (!ovl_dentry_upper(dentry))
 920                        err = ovl_do_copy_up(&ctx);
 921                if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
 922                        err = ovl_link_up(&ctx);
 923                if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
 924                        err = ovl_copy_up_meta_inode_data(&ctx);
 925                ovl_copy_up_end(dentry);
 926        }
 927        do_delayed_call(&done);
 928
 929        return err;
 930}
 931
 932static int ovl_copy_up_flags(struct dentry *dentry, int flags)
 933{
 934        int err = 0;
 935        const struct cred *old_cred;
 936        bool disconnected = (dentry->d_flags & DCACHE_DISCONNECTED);
 937
 938        /*
 939         * With NFS export, copy up can get called for a disconnected non-dir.
 940         * In this case, we will copy up lower inode to index dir without
 941         * linking it to upper dir.
 942         */
 943        if (WARN_ON(disconnected && d_is_dir(dentry)))
 944                return -EIO;
 945
 946        old_cred = ovl_override_creds(dentry->d_sb);
 947        while (!err) {
 948                struct dentry *next;
 949                struct dentry *parent = NULL;
 950
 951                if (ovl_already_copied_up(dentry, flags))
 952                        break;
 953
 954                next = dget(dentry);
 955                /* find the topmost dentry not yet copied up */
 956                for (; !disconnected;) {
 957                        parent = dget_parent(next);
 958
 959                        if (ovl_dentry_upper(parent))
 960                                break;
 961
 962                        dput(next);
 963                        next = parent;
 964                }
 965
 966                err = ovl_copy_up_one(parent, next, flags);
 967
 968                dput(parent);
 969                dput(next);
 970        }
 971        revert_creds(old_cred);
 972
 973        return err;
 974}
 975
 976static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
 977{
 978        /* Copy up of disconnected dentry does not set upper alias */
 979        if (ovl_already_copied_up(dentry, flags))
 980                return false;
 981
 982        if (special_file(d_inode(dentry)->i_mode))
 983                return false;
 984
 985        if (!ovl_open_flags_need_copy_up(flags))
 986                return false;
 987
 988        return true;
 989}
 990
 991int ovl_maybe_copy_up(struct dentry *dentry, int flags)
 992{
 993        int err = 0;
 994
 995        if (ovl_open_need_copy_up(dentry, flags)) {
 996                err = ovl_want_write(dentry);
 997                if (!err) {
 998                        err = ovl_copy_up_flags(dentry, flags);
 999                        ovl_drop_write(dentry);
1000                }
1001        }
1002
1003        return err;
1004}
1005
1006int ovl_copy_up_with_data(struct dentry *dentry)
1007{
1008        return ovl_copy_up_flags(dentry, O_WRONLY);
1009}
1010
/* Copy up @dentry with no open flags. */
int ovl_copy_up(struct dentry *dentry)
{
	const int flags = 0;

	return ovl_copy_up_flags(dentry, flags);
}
1015