linux/fs/ext4/migrate.c
<<
>>
Prefs
   1/*
   2 * Copyright IBM Corporation, 2007
   3 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of version 2.1 of the GNU Lesser General Public License
   7 * as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful, but
  10 * WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12 *
  13 */
  14
  15#include <linux/slab.h>
  16#include "ext4_jbd2.h"
  17
  18/*
  19 * The contiguous blocks details which can be
  20 * represented by a single extent
  21 */
  22struct migrate_struct {
  23        ext4_lblk_t first_block, last_block, curr_block;
  24        ext4_fsblk_t first_pblock, last_pblock;
  25};
  26
  27static int finish_range(handle_t *handle, struct inode *inode,
  28                                struct migrate_struct *lb)
  29
  30{
  31        int retval = 0, needed;
  32        struct ext4_extent newext;
  33        struct ext4_ext_path *path;
  34        if (lb->first_pblock == 0)
  35                return 0;
  36
  37        /* Add the extent to temp inode*/
  38        newext.ee_block = cpu_to_le32(lb->first_block);
  39        newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
  40        ext4_ext_store_pblock(&newext, lb->first_pblock);
  41        path = ext4_ext_find_extent(inode, lb->first_block, NULL);
  42
  43        if (IS_ERR(path)) {
  44                retval = PTR_ERR(path);
  45                path = NULL;
  46                goto err_out;
  47        }
  48
  49        /*
  50         * Calculate the credit needed to inserting this extent
  51         * Since we are doing this in loop we may accumalate extra
  52         * credit. But below we try to not accumalate too much
  53         * of them by restarting the journal.
  54         */
  55        needed = ext4_ext_calc_credits_for_single_extent(inode,
  56                    lb->last_block - lb->first_block + 1, path);
  57
  58        /*
  59         * Make sure the credit we accumalated is not really high
  60         */
  61        if (needed && ext4_handle_has_enough_credits(handle,
  62                                                EXT4_RESERVE_TRANS_BLOCKS)) {
  63                retval = ext4_journal_restart(handle, needed);
  64                if (retval)
  65                        goto err_out;
  66        } else if (needed) {
  67                retval = ext4_journal_extend(handle, needed);
  68                if (retval) {
  69                        /*
  70                         * IF not able to extend the journal restart the journal
  71                         */
  72                        retval = ext4_journal_restart(handle, needed);
  73                        if (retval)
  74                                goto err_out;
  75                }
  76        }
  77        retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
  78err_out:
  79        if (path) {
  80                ext4_ext_drop_refs(path);
  81                kfree(path);
  82        }
  83        lb->first_pblock = 0;
  84        return retval;
  85}
  86
  87static int update_extent_range(handle_t *handle, struct inode *inode,
  88                               ext4_fsblk_t pblock, struct migrate_struct *lb)
  89{
  90        int retval;
  91        /*
  92         * See if we can add on to the existing range (if it exists)
  93         */
  94        if (lb->first_pblock &&
  95                (lb->last_pblock+1 == pblock) &&
  96                (lb->last_block+1 == lb->curr_block)) {
  97                lb->last_pblock = pblock;
  98                lb->last_block = lb->curr_block;
  99                lb->curr_block++;
 100                return 0;
 101        }
 102        /*
 103         * Start a new range.
 104         */
 105        retval = finish_range(handle, inode, lb);
 106        lb->first_pblock = lb->last_pblock = pblock;
 107        lb->first_block = lb->last_block = lb->curr_block;
 108        lb->curr_block++;
 109        return retval;
 110}
 111
 112static int update_ind_extent_range(handle_t *handle, struct inode *inode,
 113                                   ext4_fsblk_t pblock,
 114                                   struct migrate_struct *lb)
 115{
 116        struct buffer_head *bh;
 117        __le32 *i_data;
 118        int i, retval = 0;
 119        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 120
 121        bh = sb_bread(inode->i_sb, pblock);
 122        if (!bh)
 123                return -EIO;
 124
 125        i_data = (__le32 *)bh->b_data;
 126        for (i = 0; i < max_entries; i++) {
 127                if (i_data[i]) {
 128                        retval = update_extent_range(handle, inode,
 129                                                le32_to_cpu(i_data[i]), lb);
 130                        if (retval)
 131                                break;
 132                } else {
 133                        lb->curr_block++;
 134                }
 135        }
 136        put_bh(bh);
 137        return retval;
 138
 139}
 140
 141static int update_dind_extent_range(handle_t *handle, struct inode *inode,
 142                                    ext4_fsblk_t pblock,
 143                                    struct migrate_struct *lb)
 144{
 145        struct buffer_head *bh;
 146        __le32 *i_data;
 147        int i, retval = 0;
 148        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 149
 150        bh = sb_bread(inode->i_sb, pblock);
 151        if (!bh)
 152                return -EIO;
 153
 154        i_data = (__le32 *)bh->b_data;
 155        for (i = 0; i < max_entries; i++) {
 156                if (i_data[i]) {
 157                        retval = update_ind_extent_range(handle, inode,
 158                                                le32_to_cpu(i_data[i]), lb);
 159                        if (retval)
 160                                break;
 161                } else {
 162                        /* Only update the file block number */
 163                        lb->curr_block += max_entries;
 164                }
 165        }
 166        put_bh(bh);
 167        return retval;
 168
 169}
 170
 171static int update_tind_extent_range(handle_t *handle, struct inode *inode,
 172                                    ext4_fsblk_t pblock,
 173                                    struct migrate_struct *lb)
 174{
 175        struct buffer_head *bh;
 176        __le32 *i_data;
 177        int i, retval = 0;
 178        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 179
 180        bh = sb_bread(inode->i_sb, pblock);
 181        if (!bh)
 182                return -EIO;
 183
 184        i_data = (__le32 *)bh->b_data;
 185        for (i = 0; i < max_entries; i++) {
 186                if (i_data[i]) {
 187                        retval = update_dind_extent_range(handle, inode,
 188                                                le32_to_cpu(i_data[i]), lb);
 189                        if (retval)
 190                                break;
 191                } else {
 192                        /* Only update the file block number */
 193                        lb->curr_block += max_entries * max_entries;
 194                }
 195        }
 196        put_bh(bh);
 197        return retval;
 198
 199}
 200
 201static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
 202{
 203        int retval = 0, needed;
 204
 205        if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
 206                return 0;
 207        /*
 208         * We are freeing a blocks. During this we touch
 209         * superblock, group descriptor and block bitmap.
 210         * So allocate a credit of 3. We may update
 211         * quota (user and group).
 212         */
 213        needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 214
 215        if (ext4_journal_extend(handle, needed) != 0)
 216                retval = ext4_journal_restart(handle, needed);
 217
 218        return retval;
 219}
 220
 221static int free_dind_blocks(handle_t *handle,
 222                                struct inode *inode, __le32 i_data)
 223{
 224        int i;
 225        __le32 *tmp_idata;
 226        struct buffer_head *bh;
 227        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 228
 229        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 230        if (!bh)
 231                return -EIO;
 232
 233        tmp_idata = (__le32 *)bh->b_data;
 234        for (i = 0; i < max_entries; i++) {
 235                if (tmp_idata[i]) {
 236                        extend_credit_for_blkdel(handle, inode);
 237                        ext4_free_blocks(handle, inode, NULL,
 238                                         le32_to_cpu(tmp_idata[i]), 1,
 239                                         EXT4_FREE_BLOCKS_METADATA |
 240                                         EXT4_FREE_BLOCKS_FORGET);
 241                }
 242        }
 243        put_bh(bh);
 244        extend_credit_for_blkdel(handle, inode);
 245        ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 246                         EXT4_FREE_BLOCKS_METADATA |
 247                         EXT4_FREE_BLOCKS_FORGET);
 248        return 0;
 249}
 250
 251static int free_tind_blocks(handle_t *handle,
 252                                struct inode *inode, __le32 i_data)
 253{
 254        int i, retval = 0;
 255        __le32 *tmp_idata;
 256        struct buffer_head *bh;
 257        unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
 258
 259        bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
 260        if (!bh)
 261                return -EIO;
 262
 263        tmp_idata = (__le32 *)bh->b_data;
 264        for (i = 0; i < max_entries; i++) {
 265                if (tmp_idata[i]) {
 266                        retval = free_dind_blocks(handle,
 267                                        inode, tmp_idata[i]);
 268                        if (retval) {
 269                                put_bh(bh);
 270                                return retval;
 271                        }
 272                }
 273        }
 274        put_bh(bh);
 275        extend_credit_for_blkdel(handle, inode);
 276        ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
 277                         EXT4_FREE_BLOCKS_METADATA |
 278                         EXT4_FREE_BLOCKS_FORGET);
 279        return 0;
 280}
 281
 282static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
 283{
 284        int retval;
 285
 286        /* ei->i_data[EXT4_IND_BLOCK] */
 287        if (i_data[0]) {
 288                extend_credit_for_blkdel(handle, inode);
 289                ext4_free_blocks(handle, inode, NULL,
 290                                le32_to_cpu(i_data[0]), 1,
 291                                 EXT4_FREE_BLOCKS_METADATA |
 292                                 EXT4_FREE_BLOCKS_FORGET);
 293        }
 294
 295        /* ei->i_data[EXT4_DIND_BLOCK] */
 296        if (i_data[1]) {
 297                retval = free_dind_blocks(handle, inode, i_data[1]);
 298                if (retval)
 299                        return retval;
 300        }
 301
 302        /* ei->i_data[EXT4_TIND_BLOCK] */
 303        if (i_data[2]) {
 304                retval = free_tind_blocks(handle, inode, i_data[2]);
 305                if (retval)
 306                        return retval;
 307        }
 308        return 0;
 309}
 310
 311static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 312                                                struct inode *tmp_inode)
 313{
 314        int retval;
 315        __le32  i_data[3];
 316        struct ext4_inode_info *ei = EXT4_I(inode);
 317        struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
 318
 319        /*
 320         * One credit accounted for writing the
 321         * i_data field of the original inode
 322         */
 323        retval = ext4_journal_extend(handle, 1);
 324        if (retval) {
 325                retval = ext4_journal_restart(handle, 1);
 326                if (retval)
 327                        goto err_out;
 328        }
 329
 330        i_data[0] = ei->i_data[EXT4_IND_BLOCK];
 331        i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
 332        i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
 333
 334        down_write(&EXT4_I(inode)->i_data_sem);
 335        /*
 336         * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
 337         * happened after we started the migrate. We need to
 338         * fail the migrate
 339         */
 340        if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
 341                retval = -EAGAIN;
 342                up_write(&EXT4_I(inode)->i_data_sem);
 343                goto err_out;
 344        } else
 345                ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 346        /*
 347         * We have the extent map build with the tmp inode.
 348         * Now copy the i_data across
 349         */
 350        ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
 351        memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
 352
 353        /*
 354         * Update i_blocks with the new blocks that got
 355         * allocated while adding extents for extent index
 356         * blocks.
 357         *
 358         * While converting to extents we need not
 359         * update the orignal inode i_blocks for extent blocks
 360         * via quota APIs. The quota update happened via tmp_inode already.
 361         */
 362        spin_lock(&inode->i_lock);
 363        inode->i_blocks += tmp_inode->i_blocks;
 364        spin_unlock(&inode->i_lock);
 365        up_write(&EXT4_I(inode)->i_data_sem);
 366
 367        /*
 368         * We mark the inode dirty after, because we decrement the
 369         * i_blocks when freeing the indirect meta-data blocks
 370         */
 371        retval = free_ind_block(handle, inode, i_data);
 372        ext4_mark_inode_dirty(handle, inode);
 373
 374err_out:
 375        return retval;
 376}
 377
 378static int free_ext_idx(handle_t *handle, struct inode *inode,
 379                                        struct ext4_extent_idx *ix)
 380{
 381        int i, retval = 0;
 382        ext4_fsblk_t block;
 383        struct buffer_head *bh;
 384        struct ext4_extent_header *eh;
 385
 386        block = ext4_idx_pblock(ix);
 387        bh = sb_bread(inode->i_sb, block);
 388        if (!bh)
 389                return -EIO;
 390
 391        eh = (struct ext4_extent_header *)bh->b_data;
 392        if (eh->eh_depth != 0) {
 393                ix = EXT_FIRST_INDEX(eh);
 394                for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 395                        retval = free_ext_idx(handle, inode, ix);
 396                        if (retval)
 397                                break;
 398                }
 399        }
 400        put_bh(bh);
 401        extend_credit_for_blkdel(handle, inode);
 402        ext4_free_blocks(handle, inode, NULL, block, 1,
 403                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 404        return retval;
 405}
 406
 407/*
 408 * Free the extent meta data blocks only
 409 */
 410static int free_ext_block(handle_t *handle, struct inode *inode)
 411{
 412        int i, retval = 0;
 413        struct ext4_inode_info *ei = EXT4_I(inode);
 414        struct ext4_extent_header *eh = (struct ext4_extent_header *)ei->i_data;
 415        struct ext4_extent_idx *ix;
 416        if (eh->eh_depth == 0)
 417                /*
 418                 * No extra blocks allocated for extent meta data
 419                 */
 420                return 0;
 421        ix = EXT_FIRST_INDEX(eh);
 422        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
 423                retval = free_ext_idx(handle, inode, ix);
 424                if (retval)
 425                        return retval;
 426        }
 427        return retval;
 428
 429}
 430
 431int ext4_ext_migrate(struct inode *inode)
 432{
 433        handle_t *handle;
 434        int retval = 0, i;
 435        __le32 *i_data;
 436        struct ext4_inode_info *ei;
 437        struct inode *tmp_inode = NULL;
 438        struct migrate_struct lb;
 439        unsigned long max_entries;
 440        __u32 goal;
 441        uid_t owner[2];
 442
 443        /*
 444         * If the filesystem does not support extents, or the inode
 445         * already is extent-based, error out.
 446         */
 447        if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
 448                                       EXT4_FEATURE_INCOMPAT_EXTENTS) ||
 449            (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 450                return -EINVAL;
 451
 452        if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
 453                /*
 454                 * don't migrate fast symlink
 455                 */
 456                return retval;
 457
 458        handle = ext4_journal_start(inode,
 459                                        EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
 460                                        EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
 461                                        EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
 462                                        + 1);
 463        if (IS_ERR(handle)) {
 464                retval = PTR_ERR(handle);
 465                return retval;
 466        }
 467        goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
 468                EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
 469        owner[0] = i_uid_read(inode);
 470        owner[1] = i_gid_read(inode);
 471        tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
 472                                   S_IFREG, NULL, goal, owner);
 473        if (IS_ERR(tmp_inode)) {
 474                retval = PTR_ERR(tmp_inode);
 475                ext4_journal_stop(handle);
 476                return retval;
 477        }
 478        i_size_write(tmp_inode, i_size_read(inode));
 479        /*
 480         * Set the i_nlink to zero so it will be deleted later
 481         * when we drop inode reference.
 482         */
 483        clear_nlink(tmp_inode);
 484
 485        ext4_ext_tree_init(handle, tmp_inode);
 486        ext4_orphan_add(handle, tmp_inode);
 487        ext4_journal_stop(handle);
 488
 489        /*
 490         * start with one credit accounted for
 491         * superblock modification.
 492         *
 493         * For the tmp_inode we already have committed the
 494         * trascation that created the inode. Later as and
 495         * when we add extents we extent the journal
 496         */
 497        /*
 498         * Even though we take i_mutex we can still cause block
 499         * allocation via mmap write to holes. If we have allocated
 500         * new blocks we fail migrate.  New block allocation will
 501         * clear EXT4_STATE_EXT_MIGRATE flag.  The flag is updated
 502         * with i_data_sem held to prevent racing with block
 503         * allocation.
 504         */
 505        down_read((&EXT4_I(inode)->i_data_sem));
 506        ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 507        up_read((&EXT4_I(inode)->i_data_sem));
 508
 509        handle = ext4_journal_start(inode, 1);
 510        if (IS_ERR(handle)) {
 511                /*
 512                 * It is impossible to update on-disk structures without
 513                 * a handle, so just rollback in-core changes and live other
 514                 * work to orphan_list_cleanup()
 515                 */
 516                ext4_orphan_del(NULL, tmp_inode);
 517                retval = PTR_ERR(handle);
 518                goto out;
 519        }
 520
 521        ei = EXT4_I(inode);
 522        i_data = ei->i_data;
 523        memset(&lb, 0, sizeof(lb));
 524
 525        /* 32 bit block address 4 bytes */
 526        max_entries = inode->i_sb->s_blocksize >> 2;
 527        for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
 528                if (i_data[i]) {
 529                        retval = update_extent_range(handle, tmp_inode,
 530                                                le32_to_cpu(i_data[i]), &lb);
 531                        if (retval)
 532                                goto err_out;
 533                } else
 534                        lb.curr_block++;
 535        }
 536        if (i_data[EXT4_IND_BLOCK]) {
 537                retval = update_ind_extent_range(handle, tmp_inode,
 538                                le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
 539                        if (retval)
 540                                goto err_out;
 541        } else
 542                lb.curr_block += max_entries;
 543        if (i_data[EXT4_DIND_BLOCK]) {
 544                retval = update_dind_extent_range(handle, tmp_inode,
 545                                le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
 546                        if (retval)
 547                                goto err_out;
 548        } else
 549                lb.curr_block += max_entries * max_entries;
 550        if (i_data[EXT4_TIND_BLOCK]) {
 551                retval = update_tind_extent_range(handle, tmp_inode,
 552                                le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
 553                        if (retval)
 554                                goto err_out;
 555        }
 556        /*
 557         * Build the last extent
 558         */
 559        retval = finish_range(handle, tmp_inode, &lb);
 560err_out:
 561        if (retval)
 562                /*
 563                 * Failure case delete the extent information with the
 564                 * tmp_inode
 565                 */
 566                free_ext_block(handle, tmp_inode);
 567        else {
 568                retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
 569                if (retval)
 570                        /*
 571                         * if we fail to swap inode data free the extent
 572                         * details of the tmp inode
 573                         */
 574                        free_ext_block(handle, tmp_inode);
 575        }
 576
 577        /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
 578        if (ext4_journal_extend(handle, 1) != 0)
 579                ext4_journal_restart(handle, 1);
 580
 581        /*
 582         * Mark the tmp_inode as of size zero
 583         */
 584        i_size_write(tmp_inode, 0);
 585
 586        /*
 587         * set the  i_blocks count to zero
 588         * so that the ext4_delete_inode does the
 589         * right job
 590         *
 591         * We don't need to take the i_lock because
 592         * the inode is not visible to user space.
 593         */
 594        tmp_inode->i_blocks = 0;
 595
 596        /* Reset the extent details */
 597        ext4_ext_tree_init(handle, tmp_inode);
 598        ext4_journal_stop(handle);
 599out:
 600        unlock_new_inode(tmp_inode);
 601        iput(tmp_inode);
 602
 603        return retval;
 604}
 605
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.