linux/fs/ext4/super.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/ext4/super.c
   3 *
   4 * Copyright (C) 1992, 1993, 1994, 1995
   5 * Remy Card (card@masi.ibp.fr)
   6 * Laboratoire MASI - Institut Blaise Pascal
   7 * Universite Pierre et Marie Curie (Paris VI)
   8 *
   9 *  from
  10 *
  11 *  linux/fs/minix/inode.c
  12 *
  13 *  Copyright (C) 1991, 1992  Linus Torvalds
  14 *
  15 *  Big-endian to little-endian byte-swapping/bitmaps by
  16 *        David S. Miller (davem@caip.rutgers.edu), 1995
  17 */
  18
  19#include <linux/module.h>
  20#include <linux/string.h>
  21#include <linux/fs.h>
  22#include <linux/time.h>
  23#include <linux/vmalloc.h>
  24#include <linux/jbd2.h>
  25#include <linux/slab.h>
  26#include <linux/init.h>
  27#include <linux/blkdev.h>
  28#include <linux/parser.h>
  29#include <linux/buffer_head.h>
  30#include <linux/exportfs.h>
  31#include <linux/vfs.h>
  32#include <linux/random.h>
  33#include <linux/mount.h>
  34#include <linux/namei.h>
  35#include <linux/quotaops.h>
  36#include <linux/seq_file.h>
  37#include <linux/proc_fs.h>
  38#include <linux/ctype.h>
  39#include <linux/log2.h>
  40#include <linux/crc16.h>
  41#include <linux/cleancache.h>
  42#include <asm/uaccess.h>
  43
  44#include <linux/kthread.h>
  45#include <linux/freezer.h>
  46
  47#include "ext4.h"
  48#include "ext4_extents.h"
  49#include "ext4_jbd2.h"
  50#include "xattr.h"
  51#include "acl.h"
  52#include "mballoc.h"
  53
  54#define CREATE_TRACE_POINTS
  55#include <trace/events/ext4.h>
  56
/*
 * File-scope state shared by all ext4 mounts handled by this file.
 * ext4_proc_root is the parent under which ext4_put_super() removes the
 * per-superblock proc directory; the remaining objects are used by code
 * outside this part of the file.
 */
static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
static struct ext4_features *ext4_feat;
  62
/*
 * Prototypes for static functions that are referenced in this file before
 * (or without) a preceding definition.
 */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
                                   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static const char *ext4_decode_error(struct super_block *sb, int errno,
                                     char nbuf[16]);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
                       const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
  86
/*
 * When neither CONFIG_EXT2_FS nor CONFIG_EXT2_FS_MODULE is defined and
 * CONFIG_EXT4_USE_FOR_EXT23 is, define an "ext2" filesystem type whose
 * mount entry point is ext4_mount().  IS_EXT2_SB() then tests whether the
 * superblock's block device holder is this structure; in every other
 * configuration it is the constant 0.
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext2",
        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
  99
 100
/*
 * When neither CONFIG_EXT3_FS nor CONFIG_EXT3_FS_MODULE is defined and
 * CONFIG_EXT4_USE_FOR_EXT23 is, define an "ext3" filesystem type whose
 * mount entry point is ext4_mount().  IS_EXT3_SB() then tests whether the
 * superblock's block device holder is this structure; in every other
 * configuration it is the constant 0.
 */
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext3",
        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
#else
#define IS_EXT3_SB(sb) (0)
#endif
 113
 114static int ext4_verify_csum_type(struct super_block *sb,
 115                                 struct ext4_super_block *es)
 116{
 117        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 118                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 119                return 1;
 120
 121        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 122}
 123
 124static __le32 ext4_superblock_csum(struct super_block *sb,
 125                                   struct ext4_super_block *es)
 126{
 127        struct ext4_sb_info *sbi = EXT4_SB(sb);
 128        int offset = offsetof(struct ext4_super_block, s_checksum);
 129        __u32 csum;
 130
 131        csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 132
 133        return cpu_to_le32(csum);
 134}
 135
 136int ext4_superblock_csum_verify(struct super_block *sb,
 137                                struct ext4_super_block *es)
 138{
 139        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 140                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 141                return 1;
 142
 143        return es->s_checksum == ext4_superblock_csum(sb, es);
 144}
 145
 146void ext4_superblock_csum_set(struct super_block *sb,
 147                              struct ext4_super_block *es)
 148{
 149        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 150                EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
 151                return;
 152
 153        es->s_checksum = ext4_superblock_csum(sb, es);
 154}
 155
 156void *ext4_kvmalloc(size_t size, gfp_t flags)
 157{
 158        void *ret;
 159
 160        ret = kmalloc(size, flags);
 161        if (!ret)
 162                ret = __vmalloc(size, flags, PAGE_KERNEL);
 163        return ret;
 164}
 165
 166void *ext4_kvzalloc(size_t size, gfp_t flags)
 167{
 168        void *ret;
 169
 170        ret = kzalloc(size, flags);
 171        if (!ret)
 172                ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
 173        return ret;
 174}
 175
 176void ext4_kvfree(void *ptr)
 177{
 178        if (is_vmalloc_addr(ptr))
 179                vfree(ptr);
 180        else
 181                kfree(ptr);
 182
 183}
 184
 185ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 186                               struct ext4_group_desc *bg)
 187{
 188        return le32_to_cpu(bg->bg_block_bitmap_lo) |
 189                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 190                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 191}
 192
 193ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 194                               struct ext4_group_desc *bg)
 195{
 196        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 197                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 198                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 199}
 200
 201ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 202                              struct ext4_group_desc *bg)
 203{
 204        return le32_to_cpu(bg->bg_inode_table_lo) |
 205                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 206                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 207}
 208
 209__u32 ext4_free_group_clusters(struct super_block *sb,
 210                               struct ext4_group_desc *bg)
 211{
 212        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 213                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 214                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 215}
 216
 217__u32 ext4_free_inodes_count(struct super_block *sb,
 218                              struct ext4_group_desc *bg)
 219{
 220        return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 221                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 222                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 223}
 224
 225__u32 ext4_used_dirs_count(struct super_block *sb,
 226                              struct ext4_group_desc *bg)
 227{
 228        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 229                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 230                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 231}
 232
 233__u32 ext4_itable_unused_count(struct super_block *sb,
 234                              struct ext4_group_desc *bg)
 235{
 236        return le16_to_cpu(bg->bg_itable_unused_lo) |
 237                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 238                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 239}
 240
 241void ext4_block_bitmap_set(struct super_block *sb,
 242                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 243{
 244        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 245        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 246                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 247}
 248
 249void ext4_inode_bitmap_set(struct super_block *sb,
 250                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 251{
 252        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 253        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 254                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 255}
 256
 257void ext4_inode_table_set(struct super_block *sb,
 258                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
 259{
 260        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 261        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 262                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 263}
 264
 265void ext4_free_group_clusters_set(struct super_block *sb,
 266                                  struct ext4_group_desc *bg, __u32 count)
 267{
 268        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 269        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 270                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 271}
 272
 273void ext4_free_inodes_set(struct super_block *sb,
 274                          struct ext4_group_desc *bg, __u32 count)
 275{
 276        bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 277        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 278                bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 279}
 280
 281void ext4_used_dirs_set(struct super_block *sb,
 282                          struct ext4_group_desc *bg, __u32 count)
 283{
 284        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 285        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 286                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 287}
 288
 289void ext4_itable_unused_set(struct super_block *sb,
 290                          struct ext4_group_desc *bg, __u32 count)
 291{
 292        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 293        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 294                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 295}
 296
 297
 298/* Just increment the non-pointer handle value */
 299static handle_t *ext4_get_nojournal(void)
 300{
 301        handle_t *handle = current->journal_info;
 302        unsigned long ref_cnt = (unsigned long)handle;
 303
 304        BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
 305
 306        ref_cnt++;
 307        handle = (handle_t *)ref_cnt;
 308
 309        current->journal_info = handle;
 310        return handle;
 311}
 312
 313
 314/* Decrement the non-pointer handle value */
 315static void ext4_put_nojournal(handle_t *handle)
 316{
 317        unsigned long ref_cnt = (unsigned long)handle;
 318
 319        BUG_ON(ref_cnt == 0);
 320
 321        ref_cnt--;
 322        handle = (handle_t *)ref_cnt;
 323
 324        current->journal_info = handle;
 325}
 326
 327/*
 328 * Wrappers for jbd2_journal_start/end.
 329 */
 330handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 331{
 332        journal_t *journal;
 333
 334        trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 335        if (sb->s_flags & MS_RDONLY)
 336                return ERR_PTR(-EROFS);
 337
 338        WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
 339        journal = EXT4_SB(sb)->s_journal;
 340        if (!journal)
 341                return ext4_get_nojournal();
 342        /*
 343         * Special case here: if the journal has aborted behind our
 344         * backs (eg. EIO in the commit thread), then we still need to
 345         * take the FS itself readonly cleanly.
 346         */
 347        if (is_journal_aborted(journal)) {
 348                ext4_abort(sb, "Detected aborted journal");
 349                return ERR_PTR(-EROFS);
 350        }
 351        return jbd2_journal_start(journal, nblocks);
 352}
 353
 354int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
 355{
 356        struct super_block *sb;
 357        int err;
 358        int rc;
 359
 360        if (!ext4_handle_valid(handle)) {
 361                ext4_put_nojournal(handle);
 362                return 0;
 363        }
 364        sb = handle->h_transaction->t_journal->j_private;
 365        err = handle->h_err;
 366        rc = jbd2_journal_stop(handle);
 367
 368        if (!err)
 369                err = rc;
 370        if (err)
 371                __ext4_std_error(sb, where, line, err);
 372        return err;
 373}
 374
 375void ext4_journal_abort_handle(const char *caller, unsigned int line,
 376                               const char *err_fn, struct buffer_head *bh,
 377                               handle_t *handle, int err)
 378{
 379        char nbuf[16];
 380        const char *errstr = ext4_decode_error(NULL, err, nbuf);
 381
 382        BUG_ON(!ext4_handle_valid(handle));
 383
 384        if (bh)
 385                BUFFER_TRACE(bh, "abort");
 386
 387        if (!handle->h_err)
 388                handle->h_err = err;
 389
 390        if (is_handle_aborted(handle))
 391                return;
 392
 393        printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
 394               caller, line, errstr, err_fn);
 395
 396        jbd2_journal_abort_handle(handle);
 397}
 398
 399static void __save_error_info(struct super_block *sb, const char *func,
 400                            unsigned int line)
 401{
 402        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 403
 404        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 405        es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 406        es->s_last_error_time = cpu_to_le32(get_seconds());
 407        strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
 408        es->s_last_error_line = cpu_to_le32(line);
 409        if (!es->s_first_error_time) {
 410                es->s_first_error_time = es->s_last_error_time;
 411                strncpy(es->s_first_error_func, func,
 412                        sizeof(es->s_first_error_func));
 413                es->s_first_error_line = cpu_to_le32(line);
 414                es->s_first_error_ino = es->s_last_error_ino;
 415                es->s_first_error_block = es->s_last_error_block;
 416        }
 417        /*
 418         * Start the daily error reporting function if it hasn't been
 419         * started already
 420         */
 421        if (!es->s_error_count)
 422                mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
 423        es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
 424}
 425
 426static void save_error_info(struct super_block *sb, const char *func,
 427                            unsigned int line)
 428{
 429        __save_error_info(sb, func, line);
 430        ext4_commit_super(sb, 1);
 431}
 432
 433/*
 434 * The del_gendisk() function uninitializes the disk-specific data
 435 * structures, including the bdi structure, without telling anyone
 436 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 437 * (for example, by ext4_commit_super), will cause a kernel OOPS.
 438 * This is a kludge to prevent these oops until we can put in a proper
 439 * hook in del_gendisk() to inform the VFS and file system layers.
 440 */
 441static int block_device_ejected(struct super_block *sb)
 442{
 443        struct inode *bd_inode = sb->s_bdev->bd_inode;
 444        struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
 445
 446        return bdi->dev == NULL;
 447}
 448
 449static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 450{
 451        struct super_block              *sb = journal->j_private;
 452        struct ext4_sb_info             *sbi = EXT4_SB(sb);
 453        int                             error = is_journal_aborted(journal);
 454        struct ext4_journal_cb_entry    *jce, *tmp;
 455
 456        spin_lock(&sbi->s_md_lock);
 457        list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
 458                list_del_init(&jce->jce_list);
 459                spin_unlock(&sbi->s_md_lock);
 460                jce->jce_func(sb, jce, error);
 461                spin_lock(&sbi->s_md_lock);
 462        }
 463        spin_unlock(&sbi->s_md_lock);
 464}
 465
 466/* Deal with the reporting of failure conditions on a filesystem such as
 467 * inconsistencies detected or read IO failures.
 468 *
 469 * On ext2, we can store the error state of the filesystem in the
 470 * superblock.  That is not possible on ext4, because we may have other
 471 * write ordering constraints on the superblock which prevent us from
 472 * writing it out straight away; and given that the journal is about to
 473 * be aborted, we can't rely on the current, or future, transactions to
 474 * write out the superblock safely.
 475 *
 476 * We'll just use the jbd2_journal_abort() error code to record an error in
 477 * the journal instead.  On recovery, the journal will complain about
 478 * that error until we've noted it down and cleared it.
 479 */
 480
 481static void ext4_handle_error(struct super_block *sb)
 482{
 483        if (sb->s_flags & MS_RDONLY)
 484                return;
 485
 486        if (!test_opt(sb, ERRORS_CONT)) {
 487                journal_t *journal = EXT4_SB(sb)->s_journal;
 488
 489                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 490                if (journal)
 491                        jbd2_journal_abort(journal, -EIO);
 492        }
 493        if (test_opt(sb, ERRORS_RO)) {
 494                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 495                sb->s_flags |= MS_RDONLY;
 496        }
 497        if (test_opt(sb, ERRORS_PANIC))
 498                panic("EXT4-fs (device %s): panic forced after error\n",
 499                        sb->s_id);
 500}
 501
 502void __ext4_error(struct super_block *sb, const char *function,
 503                  unsigned int line, const char *fmt, ...)
 504{
 505        struct va_format vaf;
 506        va_list args;
 507
 508        va_start(args, fmt);
 509        vaf.fmt = fmt;
 510        vaf.va = &args;
 511        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 512               sb->s_id, function, line, current->comm, &vaf);
 513        va_end(args);
 514        save_error_info(sb, function, line);
 515
 516        ext4_handle_error(sb);
 517}
 518
 519void ext4_error_inode(struct inode *inode, const char *function,
 520                      unsigned int line, ext4_fsblk_t block,
 521                      const char *fmt, ...)
 522{
 523        va_list args;
 524        struct va_format vaf;
 525        struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 526
 527        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 528        es->s_last_error_block = cpu_to_le64(block);
 529        save_error_info(inode->i_sb, function, line);
 530        va_start(args, fmt);
 531        vaf.fmt = fmt;
 532        vaf.va = &args;
 533        if (block)
 534                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 535                       "inode #%lu: block %llu: comm %s: %pV\n",
 536                       inode->i_sb->s_id, function, line, inode->i_ino,
 537                       block, current->comm, &vaf);
 538        else
 539                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 540                       "inode #%lu: comm %s: %pV\n",
 541                       inode->i_sb->s_id, function, line, inode->i_ino,
 542                       current->comm, &vaf);
 543        va_end(args);
 544
 545        ext4_handle_error(inode->i_sb);
 546}
 547
 548void ext4_error_file(struct file *file, const char *function,
 549                     unsigned int line, ext4_fsblk_t block,
 550                     const char *fmt, ...)
 551{
 552        va_list args;
 553        struct va_format vaf;
 554        struct ext4_super_block *es;
 555        struct inode *inode = file->f_dentry->d_inode;
 556        char pathname[80], *path;
 557
 558        es = EXT4_SB(inode->i_sb)->s_es;
 559        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 560        save_error_info(inode->i_sb, function, line);
 561        path = d_path(&(file->f_path), pathname, sizeof(pathname));
 562        if (IS_ERR(path))
 563                path = "(unknown)";
 564        va_start(args, fmt);
 565        vaf.fmt = fmt;
 566        vaf.va = &args;
 567        if (block)
 568                printk(KERN_CRIT
 569                       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 570                       "block %llu: comm %s: path %s: %pV\n",
 571                       inode->i_sb->s_id, function, line, inode->i_ino,
 572                       block, current->comm, path, &vaf);
 573        else
 574                printk(KERN_CRIT
 575                       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 576                       "comm %s: path %s: %pV\n",
 577                       inode->i_sb->s_id, function, line, inode->i_ino,
 578                       current->comm, path, &vaf);
 579        va_end(args);
 580
 581        ext4_handle_error(inode->i_sb);
 582}
 583
 584static const char *ext4_decode_error(struct super_block *sb, int errno,
 585                                     char nbuf[16])
 586{
 587        char *errstr = NULL;
 588
 589        switch (errno) {
 590        case -EIO:
 591                errstr = "IO failure";
 592                break;
 593        case -ENOMEM:
 594                errstr = "Out of memory";
 595                break;
 596        case -EROFS:
 597                if (!sb || (EXT4_SB(sb)->s_journal &&
 598                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 599                        errstr = "Journal has aborted";
 600                else
 601                        errstr = "Readonly filesystem";
 602                break;
 603        default:
 604                /* If the caller passed in an extra buffer for unknown
 605                 * errors, textualise them now.  Else we just return
 606                 * NULL. */
 607                if (nbuf) {
 608                        /* Check for truncated error codes... */
 609                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 610                                errstr = nbuf;
 611                }
 612                break;
 613        }
 614
 615        return errstr;
 616}
 617
 618/* __ext4_std_error decodes expected errors from journaling functions
 619 * automatically and invokes the appropriate error response.  */
 620
 621void __ext4_std_error(struct super_block *sb, const char *function,
 622                      unsigned int line, int errno)
 623{
 624        char nbuf[16];
 625        const char *errstr;
 626
 627        /* Special case: if the error is EROFS, and we're not already
 628         * inside a transaction, then there's really no point in logging
 629         * an error. */
 630        if (errno == -EROFS && journal_current_handle() == NULL &&
 631            (sb->s_flags & MS_RDONLY))
 632                return;
 633
 634        errstr = ext4_decode_error(sb, errno, nbuf);
 635        printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 636               sb->s_id, function, line, errstr);
 637        save_error_info(sb, function, line);
 638
 639        ext4_handle_error(sb);
 640}
 641
 642/*
 643 * ext4_abort is a much stronger failure handler than ext4_error.  The
 644 * abort function may be used to deal with unrecoverable failures such
 645 * as journal IO errors or ENOMEM at a critical moment in log management.
 646 *
 647 * We unconditionally force the filesystem into an ABORT|READONLY state,
 648 * unless the error response on the fs has been set to panic in which
 649 * case we take the easy way out and panic immediately.
 650 */
 651
 652void __ext4_abort(struct super_block *sb, const char *function,
 653                unsigned int line, const char *fmt, ...)
 654{
 655        va_list args;
 656
 657        save_error_info(sb, function, line);
 658        va_start(args, fmt);
 659        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
 660               function, line);
 661        vprintk(fmt, args);
 662        printk("\n");
 663        va_end(args);
 664
 665        if ((sb->s_flags & MS_RDONLY) == 0) {
 666                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 667                sb->s_flags |= MS_RDONLY;
 668                EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
 669                if (EXT4_SB(sb)->s_journal)
 670                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 671                save_error_info(sb, function, line);
 672        }
 673        if (test_opt(sb, ERRORS_PANIC))
 674                panic("EXT4-fs panic from previous error\n");
 675}
 676
 677void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
 678{
 679        struct va_format vaf;
 680        va_list args;
 681
 682        va_start(args, fmt);
 683        vaf.fmt = fmt;
 684        vaf.va = &args;
 685        printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 686        va_end(args);
 687}
 688
 689void __ext4_warning(struct super_block *sb, const char *function,
 690                    unsigned int line, const char *fmt, ...)
 691{
 692        struct va_format vaf;
 693        va_list args;
 694
 695        va_start(args, fmt);
 696        vaf.fmt = fmt;
 697        vaf.va = &args;
 698        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 699               sb->s_id, function, line, &vaf);
 700        va_end(args);
 701}
 702
/*
 * Report an error for group @grp from a context annotated as releasing
 * and re-acquiring the group bitlock.
 *
 * @ino and @block are stored as the superblock's "last error" location and
 * the error is saved in memory with __save_error_info() (no synchronous
 * superblock write).  With errors=continue the superblock commit is
 * requested with sync == 0 and the function returns without touching the
 * group lock.  Otherwise the group lock is dropped around
 * ext4_handle_error() and taken again before returning.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
                             struct super_block *sb, ext4_group_t grp,
                             unsigned long ino, ext4_fsblk_t block,
                             const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
        struct va_format vaf;
        va_list args;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;

        es->s_last_error_ino = cpu_to_le32(ino);
        es->s_last_error_block = cpu_to_le64(block);
        __save_error_info(sb, function, line);

        va_start(args, fmt);

        vaf.fmt = fmt;
        vaf.va = &args;
        /* The message is assembled from several KERN_CONT fragments. */
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
               sb->s_id, function, line, grp);
        if (ino)
                printk(KERN_CONT "inode %lu: ", ino);
        if (block)
                printk(KERN_CONT "block %llu:", (unsigned long long) block);
        printk(KERN_CONT "%pV\n", &vaf);
        va_end(args);

        if (test_opt(sb, ERRORS_CONT)) {
                ext4_commit_super(sb, 0);
                return;
        }

        ext4_unlock_group(sb, grp);
        ext4_handle_error(sb);
        /*
         * We only get here in the ERRORS_RO case; relocking the group
         * may be dangerous, but nothing bad will happen since the
         * filesystem will have already been marked read/only and the
         * journal has been aborted.  This function returns void, so
         * callers that want to distinguish the ERRORS_CONT case from
         * the ERRORS_RO case (for example to bail out more
         * aggressively with a more appropriate error code) have to
         * check the mount state themselves.
         */
        ext4_lock_group(sb, grp);
        return;
}
 752
 753void ext4_update_dynamic_rev(struct super_block *sb)
 754{
 755        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 756
 757        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 758                return;
 759
 760        ext4_warning(sb,
 761                     "updating to rev %d because of new feature flag, "
 762                     "running e2fsck is recommended",
 763                     EXT4_DYNAMIC_REV);
 764
 765        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 766        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 767        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 768        /* leave es->s_feature_*compat flags alone */
 769        /* es->s_uuid will be set by e2fsck if empty */
 770
 771        /*
 772         * The rest of the superblock fields should be zero, and if not it
 773         * means they are likely already in use, so leave them alone.  We
 774         * can leave it up to e2fsck to clean up any inconsistencies there.
 775         */
 776}
 777
 778/*
 779 * Open the external journal device
 780 */
 781static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 782{
 783        struct block_device *bdev;
 784        char b[BDEVNAME_SIZE];
 785
 786        bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
 787        if (IS_ERR(bdev))
 788                goto fail;
 789        return bdev;
 790
 791fail:
 792        ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 793                        __bdevname(dev, b), PTR_ERR(bdev));
 794        return NULL;
 795}
 796
 797/*
 798 * Release the journal device
 799 */
 800static int ext4_blkdev_put(struct block_device *bdev)
 801{
 802        return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 803}
 804
 805static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 806{
 807        struct block_device *bdev;
 808        int ret = -ENODEV;
 809
 810        bdev = sbi->journal_bdev;
 811        if (bdev) {
 812                ret = ext4_blkdev_put(bdev);
 813                sbi->journal_bdev = NULL;
 814        }
 815        return ret;
 816}
 817
 818static inline struct inode *orphan_list_entry(struct list_head *l)
 819{
 820        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 821}
 822
 823static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 824{
 825        struct list_head *l;
 826
 827        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 828                 le32_to_cpu(sbi->s_es->s_last_orphan));
 829
 830        printk(KERN_ERR "sb_info orphan list:\n");
 831        list_for_each(l, &sbi->s_orphan) {
 832                struct inode *inode = orphan_list_entry(l);
 833                printk(KERN_ERR "  "
 834                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 835                       inode->i_sb->s_id, inode->i_ino, inode,
 836                       inode->i_mode, inode->i_nlink,
 837                       NEXT_ORPHAN(inode));
 838        }
 839}
 840
 841static void ext4_put_super(struct super_block *sb)
 842{
 843        struct ext4_sb_info *sbi = EXT4_SB(sb);
 844        struct ext4_super_block *es = sbi->s_es;
 845        int i, err;
 846
 847        ext4_unregister_li_request(sb);
 848        dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 849
 850        flush_workqueue(sbi->dio_unwritten_wq);
 851        destroy_workqueue(sbi->dio_unwritten_wq);
 852
 853        lock_super(sb);
 854        if (sbi->s_journal) {
 855                err = jbd2_journal_destroy(sbi->s_journal);
 856                sbi->s_journal = NULL;
 857                if (err < 0)
 858                        ext4_abort(sb, "Couldn't clean up the journal");
 859        }
 860
 861        del_timer(&sbi->s_err_report);
 862        ext4_release_system_zone(sb);
 863        ext4_mb_release(sb);
 864        ext4_ext_release(sb);
 865        ext4_xattr_put_super(sb);
 866
 867        if (!(sb->s_flags & MS_RDONLY)) {
 868                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 869                es->s_state = cpu_to_le16(sbi->s_mount_state);
 870        }
 871        if (!(sb->s_flags & MS_RDONLY))
 872                ext4_commit_super(sb, 1);
 873
 874        if (sbi->s_proc) {
 875                remove_proc_entry("options", sbi->s_proc);
 876                remove_proc_entry(sb->s_id, ext4_proc_root);
 877        }
 878        kobject_del(&sbi->s_kobj);
 879
 880        for (i = 0; i < sbi->s_gdb_count; i++)
 881                brelse(sbi->s_group_desc[i]);
 882        ext4_kvfree(sbi->s_group_desc);
 883        ext4_kvfree(sbi->s_flex_groups);
 884        percpu_counter_destroy(&sbi->s_freeclusters_counter);
 885        percpu_counter_destroy(&sbi->s_freeinodes_counter);
 886        percpu_counter_destroy(&sbi->s_dirs_counter);
 887        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 888        brelse(sbi->s_sbh);
 889#ifdef CONFIG_QUOTA
 890        for (i = 0; i < MAXQUOTAS; i++)
 891                kfree(sbi->s_qf_names[i]);
 892#endif
 893
 894        /* Debugging code just in case the in-memory inode orphan list
 895         * isn't empty.  The on-disk one can be non-empty if we've
 896         * detected an error and taken the fs readonly, but the
 897         * in-memory list had better be clean by this point. */
 898        if (!list_empty(&sbi->s_orphan))
 899                dump_orphan_list(sb, sbi);
 900        J_ASSERT(list_empty(&sbi->s_orphan));
 901
 902        invalidate_bdev(sb->s_bdev);
 903        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 904                /*
 905                 * Invalidate the journal device's buffers.  We don't want them
 906                 * floating about in memory - the physical journal device may
 907                 * hotswapped, and it breaks the `ro-after' testing code.
 908                 */
 909                sync_blockdev(sbi->journal_bdev);
 910                invalidate_bdev(sbi->journal_bdev);
 911                ext4_blkdev_remove(sbi);
 912        }
 913        if (sbi->s_mmp_tsk)
 914                kthread_stop(sbi->s_mmp_tsk);
 915        sb->s_fs_info = NULL;
 916        /*
 917         * Now that we are completely done shutting down the
 918         * superblock, we need to actually destroy the kobject.
 919         */
 920        unlock_super(sb);
 921        kobject_put(&sbi->s_kobj);
 922        wait_for_completion(&sbi->s_kobj_unregister);
 923        if (sbi->s_chksum_driver)
 924                crypto_free_shash(sbi->s_chksum_driver);
 925        kfree(sbi->s_blockgroup_lock);
 926        kfree(sbi);
 927}
 928
/*
 * Slab cache for struct ext4_inode_info objects; created by
 * init_inodecache() and allocated from in ext4_alloc_inode().
 */
static struct kmem_cache *ext4_inode_cachep;
 930
 931/*
 932 * Called inside transaction, so use GFP_NOFS
 933 */
 934static struct inode *ext4_alloc_inode(struct super_block *sb)
 935{
 936        struct ext4_inode_info *ei;
 937
 938        ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 939        if (!ei)
 940                return NULL;
 941
 942        ei->vfs_inode.i_version = 1;
 943        ei->vfs_inode.i_data.writeback_index = 0;
 944        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 945        INIT_LIST_HEAD(&ei->i_prealloc_list);
 946        spin_lock_init(&ei->i_prealloc_lock);
 947        ei->i_reserved_data_blocks = 0;
 948        ei->i_reserved_meta_blocks = 0;
 949        ei->i_allocated_meta_blocks = 0;
 950        ei->i_da_metadata_calc_len = 0;
 951        ei->i_da_metadata_calc_last_lblock = 0;
 952        spin_lock_init(&(ei->i_block_reservation_lock));
 953#ifdef CONFIG_QUOTA
 954        ei->i_reserved_quota = 0;
 955#endif
 956        ei->jinode = NULL;
 957        INIT_LIST_HEAD(&ei->i_completed_io_list);
 958        spin_lock_init(&ei->i_completed_io_lock);
 959        ei->cur_aio_dio = NULL;
 960        ei->i_sync_tid = 0;
 961        ei->i_datasync_tid = 0;
 962        atomic_set(&ei->i_ioend_count, 0);
 963        atomic_set(&ei->i_aiodio_unwritten, 0);
 964
 965        return &ei->vfs_inode;
 966}
 967
 968static int ext4_drop_inode(struct inode *inode)
 969{
 970        int drop = generic_drop_inode(inode);
 971
 972        trace_ext4_drop_inode(inode, drop);
 973        return drop;
 974}
 975
 976static void ext4_i_callback(struct rcu_head *head)
 977{
 978        struct inode *inode = container_of(head, struct inode, i_rcu);
 979        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 980}
 981
 982static void ext4_destroy_inode(struct inode *inode)
 983{
 984        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 985                ext4_msg(inode->i_sb, KERN_ERR,
 986                         "Inode %lu (%p): orphan list check failed!",
 987                         inode->i_ino, EXT4_I(inode));
 988                print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 989                                EXT4_I(inode), sizeof(struct ext4_inode_info),
 990                                true);
 991                dump_stack();
 992        }
 993        call_rcu(&inode->i_rcu, ext4_i_callback);
 994}
 995
 996static void init_once(void *foo)
 997{
 998        struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 999
1000        INIT_LIST_HEAD(&ei->i_orphan);
1001#ifdef CONFIG_EXT4_FS_XATTR
1002        init_rwsem(&ei->xattr_sem);
1003#endif
1004        init_rwsem(&ei->i_data_sem);
1005        inode_init_once(&ei->vfs_inode);
1006}
1007
1008static int init_inodecache(void)
1009{
1010        ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
1011                                             sizeof(struct ext4_inode_info),
1012                                             0, (SLAB_RECLAIM_ACCOUNT|
1013                                                SLAB_MEM_SPREAD),
1014                                             init_once);
1015        if (ext4_inode_cachep == NULL)
1016                return -ENOMEM;
1017        return 0;
1018}
1019
1020static void destroy_inodecache(void)
1021{
1022        kmem_cache_destroy(ext4_inode_cachep);
1023}
1024
1025void ext4_clear_inode(struct inode *inode)
1026{
1027        invalidate_inode_buffers(inode);
1028        clear_inode(inode);
1029        dquot_drop(inode);
1030        ext4_discard_preallocations(inode);
1031        if (EXT4_I(inode)->jinode) {
1032                jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1033                                               EXT4_I(inode)->jinode);
1034                jbd2_free_inode(EXT4_I(inode)->jinode);
1035                EXT4_I(inode)->jinode = NULL;
1036        }
1037}
1038
1039static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1040                                        u64 ino, u32 generation)
1041{
1042        struct inode *inode;
1043
1044        if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
1045                return ERR_PTR(-ESTALE);
1046        if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
1047                return ERR_PTR(-ESTALE);
1048
1049        /* iget isn't really right if the inode is currently unallocated!!
1050         *
1051         * ext4_read_inode will return a bad_inode if the inode had been
1052         * deleted, so we should be safe.
1053         *
1054         * Currently we don't know the generation for parent directory, so
1055         * a generation of 0 means "accept any"
1056         */
1057        inode = ext4_iget(sb, ino);
1058        if (IS_ERR(inode))
1059                return ERR_CAST(inode);
1060        if (generation && inode->i_generation != generation) {
1061                iput(inode);
1062                return ERR_PTR(-ESTALE);
1063        }
1064
1065        return inode;
1066}
1067
1068static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1069                                        int fh_len, int fh_type)
1070{
1071        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1072                                    ext4_nfs_get_inode);
1073}
1074
1075static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1076                                        int fh_len, int fh_type)
1077{
1078        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1079                                    ext4_nfs_get_inode);
1080}
1081
1082/*
1083 * Try to release metadata pages (indirect blocks, directories) which are
1084 * mapped via the block device.  Since these pages could have journal heads
1085 * which would prevent try_to_free_buffers() from freeing them, we must use
1086 * jbd2 layer's try_to_free_buffers() function to release them.
1087 */
1088static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
1089                                 gfp_t wait)
1090{
1091        journal_t *journal = EXT4_SB(sb)->s_journal;
1092
1093        WARN_ON(PageChecked(page));
1094        if (!page_has_buffers(page))
1095                return 0;
1096        if (journal)
1097                return jbd2_journal_try_to_free_buffers(journal, page,
1098                                                        wait & ~__GFP_WAIT);
1099        return try_to_free_buffers(page);
1100}
1101
1102#ifdef CONFIG_QUOTA
/* Human-readable name of quota type @t, as used in log messages. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Build a journaled-quota identifier for quota type @t by pasting the
 * prefix @on onto USRJQUOTA or GRPJQUOTA.  The prefix has to be pasted
 * bare: "(on)##USRJQUOTA" would glue ')' onto an identifier, which is not
 * a valid preprocessing token.
 */
#define QTYPE2MOPT(on, t) \
        ((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))
1105
1106static int ext4_write_dquot(struct dquot *dquot);
1107static int ext4_acquire_dquot(struct dquot *dquot);
1108static int ext4_release_dquot(struct dquot *dquot);
1109static int ext4_mark_dquot_dirty(struct dquot *dquot);
1110static int ext4_write_info(struct super_block *sb, int type);
1111static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1112                         struct path *path);
1113static int ext4_quota_on_sysfile(struct super_block *sb, int type,
1114                                 int format_id);
1115static int ext4_quota_off(struct super_block *sb, int type);
1116static int ext4_quota_off_sysfile(struct super_block *sb, int type);
1117static int ext4_quota_on_mount(struct super_block *sb, int type);
1118static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1119                               size_t len, loff_t off);
1120static ssize_t ext4_quota_write(struct super_block *sb, int type,
1121                                const char *data, size_t len, loff_t off);
1122static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1123                             unsigned int flags);
1124static int ext4_enable_quotas(struct super_block *sb);
1125
1126static const struct dquot_operations ext4_quota_operations = {
1127        .get_reserved_space = ext4_get_reserved_space,
1128        .write_dquot    = ext4_write_dquot,
1129        .acquire_dquot  = ext4_acquire_dquot,
1130        .release_dquot  = ext4_release_dquot,
1131        .mark_dirty     = ext4_mark_dquot_dirty,
1132        .write_info     = ext4_write_info,
1133        .alloc_dquot    = dquot_alloc,
1134        .destroy_dquot  = dquot_destroy,
1135};
1136
1137static const struct quotactl_ops ext4_qctl_operations = {
1138        .quota_on       = ext4_quota_on,
1139        .quota_off      = ext4_quota_off,
1140        .quota_sync     = dquot_quota_sync,
1141        .get_info       = dquot_get_dqinfo,
1142        .set_info       = dquot_set_dqinfo,
1143        .get_dqblk      = dquot_get_dqblk,
1144        .set_dqblk      = dquot_set_dqblk
1145};
1146
1147static const struct quotactl_ops ext4_qctl_sysfile_operations = {
1148        .quota_on_meta  = ext4_quota_on_sysfile,
1149        .quota_off      = ext4_quota_off_sysfile,
1150        .quota_sync     = dquot_quota_sync,
1151        .get_info       = dquot_get_dqinfo,
1152        .set_info       = dquot_set_dqinfo,
1153        .get_dqblk      = dquot_get_dqblk,
1154        .set_dqblk      = dquot_set_dqblk
1155};
1156#endif
1157
1158static const struct super_operations ext4_sops = {
1159        .alloc_inode    = ext4_alloc_inode,
1160        .destroy_inode  = ext4_destroy_inode,
1161        .write_inode    = ext4_write_inode,
1162        .dirty_inode    = ext4_dirty_inode,
1163        .drop_inode     = ext4_drop_inode,
1164        .evict_inode    = ext4_evict_inode,
1165        .put_super      = ext4_put_super,
1166        .sync_fs        = ext4_sync_fs,
1167        .freeze_fs      = ext4_freeze,
1168        .unfreeze_fs    = ext4_unfreeze,
1169        .statfs         = ext4_statfs,
1170        .remount_fs     = ext4_remount,
1171        .show_options   = ext4_show_options,
1172#ifdef CONFIG_QUOTA
1173        .quota_read     = ext4_quota_read,
1174        .quota_write    = ext4_quota_write,
1175#endif
1176        .bdev_try_to_free_page = bdev_try_to_free_page,
1177};
1178
1179static const struct super_operations ext4_nojournal_sops = {
1180        .alloc_inode    = ext4_alloc_inode,
1181        .destroy_inode  = ext4_destroy_inode,
1182        .write_inode    = ext4_write_inode,
1183        .dirty_inode    = ext4_dirty_inode,
1184        .drop_inode     = ext4_drop_inode,
1185        .evict_inode    = ext4_evict_inode,
1186        .put_super      = ext4_put_super,
1187        .statfs         = ext4_statfs,
1188        .remount_fs     = ext4_remount,
1189        .show_options   = ext4_show_options,
1190#ifdef CONFIG_QUOTA
1191        .quota_read     = ext4_quota_read,
1192        .quota_write    = ext4_quota_write,
1193#endif
1194        .bdev_try_to_free_page = bdev_try_to_free_page,
1195};
1196
1197static const struct export_operations ext4_export_ops = {
1198        .fh_to_dentry = ext4_fh_to_dentry,
1199        .fh_to_parent = ext4_fh_to_parent,
1200        .get_parent = ext4_get_parent,
1201};
1202
/*
 * Identifiers for the mount options recognised by the parser.  The values
 * are only used symbolically: the tokens table maps option strings onto
 * them and ext4_mount_opts/handle_mount_opt() act on them.  Opt_err ends
 * both tables.
 */
enum {
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,
        Opt_nouid32,
        Opt_debug,
        Opt_removed,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_dev,
        Opt_journal_checksum,
        Opt_journal_async_commit,
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_jqfmt_vfsv1,
        Opt_quota,
        Opt_noquota,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,
        Opt_usrquota,
        Opt_grpquota,
        Opt_i_version,
        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,
        Opt_mblk_io_submit,
        Opt_nomblk_io_submit,
        Opt_block_validity,
        Opt_noblock_validity,
        Opt_inode_readahead_blks,
        Opt_journal_ioprio,
        Opt_dioread_nolock,
        Opt_dioread_lock,
        Opt_discard,
        Opt_nodiscard,
        Opt_init_itable,
        Opt_noinit_itable,
};
1223
/*
 * Mount option patterns and the Opt_* token each one maps to.
 *
 * Patterns ending in "=%u" or "=%s" carry an argument.  Several options
 * have more than one spelling for the same token (e.g. "grpid" and
 * "bsdgroups"), and some appear both with and without an argument (e.g.
 * "barrier=%u" and "barrier").  Obsolete options map to Opt_removed, which
 * handle_mount_opt() logs as ignored.  The {Opt_err, NULL} entry ends the
 * table.
 */
static const match_table_t tokens = {
        {Opt_bsd_df, "bsddf"},
        {Opt_minix_df, "minixdf"},
        {Opt_grpid, "grpid"},
        {Opt_grpid, "bsdgroups"},
        {Opt_nogrpid, "nogrpid"},
        {Opt_nogrpid, "sysvgroups"},
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_sb, "sb=%u"},
        {Opt_err_cont, "errors=continue"},
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
        {Opt_removed, "oldalloc"},
        {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
        {Opt_noload, "norecovery"},
        {Opt_noload, "noload"},
        {Opt_removed, "nobh"},
        {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
        {Opt_data_writeback, "data=writeback"},
        {Opt_data_err_abort, "data_err=abort"},
        {Opt_data_err_ignore, "data_err=ignore"},
        /* An empty value ("usrjquota=") switches the journaled file off. */
        {Opt_offusrjquota, "usrjquota="},
        {Opt_usrjquota, "usrjquota=%s"},
        {Opt_offgrpjquota, "grpjquota="},
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
        {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
        {Opt_grpquota, "grpquota"},
        {Opt_noquota, "noquota"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_stripe, "stripe=%u"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_mblk_io_submit, "mblk_io_submit"},
        {Opt_nomblk_io_submit, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
        {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc=%u"},
        {Opt_auto_da_alloc, "auto_da_alloc"},
        {Opt_noauto_da_alloc, "noauto_da_alloc"},
        {Opt_dioread_nolock, "dioread_nolock"},
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_removed, "check=none"},    /* mount option from ext2/3 */
        {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
        {Opt_removed, "reservation"},   /* mount option from ext2/3 */
        {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
        {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
};
1302
1303static ext4_fsblk_t get_sb_block(void **data)
1304{
1305        ext4_fsblk_t    sb_block;
1306        char            *options = (char *) *data;
1307
1308        if (!options || strncmp(options, "sb=", 3) != 0)
1309                return 1;       /* Default location */
1310
1311        options += 3;
1312        /* TODO: use simple_strtoll with >32bit ext4 */
1313        sb_block = simple_strtoul(options, &options, 0);
1314        if (*options && *options != ',') {
1315                printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1316                       (char *) *data);
1317                return 1;
1318        }
1319        if (*options == ',')
1320                options++;
1321        *data = (void *) options;
1322
1323        return sb_block;
1324}
1325
/* Journal I/O priority: best-effort class, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
/*
 * Format string for deprecated mount options; takes the option name and
 * the release it will be removed by.  It is only ever read, so keep it
 * const.
 */
static const char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
        "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1329
1330#ifdef CONFIG_QUOTA
1331static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1332{
1333        struct ext4_sb_info *sbi = EXT4_SB(sb);
1334        char *qname;
1335
1336        if (sb_any_quota_loaded(sb) &&
1337                !sbi->s_qf_names[qtype]) {
1338                ext4_msg(sb, KERN_ERR,
1339                        "Cannot change journaled "
1340                        "quota options when quota turned on");
1341                return -1;
1342        }
1343        qname = match_strdup(args);
1344        if (!qname) {
1345                ext4_msg(sb, KERN_ERR,
1346                        "Not enough memory for storing quotafile name");
1347                return -1;
1348        }
1349        if (sbi->s_qf_names[qtype] &&
1350                strcmp(sbi->s_qf_names[qtype], qname)) {
1351                ext4_msg(sb, KERN_ERR,
1352                        "%s quota file already specified", QTYPE2NAME(qtype));
1353                kfree(qname);
1354                return -1;
1355        }
1356        sbi->s_qf_names[qtype] = qname;
1357        if (strchr(sbi->s_qf_names[qtype], '/')) {
1358                ext4_msg(sb, KERN_ERR,
1359                        "quotafile must be on filesystem root");
1360                kfree(sbi->s_qf_names[qtype]);
1361                sbi->s_qf_names[qtype] = NULL;
1362                return -1;
1363        }
1364        set_opt(sb, QUOTA);
1365        return 1;
1366}
1367
1368static int clear_qf_name(struct super_block *sb, int qtype)
1369{
1370
1371        struct ext4_sb_info *sbi = EXT4_SB(sb);
1372
1373        if (sb_any_quota_loaded(sb) &&
1374                sbi->s_qf_names[qtype]) {
1375                ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
1376                        " when quota turned on");
1377                return -1;
1378        }
1379        /*
1380         * The space will be released later when all options are confirmed
1381         * to be correct
1382         */
1383        sbi->s_qf_names[qtype] = NULL;
1384        return 1;
1385}
1386#endif
1387
/*
 * Behaviour flags for the "flags" field of ext4_mount_opts[] entries.
 *
 * MOPT_SET / MOPT_CLEAR  mark options that turn the entry's mount_opt bits
 *                        on or off (NOTE(review): the code acting on them
 *                        lies beyond the part of handle_mount_opt() seen
 *                        here -- confirm)
 * MOPT_NOSUPPORT         option is reported as "not supported"
 * MOPT_EXPLICIT          also sets EXPLICIT_DELALLOC
 * MOPT_CLEAR_ERR         clears ERRORS_MASK before the option is applied
 * MOPT_GTE0              a supplied argument must not be negative
 * MOPT_Q                 quota option; same as MOPT_NOSUPPORT without
 *                        CONFIG_QUOTA
 * MOPT_QFMT              mount_opt holds a journaled quota format; same as
 *                        MOPT_NOSUPPORT without CONFIG_QUOTA
 * MOPT_DATAJ             mount_opt holds a data journaling mode
 */
#define MOPT_SET        (1 << 0)
#define MOPT_CLEAR      (1 << 1)
#define MOPT_NOSUPPORT  (1 << 2)
#define MOPT_EXPLICIT   (1 << 3)
#define MOPT_CLEAR_ERR  (1 << 4)
#define MOPT_GTE0       (1 << 5)
#ifdef CONFIG_QUOTA
#define MOPT_Q          0
#define MOPT_QFMT       (1 << 6)
#else
#define MOPT_Q          MOPT_NOSUPPORT
#define MOPT_QFMT       MOPT_NOSUPPORT
#endif
#define MOPT_DATAJ      (1 << 7)
1402
/*
 * Per-token description of the mount options handled generically by
 * handle_mount_opt(), which scans this table for the entry whose token
 * matches.  The {Opt_err, 0, 0} entry ends the table.
 */
static const struct mount_opts {
        int     token;          /* Opt_* value this entry applies to */
        int     mount_opt;      /* EXT4_MOUNT_* bits; a QFMT_* id for
                                 * MOPT_QFMT entries; 0 when unused */
        int     flags;          /* MOPT_* behaviour flags */
} ext4_mount_opts[] = {
        {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
        {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
        {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
        {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
        {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
        {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
        {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
        {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
        {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
        {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
        {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
        {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
        {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
                                    EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
        {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
        {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
        {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
        {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
        {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
        {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
        {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
        {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
        {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
        /* Numeric options: only the "not negative" check is table-driven. */
        {Opt_commit, 0, MOPT_GTE0},
        {Opt_max_batch_time, 0, MOPT_GTE0},
        {Opt_min_batch_time, 0, MOPT_GTE0},
        {Opt_inode_readahead_blks, 0, MOPT_GTE0},
        {Opt_init_itable, 0, MOPT_GTE0},
        {Opt_stripe, 0, MOPT_GTE0},
        /* Data journaling modes. */
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
        /* Options compiled out of the kernel are marked MOPT_NOSUPPORT. */
#ifdef CONFIG_EXT4_FS_XATTR
        {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
        {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#else
        {Opt_user_xattr, 0, MOPT_NOSUPPORT},
        {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
        {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
        {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
        {Opt_acl, 0, MOPT_NOSUPPORT},
        {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
        {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
        {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
        /* Quota options; MOPT_Q becomes MOPT_NOSUPPORT without CONFIG_QUOTA. */
        {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
        {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
                                                        MOPT_SET | MOPT_Q},
        {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
                       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
        {Opt_usrjquota, 0, MOPT_Q},
        {Opt_grpjquota, 0, MOPT_Q},
        {Opt_offusrjquota, 0, MOPT_Q},
        {Opt_offgrpjquota, 0, MOPT_Q},
        /* For MOPT_QFMT entries mount_opt is the journaled quota format. */
        {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_err, 0, 0}
};
1477
1478static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1479                            substring_t *args, unsigned long *journal_devnum,
1480                            unsigned int *journal_ioprio, int is_remount)
1481{
1482        struct ext4_sb_info *sbi = EXT4_SB(sb);
1483        const struct mount_opts *m;
1484        kuid_t uid;
1485        kgid_t gid;
1486        int arg = 0;
1487
1488#ifdef CONFIG_QUOTA
1489        if (token == Opt_usrjquota)
1490                return set_qf_name(sb, USRQUOTA, &args[0]);
1491        else if (token == Opt_grpjquota)
1492                return set_qf_name(sb, GRPQUOTA, &args[0]);
1493        else if (token == Opt_offusrjquota)
1494                return clear_qf_name(sb, USRQUOTA);
1495        else if (token == Opt_offgrpjquota)
1496                return clear_qf_name(sb, GRPQUOTA);
1497#endif
1498        if (args->from && match_int(args, &arg))
1499                return -1;
1500        switch (token) {
1501        case Opt_noacl:
1502        case Opt_nouser_xattr:
1503                ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
1504                break;
1505        case Opt_sb:
1506                return 1;       /* handled by get_sb_block() */
1507        case Opt_removed:
1508                ext4_msg(sb, KERN_WARNING,
1509                         "Ignoring removed %s option", opt);
1510                return 1;
1511        case Opt_resuid:
1512                uid = make_kuid(current_user_ns(), arg);
1513                if (!uid_valid(uid)) {
1514                        ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1515                        return -1;
1516                }
1517                sbi->s_resuid = uid;
1518                return 1;
1519        case Opt_resgid:
1520                gid = make_kgid(current_user_ns(), arg);
1521                if (!gid_valid(gid)) {
1522                        ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1523                        return -1;
1524                }
1525                sbi->s_resgid = gid;
1526                return 1;
1527        case Opt_abort:
1528                sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1529                return 1;
1530        case Opt_i_version:
1531                sb->s_flags |= MS_I_VERSION;
1532                return 1;
1533        case Opt_journal_dev:
1534                if (is_remount) {
1535                        ext4_msg(sb, KERN_ERR,
1536                                 "Cannot specify journal on remount");
1537                        return -1;
1538                }
1539                *journal_devnum = arg;
1540                return 1;
1541        case Opt_journal_ioprio:
1542                if (arg < 0 || arg > 7)
1543                        return -1;
1544                *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1545                return 1;
1546        }
1547
1548        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1549                if (token != m->token)
1550                        continue;
1551                if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1552                        return -1;
1553                if (m->flags & MOPT_EXPLICIT)
1554                        set_opt2(sb, EXPLICIT_DELALLOC);
1555                if (m->flags & MOPT_CLEAR_ERR)
1556                        clear_opt(sb, ERRORS_MASK);
1557                if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1558                        ext4_msg(sb, KERN_ERR, "Cannot change quota "
1559                                 "options when quota turned on");
1560                        return -1;
1561                }
1562
1563                if (m->flags & MOPT_NOSUPPORT) {
1564                        ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1565                } else if (token == Opt_commit) {
1566                        if (arg == 0)
1567                                arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1568                        sbi->s_commit_interval = HZ * arg;
1569                } else if (token == Opt_max_batch_time) {
1570                        if (arg == 0)
1571                                arg = EXT4_DEF_MAX_BATCH_TIME;
1572                        sbi->s_max_batch_time = arg;
1573                } else if (token == Opt_min_batch_time) {
1574                        sbi->s_min_batch_time = arg;
1575                } else if (token == Opt_inode_readahead_blks) {
1576                        if (arg > (1 << 30))
1577                                return -1;
1578                        if (arg && !is_power_of_2(arg)) {
1579                                ext4_msg(sb, KERN_ERR,
1580                                         "EXT4-fs: inode_readahead_blks"
1581                                         " must be a power of 2");
1582                                return -1;
1583                        }
1584                        sbi->s_inode_readahead_blks = arg;
1585                } else if (token == Opt_init_itable) {
1586                        set_opt(sb, INIT_INODE_TABLE);
1587                        if (!args->from)
1588                                arg = EXT4_DEF_LI_WAIT_MULT;
1589                        sbi->s_li_wait_mult = arg;
1590                } else if (token == Opt_stripe) {
1591                        sbi->s_stripe = arg;
1592                } else if (m->flags & MOPT_DATAJ) {
1593                        if (is_remount) {
1594                                if (!sbi->s_journal)
1595                                        ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1596                                else if (test_opt(sb, DATA_FLAGS) !=
1597                                         m->mount_opt) {
1598                                        ext4_msg(sb, KERN_ERR,
1599                                         "Cannot change data mode on remount");
1600                                        return -1;
1601                                }
1602                        } else {
1603                                clear_opt(sb, DATA_FLAGS);
1604                                sbi->s_mount_opt |= m->mount_opt;
1605                        }
1606#ifdef CONFIG_QUOTA
1607                } else if (m->flags & MOPT_QFMT) {
1608                        if (sb_any_quota_loaded(sb) &&
1609                            sbi->s_jquota_fmt != m->mount_opt) {
1610                                ext4_msg(sb, KERN_ERR, "Cannot "
1611                                         "change journaled quota options "
1612                                         "when quota turned on");
1613                                return -1;
1614                        }
1615                        sbi->s_jquota_fmt = m->mount_opt;
1616#endif
1617                } else {
1618                        if (!args->from)
1619                                arg = 1;
1620                        if (m->flags & MOPT_CLEAR)
1621                                arg = !arg;
1622                        else if (unlikely(!(m->flags & MOPT_SET))) {
1623                                ext4_msg(sb, KERN_WARNING,
1624                                         "buggy handling of option %s", opt);
1625                                WARN_ON(1);
1626                                return -1;
1627                        }
1628                        if (arg != 0)
1629                                sbi->s_mount_opt |= m->mount_opt;
1630                        else
1631                                sbi->s_mount_opt &= ~m->mount_opt;
1632                }
1633                return 1;
1634        }
1635        ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1636                 "or missing value", opt);
1637        return -1;
1638}
1639
1640static int parse_options(char *options, struct super_block *sb,
1641                         unsigned long *journal_devnum,
1642                         unsigned int *journal_ioprio,
1643                         int is_remount)
1644{
1645#ifdef CONFIG_QUOTA
1646        struct ext4_sb_info *sbi = EXT4_SB(sb);
1647#endif
1648        char *p;
1649        substring_t args[MAX_OPT_ARGS];
1650        int token;
1651
1652        if (!options)
1653                return 1;
1654
1655        while ((p = strsep(&options, ",")) != NULL) {
1656                if (!*p)
1657                        continue;
1658                /*
1659                 * Initialize args struct so we know whether arg was
1660                 * found; some options take optional arguments.
1661                 */
1662                args[0].to = args[0].from = 0;
1663                token = match_token(p, tokens, args);
1664                if (handle_mount_opt(sb, p, token, args, journal_devnum,
1665                                     journal_ioprio, is_remount) < 0)
1666                        return 0;
1667        }
1668#ifdef CONFIG_QUOTA
1669        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1670                if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1671                        clear_opt(sb, USRQUOTA);
1672
1673                if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1674                        clear_opt(sb, GRPQUOTA);
1675
1676                if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1677                        ext4_msg(sb, KERN_ERR, "old and new quota "
1678                                        "format mixing");
1679                        return 0;
1680                }
1681
1682                if (!sbi->s_jquota_fmt) {
1683                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1684                                        "not specified");
1685                        return 0;
1686                }
1687        } else {
1688                if (sbi->s_jquota_fmt) {
1689                        ext4_msg(sb, KERN_ERR, "journaled quota format "
1690                                        "specified with no journaling "
1691                                        "enabled");
1692                        return 0;
1693                }
1694        }
1695#endif
1696        return 1;
1697}
1698
/*
 * Append the quota-related mount options of @sb to @seq.  Compiles to an
 * empty function when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (sbi->s_jquota_fmt) {
		char *fmtname;

		/* Unknown formats are shown with an empty name */
		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmtname = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmtname = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmtname = "vfsv1";
		else
			fmtname = "";

		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	/* Journaled quota file names, when configured */
	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	/* Old-style quota flags */
	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");
	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}
1735
1736static const char *token2str(int token)
1737{
1738        const struct match_token *t;
1739
1740        for (t = tokens; t->token != Opt_err; t++)
1741                if (t->token == token && !strchr(t->pattern, '='))
1742                        break;
1743        return t->pattern;
1744}
1745
1746/*
1747 * Show an option if
1748 *  - it's set to a non-default value OR
1749 *  - if the per-sb default is different from the global default
1750 */
1751static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1752                              int nodefs)
1753{
1754        struct ext4_sb_info *sbi = EXT4_SB(sb);
1755        struct ext4_super_block *es = sbi->s_es;
1756        int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
1757        const struct mount_opts *m;
1758        char sep = nodefs ? '\n' : ',';
1759
1760#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
1761#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
1762
1763        if (sbi->s_sb_block != 1)
1764                SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
1765
1766        for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1767                int want_set = m->flags & MOPT_SET;
1768                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
1769                    (m->flags & MOPT_CLEAR_ERR))
1770                        continue;
1771                if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
1772                        continue; /* skip if same as the default */
1773                if ((want_set &&
1774                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
1775                    (!want_set && (sbi->s_mount_opt & m->mount_opt)))
1776                        continue; /* select Opt_noFoo vs Opt_Foo */
1777                SEQ_OPTS_PRINT("%s", token2str(m->token));
1778        }
1779
1780        if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
1781            le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
1782                SEQ_OPTS_PRINT("resuid=%u",
1783                                from_kuid_munged(&init_user_ns, sbi->s_resuid));
1784        if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
1785            le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
1786                SEQ_OPTS_PRINT("resgid=%u",
1787                                from_kgid_munged(&init_user_ns, sbi->s_resgid));
1788        def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
1789        if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
1790                SEQ_OPTS_PUTS("errors=remount-ro");
1791        if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
1792                SEQ_OPTS_PUTS("errors=continue");
1793        if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
1794                SEQ_OPTS_PUTS("errors=panic");
1795        if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
1796                SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
1797        if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
1798                SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
1799        if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
1800                SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
1801        if (sb->s_flags & MS_I_VERSION)
1802                SEQ_OPTS_PUTS("i_version");
1803        if (nodefs || sbi->s_stripe)
1804                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
1805        if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
1806                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
1807                        SEQ_OPTS_PUTS("data=journal");
1808                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
1809                        SEQ_OPTS_PUTS("data=ordered");
1810                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
1811                        SEQ_OPTS_PUTS("data=writeback");
1812        }
1813        if (nodefs ||
1814            sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
1815                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
1816                               sbi->s_inode_readahead_blks);
1817
1818        if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1819                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1820                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1821
1822        ext4_show_quota_options(seq, sb);
1823        return 0;
1824}
1825
1826static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1827{
1828        return _ext4_show_options(seq, root->d_sb, 0);
1829}
1830
1831static int options_seq_show(struct seq_file *seq, void *offset)
1832{
1833        struct super_block *sb = seq->private;
1834        int rc;
1835
1836        seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
1837        rc = _ext4_show_options(seq, sb, 1);
1838        seq_puts(seq, "\n");
1839        return rc;
1840}
1841
1842static int options_open_fs(struct inode *inode, struct file *file)
1843{
1844        return single_open(file, options_seq_show, PDE(inode)->data);
1845}
1846
1847static const struct file_operations ext4_seq_options_fops = {
1848        .owner = THIS_MODULE,
1849        .open = options_open_fs,
1850        .read = seq_read,
1851        .llseek = seq_lseek,
1852        .release = single_release,
1853};
1854
1855static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1856                            int read_only)
1857{
1858        struct ext4_sb_info *sbi = EXT4_SB(sb);
1859        int res = 0;
1860
1861        if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1862                ext4_msg(sb, KERN_ERR, "revision level too high, "
1863                         "forcing read-only mode");
1864                res = MS_RDONLY;
1865        }
1866        if (read_only)
1867                goto done;
1868        if (!(sbi->s_mount_state & EXT4_VALID_FS))
1869                ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1870                         "running e2fsck is recommended");
1871        else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1872                ext4_msg(sb, KERN_WARNING,
1873                         "warning: mounting fs with errors, "
1874                         "running e2fsck is recommended");
1875        else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1876                 le16_to_cpu(es->s_mnt_count) >=
1877                 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1878                ext4_msg(sb, KERN_WARNING,
1879                         "warning: maximal mount count reached, "
1880                         "running e2fsck is recommended");
1881        else if (le32_to_cpu(es->s_checkinterval) &&
1882                (le32_to_cpu(es->s_lastcheck) +
1883                        le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1884                ext4_msg(sb, KERN_WARNING,
1885                         "warning: checktime reached, "
1886                         "running e2fsck is recommended");
1887        if (!sbi->s_journal)
1888                es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1889        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1890                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1891        le16_add_cpu(&es->s_mnt_count, 1);
1892        es->s_mtime = cpu_to_le32(get_seconds());
1893        ext4_update_dynamic_rev(sb);
1894        if (sbi->s_journal)
1895                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1896
1897        ext4_commit_super(sb, 1);
1898done:
1899        if (test_opt(sb, DEBUG))
1900                printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1901                                "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
1902                        sb->s_blocksize,
1903                        sbi->s_groups_count,
1904                        EXT4_BLOCKS_PER_GROUP(sb),
1905                        EXT4_INODES_PER_GROUP(sb),
1906                        sbi->s_mount_opt, sbi->s_mount_opt2);
1907
1908        cleancache_init_fs(sb);
1909        return res;
1910}
1911
1912static int ext4_fill_flex_info(struct super_block *sb)
1913{
1914        struct ext4_sb_info *sbi = EXT4_SB(sb);
1915        struct ext4_group_desc *gdp = NULL;
1916        ext4_group_t flex_group_count;
1917        ext4_group_t flex_group;
1918        unsigned int groups_per_flex = 0;
1919        size_t size;
1920        int i;
1921
1922        sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1923        if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
1924                sbi->s_log_groups_per_flex = 0;
1925                return 1;
1926        }
1927        groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1928
1929        /* We allocate both existing and potentially added groups */
1930        flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1931                        ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1932                              EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1933        size = flex_group_count * sizeof(struct flex_groups);
1934        sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
1935        if (sbi->s_flex_groups == NULL) {
1936                ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
1937                         flex_group_count);
1938                goto failed;
1939        }
1940
1941        for (i = 0; i < sbi->s_groups_count; i++) {
1942                gdp = ext4_get_group_desc(sb, i, NULL);
1943
1944                flex_group = ext4_flex_group(sbi, i);
1945                atomic_add(ext4_free_inodes_count(sb, gdp),
1946                           &sbi->s_flex_groups[flex_group].free_inodes);
1947                atomic_add(ext4_free_group_clusters(sb, gdp),
1948                           &sbi->s_flex_groups[flex_group].free_clusters);
1949                atomic_add(ext4_used_dirs_count(sb, gdp),
1950                           &sbi->s_flex_groups[flex_group].used_dirs);
1951        }
1952
1953        return 1;
1954failed:
1955        return 0;
1956}
1957
1958static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1959                                   struct ext4_group_desc *gdp)
1960{
1961        int offset;
1962        __u16 crc = 0;
1963        __le32 le_group = cpu_to_le32(block_group);
1964
1965        if ((sbi->s_es->s_feature_ro_compat &
1966             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
1967                /* Use new metadata_csum algorithm */
1968                __u16 old_csum;
1969                __u32 csum32;
1970
1971                old_csum = gdp->bg_checksum;
1972                gdp->bg_checksum = 0;
1973                csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
1974                                     sizeof(le_group));
1975                csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
1976                                     sbi->s_desc_size);
1977                gdp->bg_checksum = old_csum;
1978
1979                crc = csum32 & 0xFFFF;
1980                goto out;
1981        }
1982
1983        /* old crc16 code */
1984        offset = offsetof(struct ext4_group_desc, bg_checksum);
1985
1986        crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1987        crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1988        crc = crc16(crc, (__u8 *)gdp, offset);
1989        offset += sizeof(gdp->bg_checksum); /* skip checksum */
1990        /* for checksum of struct ext4_group_desc do the rest...*/
1991        if ((sbi->s_es->s_feature_incompat &
1992             cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1993            offset < le16_to_cpu(sbi->s_es->s_desc_size))
1994                crc = crc16(crc, (__u8 *)gdp + offset,
1995                            le16_to_cpu(sbi->s_es->s_desc_size) -
1996                                offset);
1997
1998out:
1999        return cpu_to_le16(crc);
2000}
2001
2002int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
2003                                struct ext4_group_desc *gdp)
2004{
2005        if (ext4_has_group_desc_csum(sb) &&
2006            (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
2007                                                      block_group, gdp)))
2008                return 0;
2009
2010        return 1;
2011}
2012
2013void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
2014                              struct ext4_group_desc *gdp)
2015{
2016        if (!ext4_has_group_desc_csum(sb))
2017                return;
2018        gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
2019}
2020
2021/* Called at mount-time, super-block is locked */
2022static int ext4_check_descriptors(struct super_block *sb,
2023                                  ext4_group_t *first_not_zeroed)
2024{
2025        struct ext4_sb_info *sbi = EXT4_SB(sb);
2026        ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
2027        ext4_fsblk_t last_block;
2028        ext4_fsblk_t block_bitmap;
2029        ext4_fsblk_t inode_bitmap;
2030        ext4_fsblk_t inode_table;
2031        int flexbg_flag = 0;
2032        ext4_group_t i, grp = sbi->s_groups_count;
2033
2034        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2035                flexbg_flag = 1;
2036
2037        ext4_debug("Checking group descriptors");
2038
2039        for (i = 0; i < sbi->s_groups_count; i++) {
2040                struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
2041
2042                if (i == sbi->s_groups_count - 1 || flexbg_flag)
2043                        last_block = ext4_blocks_count(sbi->s_es) - 1;
2044                else
2045                        last_block = first_block +
2046                                (EXT4_BLOCKS_PER_GROUP(sb) - 1);
2047
2048                if ((grp == sbi->s_groups_count) &&
2049                   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2050                        grp = i;
2051
2052                block_bitmap = ext4_block_bitmap(sb, gdp);
2053                if (block_bitmap < first_block || block_bitmap > last_block) {
2054                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2055                               "Block bitmap for group %u not in group "
2056                               "(block %llu)!", i, block_bitmap);
2057                        return 0;
2058                }
2059                inode_bitmap = ext4_inode_bitmap(sb, gdp);
2060                if (inode_bitmap < first_block || inode_bitmap > last_block) {
2061                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2062                               "Inode bitmap for group %u not in group "
2063                               "(block %llu)!", i, inode_bitmap);
2064                        return 0;
2065                }
2066                inode_table = ext4_inode_table(sb, gdp);
2067                if (inode_table < first_block ||
2068                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
2069                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2070                               "Inode table for group %u not in group "
2071                               "(block %llu)!", i, inode_table);
2072                        return 0;
2073                }
2074                ext4_lock_group(sb, i);
2075                if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
2076                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
2077                                 "Checksum for group %u failed (%u!=%u)",
2078                                 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
2079                                     gdp)), le16_to_cpu(gdp->bg_checksum));
2080                        if (!(sb->s_flags & MS_RDONLY)) {
2081                                ext4_unlock_group(sb, i);
2082                                return 0;
2083                        }
2084                }
2085                ext4_unlock_group(sb, i);
2086                if (!flexbg_flag)
2087                        first_block += EXT4_BLOCKS_PER_GROUP(sb);
2088        }
2089        if (NULL != first_not_zeroed)
2090                *first_not_zeroed = grp;
2091
2092        ext4_free_blocks_count_set(sbi->s_es,
2093                                   EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2094        sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2095        return 1;
2096}
2097
2098/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
2099 * the superblock) which were deleted from all directories, but held open by
2100 * a process at the time of a crash.  We walk the list and try to delete these
2101 * inodes at recovery time (only with a read-write filesystem).
2102 *
2103 * In order to keep the orphan inode chain consistent during traversal (in
2104 * case of crash during recovery), we link each inode into the superblock
2105 * orphan list_head and handle it the same way as an inode deletion during
2106 * normal operation (which journals the operations for us).
2107 *
2108 * We only do an iget() and an iput() on each inode, which is very safe if we
2109 * accidentally point at an in-use or already deleted inode.  The worst that
2110 * can happen in this case is that we get a "bit already cleared" message from
2111 * ext4_free_inode().  The only reason we would point at a wrong inode is if
2112 * e2fsck was run on this filesystem, and it must have already done the orphan
2113 * inode cleanup for us, so we can safely abort without any further action.
2114 */
2115static void ext4_orphan_cleanup(struct super_block *sb,
2116                                struct ext4_super_block *es)
2117{
2118        unsigned int s_flags = sb->s_flags;
2119        int nr_orphans = 0, nr_truncates = 0;
2120#ifdef CONFIG_QUOTA
2121        int i;
2122#endif
2123        if (!es->s_last_orphan) {
2124                jbd_debug(4, "no orphan inodes to clean up\n");
2125                return;
2126        }
2127
2128        if (bdev_read_only(sb->s_bdev)) {
2129                ext4_msg(sb, KERN_ERR, "write access "
2130                        "unavailable, skipping orphan cleanup");
2131                return;
2132        }
2133
2134        /* Check if feature set would not allow a r/w mount */
2135        if (!ext4_feature_set_ok(sb, 0)) {
2136                ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
2137                         "unknown ROCOMPAT features");
2138                return;
2139        }
2140
2141        if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2142                if (es->s_last_orphan)
2143                        jbd_debug(1, "Errors on filesystem, "
2144                                  "clearing orphan list.\n");
2145                es->s_last_orphan = 0;
2146                jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2147                return;
2148        }
2149
2150        if (s_flags & MS_RDONLY) {
2151                ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
2152                sb->s_flags &= ~MS_RDONLY;
2153        }
2154#ifdef CONFIG_QUOTA
2155        /* Needed for iput() to work correctly and not trash data */
2156        sb->s_flags |= MS_ACTIVE;
2157        /* Turn on quotas so that they are updated correctly */
2158        for (i = 0; i < MAXQUOTAS; i++) {
2159                if (EXT4_SB(sb)->s_qf_names[i]) {
2160                        int ret = ext4_quota_on_mount(sb, i);
2161                        if (ret < 0)
2162                                ext4_msg(sb, KERN_ERR,
2163                                        "Cannot turn on journaled "
2164                                        "quota: error %d", ret);
2165                }
2166        }
2167#endif
2168
2169        while (es->s_last_orphan) {
2170                struct inode *inode;
2171
2172                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
2173                if (IS_ERR(inode)) {
2174                        es->s_last_orphan = 0;
2175                        break;
2176                }
2177
2178                list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2179                dquot_initialize(inode);
2180                if (inode->i_nlink) {
2181                        ext4_msg(sb, KERN_DEBUG,
2182                                "%s: truncating inode %lu to %lld bytes",
2183                                __func__, inode->i_ino, inode->i_size);
2184                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
2185                                  inode->i_ino, inode->i_size);
2186                        ext4_truncate(inode);
2187                        nr_truncates++;
2188                } else {
2189                        ext4_msg(sb, KERN_DEBUG,
2190                                "%s: deleting unreferenced inode %lu",
2191                                __func__, inode->i_ino);
2192                        jbd_debug(2, "deleting unreferenced inode %lu\n",
2193                                  inode->i_ino);
2194                        nr_orphans++;
2195                }
2196                iput(inode);  /* The delete magic happens here! */
2197        }
2198
2199#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
2200
2201        if (nr_orphans)
2202                ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
2203                       PLURAL(nr_orphans));
2204        if (nr_truncates)
2205                ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
2206                       PLURAL(nr_truncates));
2207#ifdef CONFIG_QUOTA
2208        /* Turn quotas off */
2209        for (i = 0; i < MAXQUOTAS; i++) {
2210                if (sb_dqopt(sb)->files[i])
2211                        dquot_quota_off(sb, i);
2212        }
2213#endif
2214        sb->s_flags = s_flags; /* Restore MS_RDONLY status */
2215}
2216
2217/*
2218 * Maximal extent format file size.
2219 * Resulting logical blkno at s_maxbytes must fit in our on-disk
2220 * extent format containers, within a sector_t, and within i_blocks
2221 * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
2222 * so that won't be a limiting factor.
2223 *
2224 * However there is other limiting factor. We do store extents in the form
2225 * of starting block and length, hence the resulting length of the extent
2226 * covering maximum file size must fit into on-disk format containers as
2227 * well. Given that length is always by 1 unit bigger than max unit (because
2228 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
2229 *
2230 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
2231 */
2232static loff_t ext4_max_size(int blkbits, int has_huge_files)
2233{
2234        loff_t res;
2235        loff_t upper_limit = MAX_LFS_FILESIZE;
2236
2237        /* small i_blocks in vfs inode? */
2238        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2239                /*
2240                 * CONFIG_LBDAF is not enabled implies the inode
2241                 * i_block represent total blocks in 512 bytes
2242                 * 32 == size of vfs inode i_blocks * 8
2243                 */
2244                upper_limit = (1LL << 32) - 1;
2245
2246                /* total blocks in file system block size */
2247                upper_limit >>= (blkbits - 9);
2248                upper_limit <<= blkbits;
2249        }
2250
2251        /*
2252         * 32-bit extent-start container, ee_block. We lower the maxbytes
2253         * by one fs block, so ee_len can cover the extent of maximum file
2254         * size
2255         */
2256        res = (1LL << 32) - 1;
2257        res <<= blkbits;
2258
2259        /* Sanity check against vm- & vfs- imposed limits */
2260        if (res > upper_limit)
2261                res = upper_limit;
2262
2263        return res;
2264}
2265
2266/*
2267 * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
2268 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
2269 * We need to be 1 filesystem block less than the 2^48 sector limit.
2270 */
2271static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
2272{
2273        loff_t res = EXT4_NDIR_BLOCKS;
2274        int meta_blocks;
2275        loff_t upper_limit;
2276        /* This is calculated to be the largest file size for a dense, block
2277         * mapped file such that the file's total number of 512-byte sectors,
2278         * including data and all indirect blocks, does not exceed (2^48 - 1).
2279         *
2280         * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
2281         * number of 512-byte sectors of the file.
2282         */
2283
2284        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
2285                /*
2286                 * !has_huge_files or CONFIG_LBDAF not enabled implies that
2287                 * the inode i_block field represents total file blocks in
2288                 * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2289                 */
2290                upper_limit = (1LL << 32) - 1;
2291
2292                /* total blocks in file system block size */
2293                upper_limit >>= (bits - 9);
2294
2295        } else {
2296                /*
2297                 * We use 48 bit ext4_inode i_blocks
2298                 * With EXT4_HUGE_FILE_FL set the i_blocks
2299                 * represent total number of blocks in
2300                 * file system block size
2301                 */
2302                upper_limit = (1LL << 48) - 1;
2303
2304        }
2305
2306        /* indirect blocks */
2307        meta_blocks = 1;
2308        /* double indirect blocks */
2309        meta_blocks += 1 + (1LL << (bits-2));
2310        /* tripple indirect blocks */
2311        meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2312
2313        upper_limit -= meta_blocks;
2314        upper_limit <<= bits;
2315
2316        res += 1LL << (bits-2);
2317        res += 1LL << (2*(bits-2));
2318        res += 1LL << (3*(bits-2));
2319        res <<= bits;
2320        if (res > upper_limit)
2321                res = upper_limit;
2322
2323        if (res > MAX_LFS_FILESIZE)
2324                res = MAX_LFS_FILESIZE;
2325
2326        return res;
2327}
2328
2329static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2330                                   ext4_fsblk_t logical_sb_block, int nr)
2331{
2332        struct ext4_sb_info *sbi = EXT4_SB(sb);
2333        ext4_group_t bg, first_meta_bg;
2334        int has_super = 0;
2335
2336        first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2337
2338        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2339            nr < first_meta_bg)
2340                return logical_sb_block + nr + 1;
2341        bg = sbi->s_desc_per_block * nr;
2342        if (ext4_bg_has_super(sb, bg))
2343                has_super = 1;
2344
2345        return (has_super + ext4_group_first_block_no(sb, bg));
2346}
2347
2348/**
2349 * ext4_get_stripe_size: Get the stripe size.
2350 * @sbi: In memory super block info
2351 *
2352 * If we have specified it via mount option, then
2353 * use the mount option value. If the value specified at mount time is
2354 * greater than the blocks per group use the super block value.
2355 * If the super block value is greater than blocks per group return 0.
2356 * Allocator needs it be less than blocks per group.
2357 *
2358 */
2359static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2360{
2361        unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2362        unsigned long stripe_width =
2363                        le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2364        int ret;
2365
2366        if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2367                ret = sbi->s_stripe;
2368        else if (stripe_width <= sbi->s_blocks_per_group)
2369                ret = stripe_width;
2370        else if (stride <= sbi->s_blocks_per_group)
2371                ret = stride;
2372        else
2373                ret = 0;
2374
2375        /*
2376         * If the stripe width is 1, this makes no sense and
2377         * we set it to 0 to turn off stripe handling code.
2378         */
2379        if (ret <= 1)
2380                ret = 0;
2381
2382        return ret;
2383}
2384
/* sysfs support */
2386
2387struct ext4_attr {
2388        struct attribute attr;
2389        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2390        ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2391                         const char *, size_t);
2392        int offset;
2393};
2394
2395static int parse_strtoul(const char *buf,
2396                unsigned long max, unsigned long *value)
2397{
2398        char *endp;
2399
2400        *value = simple_strtoul(skip_spaces(buf), &endp, 0);
2401        endp = skip_spaces(endp);
2402        if (*endp || *value > max)
2403                return -EINVAL;
2404
2405        return 0;
2406}
2407
2408static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2409                                              struct ext4_sb_info *sbi,
2410                                              char *buf)
2411{
2412        return snprintf(buf, PAGE_SIZE, "%llu\n",
2413                (s64) EXT4_C2B(sbi,
2414                        percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2415}
2416
2417static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2418                                         struct ext4_sb_info *sbi, char *buf)
2419{
2420        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2421
2422        if (!sb->s_bdev->bd_part)
2423                return snprintf(buf, PAGE_SIZE, "0\n");
2424        return snprintf(buf, PAGE_SIZE, "%lu\n",
2425                        (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2426                         sbi->s_sectors_written_start) >> 1);
2427}
2428
2429static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2430                                          struct ext4_sb_info *sbi, char *buf)
2431{
2432        struct super_block *sb = sbi->s_buddy_cache->i_sb;
2433
2434        if (!sb->s_bdev->bd_part)
2435                return snprintf(buf, PAGE_SIZE, "0\n");
2436        return snprintf(buf, PAGE_SIZE, "%llu\n",
2437                        (unsigned long long)(sbi->s_kbytes_written +
2438                        ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2439                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2440}
2441
2442static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2443                                          struct ext4_sb_info *sbi,
2444                                          const char *buf, size_t count)
2445{
2446        unsigned long t;
2447
2448        if (parse_strtoul(buf, 0x40000000, &t))
2449                return -EINVAL;
2450
2451        if (t && !is_power_of_2(t))
2452                return -EINVAL;
2453
2454        sbi->s_inode_readahead_blks = t;
2455        return count;
2456}
2457
2458static ssize_t sbi_ui_show(struct ext4_attr *a,
2459                           struct ext4_sb_info *sbi, char *buf)
2460{
2461        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2462
2463        return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2464}
2465
2466static ssize_t sbi_ui_store(struct ext4_attr *a,
2467                            struct ext4_sb_info *sbi,
2468                            const char *buf, size_t count)
2469{
2470        unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2471        unsigned long t;
2472
2473        if (parse_strtoul(buf, 0xffffffff, &t))
2474                return -EINVAL;
2475        *ui = t;
2476        return count;
2477}
2478
2479static ssize_t trigger_test_error(struct ext4_attr *a,
2480                                  struct ext4_sb_info *sbi,
2481                                  const char *buf, size_t count)
2482{
2483        int len = count;
2484
2485        if (!capable(CAP_SYS_ADMIN))
2486                return -EPERM;
2487
2488        if (len && buf[len-1] == '\n')
2489                len--;
2490
2491        if (len)
2492                ext4_error(sbi->s_sb, "%.*s", len, buf);
2493        return count;
2494}
2495
/*
 * Helpers for declaring sysfs attributes.  Each definition macro expands
 * to a static struct ext4_attr named ext4_attr_<name>.
 */

/* Attribute backed by the ext4_sb_info member _elname (via its offset). */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {			\
	.attr = {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
	.offset = offsetof(struct ext4_sb_info, _elname),	\
}

/* Attribute with an explicit mode and explicit show/store handlers. */
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Mode 0444 attribute with no handlers at all. */
#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
/* Mode 0444 attribute served by <name>_show. */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
/* Mode 0644 attribute served by <name>_show and <name>_store. */
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
/* Mode 0644 attribute exposing an unsigned int member of ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)	\
	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
/* Pointer to the struct attribute embedded in ext4_attr_<name>. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
2512
2513EXT4_RO_ATTR(delayed_allocation_blocks);
2514EXT4_RO_ATTR(session_write_kbytes);
2515EXT4_RO_ATTR(lifetime_write_kbytes);
2516EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2517                 inode_readahead_blks_store, s_inode_readahead_blks);
2518EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
2519EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2520EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2521EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2522EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2523EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2524EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2525EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2526EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2527
2528static struct attribute *ext4_attrs[] = {
2529        ATTR_LIST(delayed_allocation_blocks),
2530        ATTR_LIST(session_write_kbytes),
2531        ATTR_LIST(lifetime_write_kbytes),
2532        ATTR_LIST(inode_readahead_blks),
2533        ATTR_LIST(inode_goal),
2534        ATTR_LIST(mb_stats),
2535        ATTR_LIST(mb_max_to_scan),
2536        ATTR_LIST(mb_min_to_scan),
2537        ATTR_LIST(mb_order2_req),
2538        ATTR_LIST(mb_stream_req),
2539        ATTR_LIST(mb_group_prealloc),
2540        ATTR_LIST(max_writeback_mb_bump),
2541        ATTR_LIST(trigger_fs_error),
2542        NULL,
2543};
2544
2545/* Features this copy of ext4 supports */
2546EXT4_INFO_ATTR(lazy_itable_init);
2547EXT4_INFO_ATTR(batched_discard);
2548
2549static struct attribute *ext4_feat_attrs[] = {
2550        ATTR_LIST(lazy_itable_init),
2551        ATTR_LIST(batched_discard),
2552        NULL,
2553};
2554
2555static ssize_t ext4_attr_show(struct kobject *kobj,
2556                              struct attribute *attr, char *buf)
2557{
2558        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2559                                                s_kobj);
2560        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2561
2562        return a->show ? a->show(a, sbi, buf) : 0;
2563}
2564
2565static ssize_t ext4_attr_store(struct kobject *kobj,
2566                               struct attribute *attr,
2567                               const char *buf, size_t len)
2568{
2569        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2570                                                s_kobj);
2571        struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2572
2573        return a->store ? a->store(a, sbi, buf, len) : 0;
2574}
2575
2576static void ext4_sb_release(struct kobject *kobj)
2577{
2578        struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2579                                                s_kobj);
2580        complete(&sbi->s_kobj_unregister);
2581}
2582
2583static const struct sysfs_ops ext4_attr_ops = {
2584        .show   = ext4_attr_show,
2585        .store  = ext4_attr_store,
2586};
2587
2588static struct kobj_type ext4_ktype = {
2589        .default_attrs  = ext4_attrs,
2590        .sysfs_ops      = &ext4_attr_ops,
2591        .release        = ext4_sb_release,
2592};
2593
2594static void ext4_feat_release(struct kobject *kobj)
2595{
2596        complete(&ext4_feat->f_kobj_unregister);
2597}
2598
2599static struct kobj_type ext4_feat_ktype = {
2600        .default_attrs  = ext4_feat_attrs,
2601        .sysfs_ops      = &ext4_attr_ops,
2602        .release        = ext4_feat_release,
2603};
2604
2605/*
2606 * Check whether this filesystem can be mounted based on
2607 * the features present and the RDONLY/RDWR mount requested.
2608 * Returns 1 if this filesystem can be mounted as requested,
2609 * 0 if it cannot be.
2610 */
2611static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2612{
2613        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
2614                ext4_msg(sb, KERN_ERR,
2615                        "Couldn't mount because of "
2616                        "unsupported optional features (%x)",
2617                        (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2618                        ~EXT4_FEATURE_INCOMPAT_SUPP));
2619                return 0;
2620        }
2621
2622        if (readonly)
2623                return 1;
2624
2625        /* Check that feature set is OK for a read-write mount */
2626        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
2627                ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
2628                         "unsupported optional features (%x)",
2629                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2630                                ~EXT4_FEATURE_RO_COMPAT_SUPP));
2631                return 0;
2632        }
2633        /*
2634         * Large file size enabled file system can only be mounted
2635         * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
2636         */
2637        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
2638                if (sizeof(blkcnt_t) < sizeof(u64)) {
2639                        ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
2640                                 "cannot be mounted RDWR without "
2641                                 "CONFIG_LBDAF");
2642                        return 0;
2643                }
2644        }
2645        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2646            !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2647                ext4_msg(sb, KERN_ERR,
2648                         "Can't support bigalloc feature without "
2649                         "extents feature\n");
2650                return 0;
2651        }
2652
2653#ifndef CONFIG_QUOTA
2654        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
2655            !readonly) {
2656                ext4_msg(sb, KERN_ERR,
2657                         "Filesystem with quota feature cannot be mounted RDWR "
2658                         "without CONFIG_QUOTA");
2659                return 0;
2660        }
2661#endif  /* CONFIG_QUOTA */
2662        return 1;
2663}
2664
2665/*
2666 * This function is called once a day if we have errors logged
2667 * on the file system
2668 */
2669static void print_daily_error_info(unsigned long arg)
2670{
2671        struct super_block *sb = (struct super_block *) arg;
2672        struct ext4_sb_info *sbi;
2673        struct ext4_super_block *es;
2674
2675        sbi = EXT4_SB(sb);
2676        es = sbi->s_es;
2677
2678        if (es->s_error_count)
2679                ext4_msg(sb, KERN_NOTICE, "error count: %u",
2680                         le32_to_cpu(es->s_error_count));
2681        if (es->s_first_error_time) {
2682                printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
2683                       sb->s_id, le32_to_cpu(es->s_first_error_time),
2684                       (int) sizeof(es->s_first_error_func),
2685                       es->s_first_error_func,
2686                       le32_to_cpu(es->s_first_error_line));
2687                if (es->s_first_error_ino)
2688                        printk(": inode %u",
2689                               le32_to_cpu(es->s_first_error_ino));
2690                if (es->s_first_error_block)
2691                        printk(": block %llu", (unsigned long long)
2692                               le64_to_cpu(es->s_first_error_block));
2693                printk("\n");
2694        }
2695        if (es->s_last_error_time) {
2696                printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
2697                       sb->s_id, le32_to_cpu(es->s_last_error_time),
2698                       (int) sizeof(es->s_last_error_func),
2699                       es->s_last_error_func,
2700                       le32_to_cpu(es->s_last_error_line));
2701                if (es->s_last_error_ino)
2702                        printk(": inode %u",
2703                               le32_to_cpu(es->s_last_error_ino));
2704                if (es->s_last_error_block)
2705                        printk(": block %llu", (unsigned long long)
2706                               le64_to_cpu(es->s_last_error_block));
2707                printk("\n");
2708        }
2709        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
2710}
2711
2712/* Find next suitable group and run ext4_init_inode_table */
2713static int ext4_run_li_request(struct ext4_li_request *elr)
2714{
2715        struct ext4_group_desc *gdp = NULL;
2716        ext4_group_t group, ngroups;
2717        struct super_block *sb;
2718        unsigned long timeout = 0;
2719        int ret = 0;
2720
2721        sb = elr->lr_super;
2722        ngroups = EXT4_SB(sb)->s_groups_count;
2723
2724        sb_start_write(sb);
2725        for (group = elr->lr_next_group; group < ngroups; group++) {
2726                gdp = ext4_get_group_desc(sb, group, NULL);
2727                if (!gdp) {
2728                        ret = 1;
2729                        break;
2730                }
2731
2732                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2733                        break;
2734        }
2735
2736        if (group == ngroups)
2737                ret = 1;
2738
2739        if (!ret) {
2740                timeout = jiffies;
2741                ret = ext4_init_inode_table(sb, group,
2742                                            elr->lr_timeout ? 0 : 1);
2743                if (elr->lr_timeout == 0) {
2744                        timeout = (jiffies - timeout) *
2745                                  elr->lr_sbi->s_li_wait_mult;
2746                        elr->lr_timeout = timeout;
2747                }
2748                elr->lr_next_sched = jiffies + elr->lr_timeout;
2749                elr->lr_next_group = group + 1;
2750        }
2751        sb_end_write(sb);
2752
2753        return ret;
2754}
2755
2756/*
2757 * Remove lr_request from the list_request and free the
2758 * request structure. Should be called with li_list_mtx held
2759 */
2760static void ext4_remove_li_request(struct ext4_li_request *elr)
2761{
2762        struct ext4_sb_info *sbi;
2763
2764        if (!elr)
2765                return;
2766
2767        sbi = elr->lr_sbi;
2768
2769        list_del(&elr->lr_request);
2770        sbi->s_li_request = NULL;
2771        kfree(elr);
2772}
2773
2774static void ext4_unregister_li_request(struct super_block *sb)
2775{
2776        mutex_lock(&ext4_li_mtx);
2777        if (!ext4_li_info) {
2778                mutex_unlock(&ext4_li_mtx);
2779                return;
2780        }
2781
2782        mutex_lock(&ext4_li_info->li_list_mtx);
2783        ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
2784        mutex_unlock(&ext4_li_info->li_list_mtx);
2785        mutex_unlock(&ext4_li_mtx);
2786}
2787
2788static struct task_struct *ext4_lazyinit_task;
2789
2790/*
2791 * This is the function where ext4lazyinit thread lives. It walks
2792 * through the request list searching for next scheduled filesystem.
2793 * When such a fs is found, run the lazy initialization request
2794 * (ext4_rn_li_request) and keep track of the time spend in this
2795 * function. Based on that time we compute next schedule time of
2796 * the request. When walking through the list is complete, compute
2797 * next waking time and put itself into sleep.
2798 */
2799static int ext4_lazyinit_thread(void *arg)
2800{
2801        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
2802        struct list_head *pos, *n;
2803        struct ext4_li_request *elr;
2804        unsigned long next_wakeup, cur;
2805
2806        BUG_ON(NULL == eli);
2807
2808cont_thread:
2809        while (true) {
2810                next_wakeup = MAX_JIFFY_OFFSET;
2811
2812                mutex_lock(&eli->li_list_mtx);
2813                if (list_empty(&eli->li_request_list)) {
2814                        mutex_unlock(&eli->li_list_mtx);
2815                        goto exit_thread;
2816                }
2817
2818                list_for_each_safe(pos, n, &eli->li_request_list) {
2819                        elr = list_entry(pos, struct ext4_li_request,
2820                                         lr_request);
2821
2822                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
2823                                if (ext4_run_li_request(elr) != 0) {
2824                                        /* error, remove the lazy_init job */
2825                                        ext4_remove_li_request(elr);
2826                                        continue;
2827                                }
2828                        }
2829
2830                        if (time_before(elr->lr_next_sched, next_wakeup))
2831                                next_wakeup = elr->lr_next_sched;
2832                }
2833                mutex_unlock(&eli->li_list_mtx);
2834
2835                try_to_freeze();
2836
2837                cur = jiffies;
2838                if ((time_after_eq(cur, next_wakeup)) ||
2839                    (MAX_JIFFY_OFFSET == next_wakeup)) {
2840                        cond_resched();
2841                        continue;
2842                }
2843
2844                schedule_timeout_interruptible(next_wakeup - cur);
2845
2846                if (kthread_should_stop()) {
2847                        ext4_clear_request_list();
2848                        goto exit_thread;
2849                }
2850        }
2851
2852exit_thread:
2853        /*
2854         * It looks like the request list is empty, but we need
2855         * to check it under the li_list_mtx lock, to prevent any
2856         * additions into it, and of course we should lock ext4_li_mtx
2857         * to atomically free the list and ext4_li_info, because at
2858         * this point another ext4 filesystem could be registering
2859         * new one.
2860         */
2861        mutex_lock(&ext4_li_mtx);
2862        mutex_lock(&eli->li_list_mtx);
2863        if (!list_empty(&eli->li_request_list)) {
2864                mutex_unlock(&eli->li_list_mtx);
2865                mutex_unlock(&ext4_li_mtx);
2866                goto cont_thread;
2867        }
2868        mutex_unlock(&eli->li_list_mtx);
2869        kfree(ext4_li_info);
2870        ext4_li_info = NULL;
2871        mutex_unlock(&ext4_li_mtx);
2872
2873        return 0;
2874}
2875
2876static void ext4_clear_request_list(void)
2877{
2878        struct list_head *pos, *n;
2879        struct ext4_li_request *elr;
2880
2881        mutex_lock(&ext4_li_info->li_list_mtx);
2882        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2883                elr = list_entry(pos, struct ext4_li_request,
2884                                 lr_request);
2885                ext4_remove_li_request(elr);
2886        }
2887        mutex_unlock(&ext4_li_info->li_list_mtx);
2888}
2889
2890static int ext4_run_lazyinit_thread(void)
2891{
2892        ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
2893                                         ext4_li_info, "ext4lazyinit");
2894        if (IS_ERR(ext4_lazyinit_task)) {
2895                int err = PTR_ERR(ext4_lazyinit_task);
2896                ext4_clear_request_list();
2897                kfree(ext4_li_info);
2898                ext4_li_info = NULL;
2899                printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
2900                                 "initialization thread\n",
2901                                 err);
2902                return err;
2903        }
2904        ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
2905        return 0;
2906}
2907
2908/*
2909 * Check whether it make sense to run itable init. thread or not.
2910 * If there is at least one uninitialized inode table, return
2911 * corresponding group number, else the loop goes through all
2912 * groups and return total number of groups.
2913 */
2914static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
2915{
2916        ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
2917        struct ext4_group_desc *gdp = NULL;
2918
2919        for (group = 0; group < ngroups; group++) {
2920                gdp = ext4_get_group_desc(sb, group, NULL);
2921                if (!gdp)
2922                        continue;
2923
2924                if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
2925                        break;
2926        }
2927
2928        return group;
2929}
2930
2931static int ext4_li_info_new(void)
2932{
2933        struct ext4_lazy_init *eli = NULL;
2934
2935        eli = kzalloc(sizeof(*eli), GFP_KERNEL);
2936        if (!eli)
2937                return -ENOMEM;
2938
2939        INIT_LIST_HEAD(&eli->li_request_list);
2940        mutex_init(&eli->li_list_mtx);
2941
2942        eli->li_state |= EXT4_LAZYINIT_QUIT;
2943
2944        ext4_li_info = eli;
2945
2946        return 0;
2947}
2948
2949static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
2950                                            ext4_group_t start)
2951{
2952        struct ext4_sb_info *sbi = EXT4_SB(sb);
2953        struct ext4_li_request *elr;
2954        unsigned long rnd;
2955
2956        elr = kzalloc(sizeof(*elr), GFP_KERNEL);
2957        if (!elr)
2958                return NULL;
2959
2960        elr->lr_super = sb;
2961        elr->lr_sbi = sbi;
2962        elr->lr_next_group = start;
2963
2964        /*
2965         * Randomize first schedule time of the request to
2966         * spread the inode table initialization requests
2967         * better.
2968         */
2969        get_random_bytes(&rnd, sizeof(rnd));
2970        elr->lr_next_sched = jiffies + (unsigned long)rnd %
2971                             (EXT4_DEF_LI_MAX_START_DELAY * HZ);
2972
2973        return elr;
2974}
2975
2976static int ext4_register_li_request(struct super_block *sb,
2977                                    ext4_group_t first_not_zeroed)
2978{
2979        struct ext4_sb_info *sbi = EXT4_SB(sb);
2980        struct ext4_li_request *elr;
2981        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
2982        int ret = 0;
2983
2984        if (sbi->s_li_request != NULL) {
2985                /*
2986                 * Reset timeout so it can be computed again, because
2987                 * s_li_wait_mult might have changed.
2988                 */
2989                sbi->s_li_request->lr_timeout = 0;
2990                return 0;
2991        }
2992
2993        if (first_not_zeroed == ngroups ||
2994            (sb->s_flags & MS_RDONLY) ||
2995            !test_opt(sb, INIT_INODE_TABLE))
2996                return 0;
2997
2998        elr = ext4_li_request_new(sb, first_not_zeroed);
2999        if (!elr)
3000                return -ENOMEM;
3001
3002        mutex_lock(&ext4_li_mtx);
3003
3004        if (NULL == ext4_li_info) {
3005                ret = ext4_li_info_new();
3006                if (ret)
3007                        goto out;
3008        }
3009
3010        mutex_lock(&ext4_li_info->li_list_mtx);
3011        list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3012        mutex_unlock(&ext4_li_info->li_list_mtx);
3013
3014        sbi->s_li_request = elr;
3015        /*
3016         * set elr to NULL here since it has been inserted to
3017         * the request_list and the removal and free of it is
3018         * handled by ext4_clear_request_list from now on.
3019         */
3020        elr = NULL;
3021
3022        if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3023                ret = ext4_run_lazyinit_thread();
3024                if (ret)
3025                        goto out;
3026        }
3027out:
3028        mutex_unlock(&ext4_li_mtx);
3029        if (ret)
3030                kfree(elr);
3031        return ret;
3032}
3033
3034/*
3035 * We do not need to lock anything since this is called on
3036 * module unload.
3037 */
3038static void ext4_destroy_lazyinit_thread(void)
3039{
3040        /*
3041         * If thread exited earlier
3042         * there's nothing to be done.
3043         */
3044        if (!ext4_li_info || !ext4_lazyinit_task)
3045                return;
3046
3047        kthread_stop(ext4_lazyinit_task);
3048}
3049
3050static int set_journal_csum_feature_set(struct super_block *sb)
3051{
3052        int ret = 1;
3053        int compat, incompat;
3054        struct ext4_sb_info *sbi = EXT4_SB(sb);
3055
3056        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3057                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3058                /* journal checksum v2 */
3059                compat = 0;
3060                incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
3061        } else {
3062                /* journal checksum v1 */
3063                compat = JBD2_FEATURE_COMPAT_CHECKSUM;
3064                incompat = 0;
3065        }
3066
3067        if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3068                ret = jbd2_journal_set_features(sbi->s_journal,
3069                                compat, 0,
3070                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3071                                incompat);
3072        } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
3073                ret = jbd2_journal_set_features(sbi->s_journal,
3074                                compat, 0,
3075                                incompat);
3076                jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3077                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3078        } else {
3079                jbd2_journal_clear_features(sbi->s_journal,
3080                                JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3081                                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3082                                JBD2_FEATURE_INCOMPAT_CSUM_V2);
3083        }
3084
3085        return ret;
3086}
3087
3088/*
3089 * Note: calculating the overhead so we can be compatible with
3090 * historical BSD practice is quite difficult in the face of
3091 * clusters/bigalloc.  This is because multiple metadata blocks from
3092 * different block group can end up in the same allocation cluster.
3093 * Calculating the exact overhead in the face of clustered allocation
3094 * requires either O(all block bitmaps) in memory or O(number of block
3095 * groups**2) in time.  We will still calculate the superblock for
3096 * older file systems --- and if we come across with a bigalloc file
3097 * system with zero in s_overhead_clusters the estimate will be close to
3098 * correct especially for very large cluster sizes --- but for newer
3099 * file systems, it's better to calculate this figure once at mkfs
3100 * time, and store it in the superblock.  If the superblock value is
3101 * present (even for non-bigalloc file systems), we will use it.
3102 */
3103static int count_overhead(struct super_block *sb, ext4_group_t grp,
3104                          char *buf)
3105{
3106        struct ext4_sb_info     *sbi = EXT4_SB(sb);
3107        struct ext4_group_desc  *gdp;
3108        ext4_fsblk_t            first_block, last_block, b;
3109        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
3110        int                     s, j, count = 0;
3111
3112        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3113                return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3114                        sbi->s_itb_per_group + 2);
3115
3116        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3117                (grp * EXT4_BLOCKS_PER_GROUP(sb));
3118        last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
3119        for (i = 0; i < ngroups; i++) {
3120                gdp = ext4_get_group_desc(sb, i, NULL);
3121                b = ext4_block_bitmap(sb, gdp);
3122                if (b >= first_block && b <= last_block) {
3123                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3124                        count++;
3125                }
3126                b = ext4_inode_bitmap(sb, gdp);
3127                if (b >= first_block && b <= last_block) {
3128                        ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
3129                        count++;
3130                }
3131                b = ext4_inode_table(sb, gdp);
3132                if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
3133                        for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
3134                                int c = EXT4_B2C(sbi, b - first_block);
3135                                ext4_set_bit(c, buf);
3136                                count++;
3137                        }
3138                if (i != grp)
3139                        continue;
3140                s = 0;
3141                if (ext4_bg_has_super(sb, grp)) {
3142                        ext4_set_bit(s++, buf);
3143                        count++;
3144                }
3145                for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
3146                        ext4_set_bit(EXT4_B2C(sbi, s++), buf);
3147                        count++;
3148                }
3149        }
3150        if (!count)
3151                return 0;
3152        return EXT4_CLUSTERS_PER_GROUP(sb) -
3153                ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
3154}
3155
3156/*
3157 * Compute the overhead and stash it in sbi->s_overhead
3158 */
3159int ext4_calculate_overhead(struct super_block *sb)
3160{
3161        struct ext4_sb_info *sbi = EXT4_SB(sb);
3162        struct ext4_super_block *es = sbi->s_es;
3163        ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3164        ext4_fsblk_t overhead = 0;
3165        char *buf = (char *) get_zeroed_page(GFP_KERNEL);
3166
3167        memset(buf, 0, PAGE_SIZE);
3168        if (!buf)
3169                return -ENOMEM;
3170
3171        /*
3172         * Compute the overhead (FS structures).  This is constant
3173         * for a given filesystem unless the number of block groups
3174         * changes so we cache the previous value until it does.
3175         */
3176
3177        /*
3178         * All of the blocks before first_data_block are overhead
3179         */
3180        overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
3181
3182        /*
3183         * Add the overhead found in each block group
3184         */
3185        for (i = 0; i < ngroups; i++) {
3186                int blks;
3187
3188                blks = count_overhead(sb, i, buf);
3189                overhead += blks;
3190                if (blks)
3191                        memset(buf, 0, PAGE_SIZE);
3192                cond_resched();
3193        }
3194        sbi->s_overhead = overhead;
3195        smp_wmb();
3196        free_page((unsigned long) buf);
3197        return 0;
3198}
3199
3200static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3201{
3202        char *orig_data = kstrdup(data, GFP_KERNEL);
3203        struct buffer_head *bh;
3204        struct ext4_super_block *es = NULL;
3205        struct ext4_sb_info *sbi;
3206        ext4_fsblk_t block;
3207        ext4_fsblk_t sb_block = get_sb_block(&data);
3208        ext4_fsblk_t logical_sb_block;
3209        unsigned long offset = 0;
3210        unsigned long journal_devnum = 0;
3211        unsigned long def_mount_opts;
3212        struct inode *root;
3213        char *cp;
3214        const char *descr;
3215        int ret = -ENOMEM;
3216        int blocksize, clustersize;
3217        unsigned int db_count;
3218        unsigned int i;
3219        int needs_recovery, has_huge_files, has_bigalloc;
3220        __u64 blocks_count;
3221        int err;
3222        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3223        ext4_group_t first_not_zeroed;
3224
3225        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
3226        if (!sbi)
3227                goto out_free_orig;
3228
3229        sbi->s_blockgroup_lock =
3230                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
3231        if (!sbi->s_blockgroup_lock) {
3232                kfree(sbi);
3233                goto out_free_orig;
3234        }
3235        sb->s_fs_info = sbi;
3236        sbi->s_sb = sb;
3237        sbi->s_mount_opt = 0;
3238        sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID);
3239        sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID);
3240        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
3241        sbi->s_sb_block = sb_block;
3242        if (sb->s_bdev->bd_part)
3243                sbi->s_sectors_written_start =
3244                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3245
3246        /* Cleanup superblock name */
3247        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
3248                *cp = '!';
3249
3250        ret = -EINVAL;
3251        blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
3252        if (!blocksize) {
3253                ext4_msg(sb, KERN_ERR, "unable to set blocksize");
3254                goto out_fail;
3255        }
3256
3257        /*
3258         * The ext4 superblock will not be buffer aligned for other than 1kB
3259         * block sizes.  We need to calculate the offset from buffer start.
3260         */
3261        if (blocksize != EXT4_MIN_BLOCK_SIZE) {
3262                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3263                offset = do_div(logical_sb_block, blocksize);
3264        } else {
3265                logical_sb_block = sb_block;
3266        }
3267
3268        if (!(bh = sb_bread(sb, logical_sb_block))) {
3269                ext4_msg(sb, KERN_ERR, "unable to read superblock");
3270                goto out_fail;
3271        }
3272        /*
3273         * Note: s_es must be initialized as soon as possible because
3274         *       some ext4 macro-instructions depend on its value
3275         */
3276        es = (struct ext4_super_block *) (bh->b_data + offset);
3277        sbi->s_es = es;
3278        sb->s_magic = le16_to_cpu(es->s_magic);
3279        if (sb->s_magic != EXT4_SUPER_MAGIC)
3280                goto cantfind_ext4;
3281        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
3282
3283        /* Warn if metadata_csum and gdt_csum are both set. */
3284        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3285                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
3286            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
3287                ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
3288                             "redundant flags; please run fsck.");
3289
3290        /* Check for a known checksum algorithm */
3291        if (!ext4_verify_csum_type(sb, es)) {
3292                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3293                         "unknown checksum algorithm.");
3294                silent = 1;
3295                goto cantfind_ext4;
3296        }
3297
3298        /* Load the checksum driver */
3299        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3300                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3301                sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
3302                if (IS_ERR(sbi->s_chksum_driver)) {
3303                        ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
3304                        ret = PTR_ERR(sbi->s_chksum_driver);
3305                        sbi->s_chksum_driver = NULL;
3306                        goto failed_mount;
3307                }
3308        }
3309
3310        /* Check superblock checksum */
3311        if (!ext4_superblock_csum_verify(sb, es)) {
3312                ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
3313                         "invalid superblock checksum.  Run e2fsck?");
3314                silent = 1;
3315                goto cantfind_ext4;
3316        }
3317
3318        /* Precompute checksum seed for all metadata */
3319        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3320                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
3321                sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3322                                               sizeof(es->s_uuid));
3323
3324        /* Set defaults before we parse the mount options */
3325        def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
3326        set_opt(sb, INIT_INODE_TABLE);
3327        if (def_mount_opts & EXT4_DEFM_DEBUG)
3328                set_opt(sb, DEBUG);
3329        if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
3330                set_opt(sb, GRPID);
3331        if (def_mount_opts & EXT4_DEFM_UID16)
3332                set_opt(sb, NO_UID32);
3333        /* xattr user namespace & acls are now defaulted on */
3334#ifdef CONFIG_EXT4_FS_XATTR
3335        set_opt(sb, XATTR_USER);
3336#endif
3337#ifdef CONFIG_EXT4_FS_POSIX_ACL
3338        set_opt(sb, POSIX_ACL);
3339#endif
3340        set_opt(sb, MBLK_IO_SUBMIT);
3341        if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3342                set_opt(sb, JOURNAL_DATA);
3343        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
3344                set_opt(sb, ORDERED_DATA);
3345        else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
3346                set_opt(sb, WRITEBACK_DATA);
3347
3348        if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
3349                set_opt(sb, ERRORS_PANIC);
3350        else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
3351                set_opt(sb, ERRORS_CONT);
3352        else
3353                set_opt(sb, ERRORS_RO);
3354        if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
3355                set_opt(sb, BLOCK_VALIDITY);
3356        if (def_mount_opts & EXT4_DEFM_DISCARD)
3357                set_opt(sb, DISCARD);
3358
3359        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
3360        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
3361        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
3362        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
3363        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
3364
3365        if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
3366                set_opt(sb, BARRIER);
3367
3368        /*
3369         * enable delayed allocation by default
3370         * Use -o nodelalloc to turn it off
3371         */
3372        if (!IS_EXT3_SB(sb) &&
3373            ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3374                set_opt(sb, DELALLOC);
3375
3376        /*
3377         * set default s_li_wait_mult for lazyinit, for the case there is
3378         * no mount option specified.
3379         */
3380        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
3381
3382        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
3383                           &journal_devnum, &journal_ioprio, 0)) {
3384                ext4_msg(sb, KERN_WARNING,
3385                         "failed to parse options in superblock: %s",
3386                         sbi->s_es->s_mount_opts);
3387        }
3388        sbi->s_def_mount_opt = sbi->s_mount_opt;
3389        if (!parse_options((char *) data, sb, &journal_devnum,
3390                           &journal_ioprio, 0))
3391                goto failed_mount;
3392
3393        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3394                printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3395                            "with data=journal disables delayed "
3396                            "allocation and O_DIRECT support!\n");
3397                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3398                        ext4_msg(sb, KERN_ERR, "can't mount with "
3399                                 "both data=journal and delalloc");
3400                        goto failed_mount;
3401                }
3402                if (test_opt(sb, DIOREAD_NOLOCK)) {
3403                        ext4_msg(sb, KERN_ERR, "can't mount with "
3404                                 "both data=journal and delalloc");
3405                        goto failed_mount;
3406                }
3407                if (test_opt(sb, DELALLOC))
3408                        clear_opt(sb, DELALLOC);
3409        }
3410
3411        blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3412        if (test_opt(sb, DIOREAD_NOLOCK)) {
3413                if (blocksize < PAGE_SIZE) {
3414                        ext4_msg(sb, KERN_ERR, "can't mount with "
3415                                 "dioread_nolock if block size != PAGE_SIZE");
3416                        goto failed_mount;
3417                }
3418        }
3419
3420        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3421                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3422
3423        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
3424            (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
3425             EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
3426             EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
3427                ext4_msg(sb, KERN_WARNING,
3428                       "feature flags set on rev 0 fs, "
3429                       "running e2fsck is recommended");
3430
3431        if (IS_EXT2_SB(sb)) {
3432                if (ext2_feature_set_ok(sb))
3433                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
3434                                 "using the ext4 subsystem");
3435                else {
3436                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
3437                                 "to feature incompatibilities");
3438                        goto failed_mount;
3439                }
3440        }
3441
3442        if (IS_EXT3_SB(sb)) {
3443                if (ext3_feature_set_ok(sb))
3444                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
3445                                 "using the ext4 subsystem");
3446                else {
3447                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
3448                                 "to feature incompatibilities");
3449                        goto failed_mount;
3450                }
3451        }
3452
3453        /*
3454         * Check feature flags regardless of the revision level, since we
3455         * previously didn't change the revision level when setting the flags,
3456         * so there is a chance incompat flags are set on a rev 0 filesystem.
3457         */
3458        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3459                goto failed_mount;
3460
3461        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3462            blocksize > EXT4_MAX_BLOCK_SIZE) {
3463                ext4_msg(sb, KERN_ERR,
3464                       "Unsupported filesystem blocksize %d", blocksize);
3465                goto failed_mount;
3466        }
3467
3468        if (sb->s_blocksize != blocksize) {
3469                /* Validate the filesystem blocksize */
3470                if (!sb_set_blocksize(sb, blocksize)) {
3471                        ext4_msg(sb, KERN_ERR, "bad block size %d",
3472                                        blocksize);
3473                        goto failed_mount;
3474                }
3475
3476                brelse(bh);
3477                logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3478                offset = do_div(logical_sb_block, blocksize);
3479                bh = sb_bread(sb, logical_sb_block);
3480                if (!bh) {
3481                        ext4_msg(sb, KERN_ERR,
3482                               "Can't read superblock on 2nd try");
3483                        goto failed_mount;
3484                }
3485                es = (struct ext4_super_block *)(bh->b_data + offset);
3486                sbi->s_es = es;
3487                if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
3488                        ext4_msg(sb, KERN_ERR,
3489                               "Magic mismatch, very weird!");
3490                        goto failed_mount;
3491                }
3492        }
3493
3494        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3495                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
3496        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
3497                                                      has_huge_files);
3498        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
3499
3500        if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
3501                sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
3502                sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
3503        } else {
3504                sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
3505                sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
3506                if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
3507                    (!is_power_of_2(sbi->s_inode_size)) ||
3508                    (sbi->s_inode_size > blocksize)) {
3509                        ext4_msg(sb, KERN_ERR,
3510                               "unsupported inode size: %d",
3511                               sbi->s_inode_size);
3512                        goto failed_mount;
3513                }
3514                if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
3515                        sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
3516        }
3517
3518        sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
3519        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
3520                if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
3521                    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
3522                    !is_power_of_2(sbi->s_desc_size)) {
3523                        ext4_msg(sb, KERN_ERR,
3524                               "unsupported descriptor size %lu",
3525                               sbi->s_desc_size);
3526                        goto failed_mount;
3527                }
3528        } else
3529                sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
3530
3531        sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
3532        sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
3533        if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
3534                goto cantfind_ext4;
3535
3536        sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
3537        if (sbi->s_inodes_per_block == 0)
3538                goto cantfind_ext4;
3539        sbi->s_itb_per_group = sbi->s_inodes_per_group /
3540                                        sbi->s_inodes_per_block;
3541        sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
3542        sbi->s_sbh = bh;
3543        sbi->s_mount_state = le16_to_cpu(es->s_state);
3544        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3545        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3546
3547        for (i = 0; i < 4; i++)
3548                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3549        sbi->s_def_hash_version = es->s_def_hash_version;
3550        i = le32_to_cpu(es->s_flags);
3551        if (i & EXT2_FLAGS_UNSIGNED_HASH)
3552                sbi->s_hash_unsigned = 3;
3553        else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
3554#ifdef __CHAR_UNSIGNED__
3555                es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
3556                sbi->s_hash_unsigned = 3;
3557#else
3558                es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
3559#endif
3560        }
3561
3562        /* Handle clustersize */
3563        clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3564        has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3565                                EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3566        if (has_bigalloc) {
3567                if (clustersize < blocksize) {
3568                        ext4_msg(sb, KERN_ERR,
3569                                 "cluster size (%d) smaller than "
3570                                 "block size (%d)", clustersize, blocksize);
3571                        goto failed_mount;
3572                }
3573                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3574                        le32_to_cpu(es->s_log_block_size);
3575                sbi->s_clusters_per_group =
3576                        le32_to_cpu(es->s_clusters_per_group);
3577                if (sbi->s_clusters_per_group > blocksize * 8) {
3578                        ext4_msg(sb, KERN_ERR,
3579                                 "#clusters per group too big: %lu",
3580                                 sbi->s_clusters_per_group);
3581                        goto failed_mount;
3582                }
3583                if (sbi->s_blocks_per_group !=
3584                    (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3585                        ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3586                                 "clusters per group (%lu) inconsistent",
3587                                 sbi->s_blocks_per_group,
3588                                 sbi->s_clusters_per_group);
3589                        goto failed_mount;
3590                }
3591        } else {
3592                if (clustersize != blocksize) {
3593                        ext4_warning(sb, "fragment/cluster size (%d) != "
3594                                     "block size (%d)", clustersize,
3595                                     blocksize);
3596                        clustersize = blocksize;
3597                }
3598                if (sbi->s_blocks_per_group > blocksize * 8) {
3599                        ext4_msg(sb, KERN_ERR,
3600                                 "#blocks per group too big: %lu",
3601                                 sbi->s_blocks_per_group);
3602                        goto failed_mount;
3603                }
3604                sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3605                sbi->s_cluster_bits = 0;
3606        }
3607        sbi->s_cluster_ratio = clustersize / blocksize;
3608
3609        if (sbi->s_inodes_per_group > blocksize * 8) {
3610                ext4_msg(sb, KERN_ERR,
3611                       "#inodes per group too big: %lu",
3612                       sbi->s_inodes_per_group);
3613                goto failed_mount;
3614        }
3615
3616        /*
3617         * Test whether we have more sectors than will fit in sector_t,
3618         * and whether the max offset is addressable by the page cache.
3619         */
3620        err = generic_check_addressable(sb->s_blocksize_bits,
3621                                        ext4_blocks_count(es));
3622        if (err) {
3623                ext4_msg(sb, KERN_ERR, "filesystem"
3624                         " too large to mount safely on this system");
3625                if (sizeof(sector_t) < 8)
3626                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3627                ret = err;
3628                goto failed_mount;
3629        }
3630
3631        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
3632                goto cantfind_ext4;
3633
3634        /* check blocks count against device size */
3635        blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
3636        if (blocks_count && ext4_blocks_count(es) > blocks_count) {
3637                ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
3638                       "exceeds size of device (%llu blocks)",
3639                       ext4_blocks_count(es), blocks_count);
3640                goto failed_mount;
3641        }
3642
3643        /*
3644         * It makes no sense for the first data block to be beyond the end
3645         * of the filesystem.
3646         */
3647        if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3648                ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3649                         "block %u is beyond end of filesystem (%llu)",
3650                         le32_to_cpu(es->s_first_data_block),
3651                         ext4_blocks_count(es));
3652                goto failed_mount;
3653        }
3654        blocks_count = (ext4_blocks_count(es) -
3655                        le32_to_cpu(es->s_first_data_block) +
3656                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
3657        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
3658        if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
3659                ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
3660                       "(block count %llu, first data block %u, "
3661                       "blocks per group %lu)", sbi->s_groups_count,
3662                       ext4_blocks_count(es),
3663                       le32_to_cpu(es->s_first_data_block),
3664                       EXT4_BLOCKS_PER_GROUP(sb));
3665                goto failed_mount;
3666        }
3667        sbi->s_groups_count = blocks_count;
3668        sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
3669                        (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3670        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3671                   EXT4_DESC_PER_BLOCK(sb);
3672        sbi->s_group_desc = ext4_kvmalloc(db_count *
3673                                          sizeof(struct buffer_head *),
3674                                          GFP_KERNEL);
3675        if (sbi->s_group_desc == NULL) {
3676                ext4_msg(sb, KERN_ERR, "not enough memory");
3677                ret = -ENOMEM;
3678                goto failed_mount;
3679        }
3680
3681        if (ext4_proc_root)
3682                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3683
3684        if (sbi->s_proc)
3685                proc_create_data("options", S_IRUGO, sbi->s_proc,
3686                                 &ext4_seq_options_fops, sb);
3687
3688        bgl_lock_init(sbi->s_blockgroup_lock);
3689
3690        for (i = 0; i < db_count; i++) {
3691                block = descriptor_loc(sb, logical_sb_block, i);
3692                sbi->s_group_desc[i] = sb_bread(sb, block);
3693                if (!sbi->s_group_desc[i]) {
3694                        ext4_msg(sb, KERN_ERR,
3695                               "can't read group descriptor %d", i);
3696                        db_count = i;
3697                        goto failed_mount2;
3698                }
3699        }
3700        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3701                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3702                goto failed_mount2;
3703        }
3704        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3705                if (!ext4_fill_flex_info(sb)) {
3706                        ext4_msg(sb, KERN_ERR,
3707                               "unable to initialize "
3708                               "flex_bg meta info!");
3709                        goto failed_mount2;
3710                }
3711
3712        sbi->s_gdb_count = db_count;
3713        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3714        spin_lock_init(&sbi->s_next_gen_lock);
3715
3716        init_timer(&sbi->s_err_report);
3717        sbi->s_err_report.function = print_daily_error_info;
3718        sbi->s_err_report.data = (unsigned long) sb;
3719
3720        err = percpu_counter_init(&sbi->s_freeclusters_counter,
3721                        ext4_count_free_clusters(sb));
3722        if (!err) {
3723                err = percpu_counter_init(&sbi->s_freeinodes_counter,
3724                                ext4_count_free_inodes(sb));
3725        }
3726        if (!err) {
3727                err = percpu_counter_init(&sbi->s_dirs_counter,
3728                                ext4_count_dirs(sb));
3729        }
3730        if (!err) {
3731                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3732        }
3733        if (err) {
3734                ext4_msg(sb, KERN_ERR, "insufficient memory");
3735                ret = err;
3736                goto failed_mount3;
3737        }
3738
3739        sbi->s_stripe = ext4_get_stripe_size(sbi);
3740        sbi->s_max_writeback_mb_bump = 128;
3741
3742        /*
3743         * set up enough so that it can read an inode
3744         */
3745        if (!test_opt(sb, NOLOAD) &&
3746            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3747                sb->s_op = &ext4_sops;
3748        else
3749                sb->s_op = &ext4_nojournal_sops;
3750        sb->s_export_op = &ext4_export_ops;
3751        sb->s_xattr = ext4_xattr_handlers;
3752#ifdef CONFIG_QUOTA
3753        sb->s_qcop = &ext4_qctl_operations;
3754        sb->dq_op = &ext4_quota_operations;
3755
3756        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
3757                /* Use qctl operations for hidden quota files. */
3758                sb->s_qcop = &ext4_qctl_sysfile_operations;
3759        }
3760#endif
3761        memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3762
3763        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3764        mutex_init(&sbi->s_orphan_lock);
3765        sbi->s_resize_flags = 0;
3766
3767        sb->s_root = NULL;
3768
3769        needs_recovery = (es->s_last_orphan != 0 ||
3770                          EXT4_HAS_INCOMPAT_FEATURE(sb,
3771                                    EXT4_FEATURE_INCOMPAT_RECOVER));
3772
3773        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3774            !(sb->s_flags & MS_RDONLY))
3775                if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3776                        goto failed_mount3;
3777
3778        /*
3779         * The first inode we look at is the journal inode.  Don't try
3780         * root first: it may be modified in the journal!
3781         */
3782        if (!test_opt(sb, NOLOAD) &&
3783            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3784                if (ext4_load_journal(sb, es, journal_devnum))
3785                        goto failed_mount3;
3786        } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3787              EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3788                ext4_msg(sb, KERN_ERR, "required journal recovery "
3789                       "suppressed and not mounted read-only");
3790                goto failed_mount_wq;
3791        } else {
3792                clear_opt(sb, DATA_FLAGS);
3793                sbi->s_journal = NULL;
3794                needs_recovery = 0;
3795                goto no_journal;
3796        }
3797
3798        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
3799            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
3800                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
3801                ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
3802                goto failed_mount_wq;
3803        }
3804
3805        if (!set_journal_csum_feature_set(sb)) {
3806                ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
3807                         "feature set");
3808                goto failed_mount_wq;
3809        }
3810
3811        /* We have now updated the journal if required, so we can
3812         * validate the data journaling mode. */
3813        switch (test_opt(sb, DATA_FLAGS)) {
3814        case 0:
3815                /* No mode set, assume a default based on the journal
3816                 * capabilities: ORDERED_DATA if the journal can
3817                 * cope, else JOURNAL_DATA
3818                 */
3819                if (jbd2_journal_check_available_features
3820                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
3821                        set_opt(sb, ORDERED_DATA);
3822                else
3823                        set_opt(sb, JOURNAL_DATA);
3824                break;
3825
3826        case EXT4_MOUNT_ORDERED_DATA:
3827        case EXT4_MOUNT_WRITEBACK_DATA:
3828                if (!jbd2_journal_check_available_features
3829                    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
3830                        ext4_msg(sb, KERN_ERR, "Journal does not support "
3831                               "requested data journaling mode");
3832                        goto failed_mount_wq;
3833                }
3834        default:
3835                break;
3836        }
3837        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3838
3839        sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
3840
3841        /*
3842         * The journal may have updated the bg summary counts, so we
3843         * need to update the global counters.
3844         */
3845        percpu_counter_set(&sbi->s_freeclusters_counter,
3846                           ext4_count_free_clusters(sb));
3847        percpu_counter_set(&sbi->s_freeinodes_counter,
3848                           ext4_count_free_inodes(sb));
3849        percpu_counter_set(&sbi->s_dirs_counter,
3850                           ext4_count_dirs(sb));
3851        percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3852
3853no_journal:
3854        /*
3855         * Get the # of file system overhead blocks from the
3856         * superblock if present.
3857         */
3858        if (es->s_overhead_clusters)
3859                sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
3860        else {
3861                ret = ext4_calculate_overhead(sb);
3862                if (ret)
3863                        goto failed_mount_wq;
3864        }
3865
3866        /*
3867         * The maximum number of concurrent works can be high and
3868         * concurrency isn't really necessary.  Limit it to 1.
3869         */
3870        EXT4_SB(sb)->dio_unwritten_wq =
3871                alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
3872        if (!EXT4_SB(sb)->dio_unwritten_wq) {
3873                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3874                goto failed_mount_wq;
3875        }
3876
3877        /*
3878         * The jbd2_journal_load will have done any necessary log recovery,
3879         * so we can safely mount the rest of the filesystem now.
3880         */
3881
3882        root = ext4_iget(sb, EXT4_ROOT_INO);
3883        if (IS_ERR(root)) {
3884                ext4_msg(sb, KERN_ERR, "get root inode failed");
3885                ret = PTR_ERR(root);
3886                root = NULL;
3887                goto failed_mount4;
3888        }
3889        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
3890                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
3891                iput(root);
3892                goto failed_mount4;
3893        }
3894        sb->s_root = d_make_root(root);
3895        if (!sb->s_root) {
3896                ext4_msg(sb, KERN_ERR, "get root dentry failed");
3897                ret = -ENOMEM;
3898                goto failed_mount4;
3899        }
3900
3901        if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
3902                sb->s_flags |= MS_RDONLY;
3903
3904        /* determine the minimum size of new large inodes, if present */
3905        if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
3906                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3907                                                     EXT4_GOOD_OLD_INODE_SIZE;
3908                if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3909                                       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
3910                        if (sbi->s_want_extra_isize <
3911                            le16_to_cpu(es->s_want_extra_isize))
3912                                sbi->s_want_extra_isize =
3913                                        le16_to_cpu(es->s_want_extra_isize);
3914                        if (sbi->s_want_extra_isize <
3915                            le16_to_cpu(es->s_min_extra_isize))
3916                                sbi->s_want_extra_isize =
3917                                        le16_to_cpu(es->s_min_extra_isize);
3918                }
3919        }
3920        /* Check if enough inode space is available */
3921        if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
3922                                                        sbi->s_inode_size) {
3923                sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
3924                                                       EXT4_GOOD_OLD_INODE_SIZE;
3925                ext4_msg(sb, KERN_INFO, "required extra inode space not"
3926                         "available");
3927        }
3928
3929        err = ext4_setup_system_zone(sb);
3930        if (err) {
3931                ext4_msg(sb, KERN_ERR, "failed to initialize system "
3932                         "zone (%d)", err);
3933                goto failed_mount4a;
3934        }
3935
3936        ext4_ext_init(sb);
3937        err = ext4_mb_init(sb);
3938        if (err) {
3939                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3940                         err);
3941                goto failed_mount5;
3942        }
3943
3944        err = ext4_register_li_request(sb, first_not_zeroed);
3945        if (err)
3946                goto failed_mount6;
3947
3948        sbi->s_kobj.kset = ext4_kset;
3949        init_completion(&sbi->s_kobj_unregister);
3950        err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
3951                                   "%s", sb->s_id);
3952        if (err)
3953                goto failed_mount7;
3954
3955        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
3956        ext4_orphan_cleanup(sb, es);
3957        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
3958        if (needs_recovery) {
3959                ext4_msg(sb, KERN_INFO, "recovery complete");
3960                ext4_mark_recovery_complete(sb, es);
3961        }
3962        if (EXT4_SB(sb)->s_journal) {
3963                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
3964                        descr = " journalled data mode";
3965                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
3966                        descr = " ordered data mode";
3967                else
3968                        descr = " writeback data mode";
3969        } else
3970                descr = "out journal";
3971
3972#ifdef CONFIG_QUOTA
3973        /* Enable quota usage during mount. */
3974        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
3975            !(sb->s_flags & MS_RDONLY)) {
3976                ret = ext4_enable_quotas(sb);
3977                if (ret)
3978                        goto failed_mount7;
3979        }
3980#endif  /* CONFIG_QUOTA */
3981
3982        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
3983                 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3984                 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3985
3986        if (es->s_error_count)
3987                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3988
3989        kfree(orig_data);
3990        return 0;
3991
3992cantfind_ext4:
3993        if (!silent)
3994                ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
3995        goto failed_mount;
3996
3997failed_mount7:
3998        ext4_unregister_li_request(sb);
3999failed_mount6:
4000        ext4_mb_release(sb);
4001failed_mount5:
4002        ext4_ext_release(sb);
4003        ext4_release_system_zone(sb);
4004failed_mount4a:
4005        dput(sb->s_root);
4006        sb->s_root = NULL;
4007failed_mount4:
4008        ext4_msg(sb, KERN_ERR, "mount failed");
4009        destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
4010failed_mount_wq:
4011        if (sbi->s_journal) {
4012                jbd2_journal_destroy(sbi->s_journal);
4013                sbi->s_journal = NULL;
4014        }
4015failed_mount3:
4016        del_timer(&sbi->s_err_report);
4017        if (sbi->s_flex_groups)
4018                ext4_kvfree(sbi->s_flex_groups);
4019        percpu_counter_destroy(&sbi->s_freeclusters_counter);
4020        percpu_counter_destroy(&sbi->s_freeinodes_counter);
4021        percpu_counter_destroy(&sbi->s_dirs_counter);
4022        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4023        if (sbi->s_mmp_tsk)
4024                kthread_stop(sbi->s_mmp_tsk);
4025failed_mount2:
4026        for (i = 0; i < db_count; i++)
4027                brelse(sbi->s_group_desc[i]);
4028        ext4_kvfree(sbi->s_group_desc);
4029failed_mount:
4030        if (sbi->s_chksum_driver)
4031                crypto_free_shash(sbi->s_chksum_driver);
4032        if (sbi->s_proc) {
4033                remove_proc_entry("options", sbi->s_proc);
4034                remove_proc_entry(sb->s_id, ext4_proc_root);
4035        }
4036#ifdef CONFIG_QUOTA
4037        for (i = 0; i < MAXQUOTAS; i++)
4038                kfree(sbi->s_qf_names[i]);
4039#endif
4040        ext4_blkdev_remove(sbi);
4041        brelse(bh);
4042out_fail:
4043        sb->s_fs_info = NULL;
4044        kfree(sbi->s_blockgroup_lock);
4045        kfree(sbi);
4046out_free_orig:
4047        kfree(orig_data);
4048        return ret;
4049}
4050
4051/*
4052 * Setup any per-fs journal parameters now.  We'll do this both on
4053 * initial mount, once the journal has been initialised but before we've
4054 * done any recovery; and again on any subsequent remount.
4055 */
4056static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
4057{
4058        struct ext4_sb_info *sbi = EXT4_SB(sb);
4059
4060        journal->j_commit_interval = sbi->s_commit_interval;
4061        journal->j_min_batch_time = sbi->s_min_batch_time;
4062        journal->j_max_batch_time = sbi->s_max_batch_time;
4063
4064        write_lock(&journal->j_state_lock);
4065        if (test_opt(sb, BARRIER))
4066                journal->j_flags |= JBD2_BARRIER;
4067        else
4068                journal->j_flags &= ~JBD2_BARRIER;
4069        if (test_opt(sb, DATA_ERR_ABORT))
4070                journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
4071        else
4072                journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
4073        write_unlock(&journal->j_state_lock);
4074}
4075
4076static journal_t *ext4_get_journal(struct super_block *sb,
4077                                   unsigned int journal_inum)
4078{
4079        struct inode *journal_inode;
4080        journal_t *journal;
4081
4082        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4083
4084        /* First, test for the existence of a valid inode on disk.  Bad
4085         * things happen if we iget() an unused inode, as the subsequent
4086         * iput() will try to delete it. */
4087
4088        journal_inode = ext4_iget(sb, journal_inum);
4089        if (IS_ERR(journal_inode)) {
4090                ext4_msg(sb, KERN_ERR, "no journal found");
4091                return NULL;
4092        }
4093        if (!journal_inode->i_nlink) {
4094                make_bad_inode(journal_inode);
4095                iput(journal_inode);
4096                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
4097                return NULL;
4098        }
4099
4100        jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
4101                  journal_inode, journal_inode->i_size);
4102        if (!S_ISREG(journal_inode->i_mode)) {
4103                ext4_msg(sb, KERN_ERR, "invalid journal inode");
4104                iput(journal_inode);
4105                return NULL;
4106        }
4107
4108        journal = jbd2_journal_init_inode(journal_inode);
4109        if (!journal) {
4110                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
4111                iput(journal_inode);
4112                return NULL;
4113        }
4114        journal->j_private = sb;
4115        ext4_init_journal_params(sb, journal);
4116        return journal;
4117}
4118
4119static journal_t *ext4_get_dev_journal(struct super_block *sb,
4120                                       dev_t j_dev)
4121{
4122        struct buffer_head *bh;
4123        journal_t *journal;
4124        ext4_fsblk_t start;
4125        ext4_fsblk_t len;
4126        int hblock, blocksize;
4127        ext4_fsblk_t sb_block;
4128        unsigned long offset;
4129        struct ext4_super_block *es;
4130        struct block_device *bdev;
4131
4132        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4133
4134        bdev = ext4_blkdev_get(j_dev, sb);
4135        if (bdev == NULL)
4136                return NULL;
4137
4138        blocksize = sb->s_blocksize;
4139        hblock = bdev_logical_block_size(bdev);
4140        if (blocksize < hblock) {
4141                ext4_msg(sb, KERN_ERR,
4142                        "blocksize too small for journal device");
4143                goto out_bdev;
4144        }
4145
4146        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
4147        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
4148        set_blocksize(bdev, blocksize);
4149        if (!(bh = __bread(bdev, sb_block, blocksize))) {
4150                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
4151                       "external journal");
4152                goto out_bdev;
4153        }
4154
4155        es = (struct ext4_super_block *) (bh->b_data + offset);
4156        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
4157            !(le32_to_cpu(es->s_feature_incompat) &
4158              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4159                ext4_msg(sb, KERN_ERR, "external journal has "
4160                                        "bad superblock");
4161                brelse(bh);
4162                goto out_bdev;
4163        }
4164
4165        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4166                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4167                brelse(bh);
4168                goto out_bdev;
4169        }
4170
4171        len = ext4_blocks_count(es);
4172        start = sb_block + 1;
4173        brelse(bh);     /* we're done with the superblock */
4174
4175        journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
4176                                        start, len, blocksize);
4177        if (!journal) {
4178                ext4_msg(sb, KERN_ERR, "failed to create device journal");
4179                goto out_bdev;
4180        }
4181        journal->j_private = sb;
4182        ll_rw_block(READ, 1, &journal->j_sb_buffer);
4183        wait_on_buffer(journal->j_sb_buffer);
4184        if (!buffer_uptodate(journal->j_sb_buffer)) {
4185                ext4_msg(sb, KERN_ERR, "I/O error on journal device");
4186                goto out_journal;
4187        }
4188        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4189                ext4_msg(sb, KERN_ERR, "External journal has more than one "
4190                                        "user (unsupported) - %d",
4191                        be32_to_cpu(journal->j_superblock->s_nr_users));
4192                goto out_journal;
4193        }
4194        EXT4_SB(sb)->journal_bdev = bdev;
4195        ext4_init_journal_params(sb, journal);
4196        return journal;
4197
4198out_journal:
4199        jbd2_journal_destroy(journal);
4200out_bdev:
4201        ext4_blkdev_put(bdev);
4202        return NULL;
4203}
4204
4205static int ext4_load_journal(struct super_block *sb,
4206                             struct ext4_super_block *es,
4207                             unsigned long journal_devnum)
4208{
4209        journal_t *journal;
4210        unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
4211        dev_t journal_dev;
4212        int err = 0;
4213        int really_read_only;
4214
4215        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4216
4217        if (journal_devnum &&
4218            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4219                ext4_msg(sb, KERN_INFO, "external journal device major/minor "
4220                        "numbers have changed");
4221                journal_dev = new_decode_dev(journal_devnum);
4222        } else
4223                journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
4224
4225        really_read_only = bdev_read_only(sb->s_bdev);
4226
4227        /*
4228         * Are we loading a blank journal or performing recovery after a
4229         * crash?  For recovery, we need to check in advance whether we
4230         * can get read-write access to the device.
4231         */
4232        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
4233                if (sb->s_flags & MS_RDONLY) {
4234                        ext4_msg(sb, KERN_INFO, "INFO: recovery "
4235                                        "required on readonly filesystem");
4236                        if (really_read_only) {
4237                                ext4_msg(sb, KERN_ERR, "write access "
4238                                        "unavailable, cannot proceed");
4239                                return -EROFS;
4240                        }
4241                        ext4_msg(sb, KERN_INFO, "write access will "
4242                               "be enabled during recovery");
4243                }
4244        }
4245
4246        if (journal_inum && journal_dev) {
4247                ext4_msg(sb, KERN_ERR, "filesystem has both journal "
4248                       "and inode journals!");
4249                return -EINVAL;
4250        }
4251
4252        if (journal_inum) {
4253                if (!(journal = ext4_get_journal(sb, journal_inum)))
4254                        return -EINVAL;
4255        } else {
4256                if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
4257                        return -EINVAL;
4258        }
4259
4260        if (!(journal->j_flags & JBD2_BARRIER))
4261                ext4_msg(sb, KERN_INFO, "barriers disabled");
4262
4263        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
4264                err = jbd2_journal_wipe(journal, !really_read_only);
4265        if (!err) {
4266                char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
4267                if (save)
4268                        memcpy(save, ((char *) es) +
4269                               EXT4_S_ERR_START, EXT4_S_ERR_LEN);
4270                err = jbd2_journal_load(journal);
4271                if (save)
4272                        memcpy(((char *) es) + EXT4_S_ERR_START,
4273                               save, EXT4_S_ERR_LEN);
4274                kfree(save);
4275        }
4276
4277        if (err) {
4278                ext4_msg(sb, KERN_ERR, "error loading journal");
4279                jbd2_journal_destroy(journal);
4280                return err;
4281        }
4282
4283        EXT4_SB(sb)->s_journal = journal;
4284        ext4_clear_journal_err(sb, es);
4285
4286        if (!really_read_only && journal_devnum &&
4287            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4288                es->s_journal_dev = cpu_to_le32(journal_devnum);
4289
4290                /* Make sure we flush the recovery flag to disk. */
4291                ext4_commit_super(sb, 1);
4292        }
4293
4294        return 0;
4295}
4296
4297static int ext4_commit_super(struct super_block *sb, int sync)
4298{
4299        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
4300        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4301        int error = 0;
4302
4303        if (!sbh || block_device_ejected(sb))
4304                return error;
4305        if (buffer_write_io_error(sbh)) {
4306                /*
4307                 * Oh, dear.  A previous attempt to write the
4308                 * superblock failed.  This could happen because the
4309                 * USB device was yanked out.  Or it could happen to
4310                 * be a transient write error and maybe the block will
4311                 * be remapped.  Nothing we can do but to retry the
4312                 * write and hope for the best.
4313                 */
4314                ext4_msg(sb, KERN_ERR, "previous I/O error to "
4315                       "superblock detected");
4316                clear_buffer_write_io_error(sbh);
4317                set_buffer_uptodate(sbh);
4318        }
4319        /*
4320         * If the file system is mounted read-only, don't update the
4321         * superblock write time.  This avoids updating the superblock
4322         * write time when we are mounting the root file system
4323         * read/only but we need to replay the journal; at that point,
4324         * for people who are east of GMT and who make their clock
4325         * tick in localtime for Windows bug-for-bug compatibility,
4326         * the clock is set in the future, and this will cause e2fsck
4327         * to complain and force a full file system check.
4328         */
4329        if (!(sb->s_flags & MS_RDONLY))
4330                es->s_wtime = cpu_to_le32(get_seconds());
4331        if (sb->s_bdev->bd_part)
4332                es->s_kbytes_written =
4333                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
4334                            ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
4335                              EXT4_SB(sb)->s_sectors_written_start) >> 1));
4336        else
4337                es->s_kbytes_written =
4338                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4339        ext4_free_blocks_count_set(es,
4340                        EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4341                                &EXT4_SB(sb)->s_freeclusters_counter)));
4342        es->s_free_inodes_count =
4343                cpu_to_le32(percpu_counter_sum_positive(
4344                                &EXT4_SB(sb)->s_freeinodes_counter));
4345        BUFFER_TRACE(sbh, "marking dirty");
4346        ext4_superblock_csum_set(sb, es);
4347        mark_buffer_dirty(sbh);
4348        if (sync) {
4349                error = sync_dirty_buffer(sbh);
4350                if (error)
4351                        return error;
4352
4353                error = buffer_write_io_error(sbh);
4354                if (error) {
4355                        ext4_msg(sb, KERN_ERR, "I/O error while writing "
4356                               "superblock");
4357                        clear_buffer_write_io_error(sbh);
4358                        set_buffer_uptodate(sbh);
4359                }
4360        }
4361        return error;
4362}
4363
4364/*
4365 * Have we just finished recovery?  If so, and if we are mounting (or
4366 * remounting) the filesystem readonly, then we will end up with a
4367 * consistent fs on disk.  Record that fact.
4368 */
4369static void ext4_mark_recovery_complete(struct super_block *sb,
4370                                        struct ext4_super_block *es)
4371{
4372        journal_t *journal = EXT4_SB(sb)->s_journal;
4373
4374        if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
4375                BUG_ON(journal != NULL);
4376                return;
4377        }
4378        jbd2_journal_lock_updates(journal);
4379        if (jbd2_journal_flush(journal) < 0)
4380                goto out;
4381
4382        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
4383            sb->s_flags & MS_RDONLY) {
4384                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4385                ext4_commit_super(sb, 1);
4386        }
4387
4388out:
4389        jbd2_journal_unlock_updates(journal);
4390}
4391
4392/*
4393 * If we are mounting (or read-write remounting) a filesystem whose journal
4394 * has recorded an error from a previous lifetime, move that error to the
4395 * main filesystem now.
4396 */
4397static void ext4_clear_journal_err(struct super_block *sb,
4398                                   struct ext4_super_block *es)
4399{
4400        journal_t *journal;
4401        int j_errno;
4402        const char *errstr;
4403
4404        BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
4405
4406        journal = EXT4_SB(sb)->s_journal;
4407
4408        /*
4409         * Now check for any error status which may have been recorded in the
4410         * journal by a prior ext4_error() or ext4_abort()
4411         */
4412
4413        j_errno = jbd2_journal_errno(journal);
4414        if (j_errno) {
4415                char nbuf[16];
4416
4417                errstr = ext4_decode_error(sb, j_errno, nbuf);
4418                ext4_warning(sb, "Filesystem error recorded "
4419                             "from previous mount: %s", errstr);
4420                ext4_warning(sb, "Marking fs in need of filesystem check.");
4421
4422                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
4423                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4424                ext4_commit_super(sb, 1);
4425
4426                jbd2_journal_clear_err(journal);
4427                jbd2_journal_update_sb_errno(journal);
4428        }
4429}
4430
4431/*
4432 * Force the running and committing transactions to commit,
4433 * and wait on the commit.
4434 */
4435int ext4_force_commit(struct super_block *sb)
4436{
4437        journal_t *journal;
4438        int ret = 0;
4439
4440        if (sb->s_flags & MS_RDONLY)
4441                return 0;
4442
4443        journal = EXT4_SB(sb)->s_journal;
4444        if (journal)
4445                ret = ext4_journal_force_commit(journal);
4446
4447        return ret;
4448}
4449
4450static int ext4_sync_fs(struct super_block *sb, int wait)
4451{
4452        int ret = 0;
4453        tid_t target;
4454        struct ext4_sb_info *sbi = EXT4_SB(sb);
4455
4456        trace_ext4_sync_fs(sb, wait);
4457        flush_workqueue(sbi->dio_unwritten_wq);
4458        /*
4459         * Writeback quota in non-journalled quota case - journalled quota has
4460         * no dirty dquots
4461         */
4462        dquot_writeback_dquots(sb, -1);
4463        if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4464                if (wait)
4465                        jbd2_log_wait_commit(sbi->s_journal, target);
4466        }
4467        return ret;
4468}
4469
4470/*
4471 * LVM calls this function before a (read-only) snapshot is created.  This
4472 * gives us a chance to flush the journal completely and mark the fs clean.
4473 *
4474 * Note that only this function cannot bring a filesystem to be in a clean
4475 * state independently. It relies on upper layer to stop all data & metadata
4476 * modifications.
4477 */
4478static int ext4_freeze(struct super_block *sb)
4479{
4480        int error = 0;
4481        journal_t *journal;
4482
4483        if (sb->s_flags & MS_RDONLY)
4484                return 0;
4485
4486        journal = EXT4_SB(sb)->s_journal;
4487
4488        /* Now we set up the journal barrier. */
4489        jbd2_journal_lock_updates(journal);
4490
4491        /*
4492         * Don't clear the needs_recovery flag if we failed to flush
4493         * the journal.
4494         */
4495        error = jbd2_journal_flush(journal);
4496        if (error < 0)
4497                goto out;
4498
4499        /* Journal blocked and flushed, clear needs_recovery flag. */
4500        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4501        error = ext4_commit_super(sb, 1);
4502out:
4503        /* we rely on upper layer to stop further updates */
4504        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4505        return error;
4506}
4507
4508/*
4509 * Called by LVM after the snapshot is done.  We need to reset the RECOVER
4510 * flag here, even though the filesystem is not technically dirty yet.
4511 */
4512static int ext4_unfreeze(struct super_block *sb)
4513{
4514        if (sb->s_flags & MS_RDONLY)
4515                return 0;
4516
4517        lock_super(sb);
4518        /* Reset the needs_recovery flag before the fs is unlocked. */
4519        EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4520        ext4_commit_super(sb, 1);
4521        unlock_super(sb);
4522        return 0;
4523}
4524
4525/*
4526 * Structure to save mount options for ext4_remount's benefit
4527 */
4528struct ext4_mount_options {
4529        unsigned long s_mount_opt;
4530        unsigned long s_mount_opt2;
4531        kuid_t s_resuid;
4532        kgid_t s_resgid;
4533        unsigned long s_commit_interval;
4534        u32 s_min_batch_time, s_max_batch_time;
4535#ifdef CONFIG_QUOTA
4536        int s_jquota_fmt;
4537        char *s_qf_names[MAXQUOTAS];
4538#endif
4539};
4540
4541static int ext4_remount(struct super_block *sb, int *flags, char *data)
4542{
4543        struct ext4_super_block *es;
4544        struct ext4_sb_info *sbi = EXT4_SB(sb);
4545        unsigned long old_sb_flags;
4546        struct ext4_mount_options old_opts;
4547        int enable_quota = 0;
4548        ext4_group_t g;
4549        unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4550        int err = 0;
4551#ifdef CONFIG_QUOTA
4552        int i;
4553#endif
4554        char *orig_data = kstrdup(data, GFP_KERNEL);
4555
4556        /* Store the original options */
4557        lock_super(sb);
4558        old_sb_flags = sb->s_flags;
4559        old_opts.s_mount_opt = sbi->s_mount_opt;
4560        old_opts.s_mount_opt2 = sbi->s_mount_opt2;
4561        old_opts.s_resuid = sbi->s_resuid;
4562        old_opts.s_resgid = sbi->s_resgid;
4563        old_opts.s_commit_interval = sbi->s_commit_interval;
4564        old_opts.s_min_batch_time = sbi->s_min_batch_time;
4565        old_opts.s_max_batch_time = sbi->s_max_batch_time;
4566#ifdef CONFIG_QUOTA
4567        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4568        for (i = 0; i < MAXQUOTAS; i++)
4569                old_opts.s_qf_names[i] = sbi->s_qf_names[i];
4570#endif
4571        if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4572                journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
4573
4574        /*
4575         * Allow the "check" option to be passed as a remount option.
4576         */
4577        if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
4578                err = -EINVAL;
4579                goto restore_opts;
4580        }
4581
4582        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4583                ext4_abort(sb, "Abort forced by user");
4584
4585        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
4586                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
4587
4588        es = sbi->s_es;
4589
4590        if (sbi->s_journal) {
4591                ext4_init_journal_params(sb, sbi->s_journal);
4592                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
4593        }
4594
4595        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
4596                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
4597                        err = -EROFS;
4598                        goto restore_opts;
4599                }
4600
4601                if (*flags & MS_RDONLY) {
4602                        err = dquot_suspend(sb, -1);
4603                        if (err < 0)
4604                                goto restore_opts;
4605
4606                        /*
4607                         * First of all, the unconditional stuff we have to do
4608                         * to disable replay of the journal when we next remount
4609                         */
4610                        sb->s_flags |= MS_RDONLY;
4611
4612                        /*
4613                         * OK, test if we are remounting a valid rw partition
4614                         * readonly, and if so set the rdonly flag and then
4615                         * mark the partition as valid again.
4616                         */
4617                        if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
4618                            (sbi->s_mount_state & EXT4_VALID_FS))
4619                                es->s_state = cpu_to_le16(sbi->s_mount_state);
4620
4621                        if (sbi->s_journal)
4622                                ext4_mark_recovery_complete(sb, es);
4623                } else {
4624                        /* Make sure we can mount this feature set readwrite */
4625                        if (!ext4_feature_set_ok(sb, 0)) {
4626                                err = -EROFS;
4627                                goto restore_opts;
4628                        }
4629                        /*
4630                         * Make sure the group descriptor checksums
4631                         * are sane.  If they aren't, refuse to remount r/w.
4632                         */
4633                        for (g = 0; g < sbi->s_groups_count; g++) {
4634                                struct ext4_group_desc *gdp =
4635                                        ext4_get_group_desc(sb, g, NULL);
4636
4637                                if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
4638                                        ext4_msg(sb, KERN_ERR,
4639               "ext4_remount: Checksum for group %u failed (%u!=%u)",
4640                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
4641                                               le16_to_cpu(gdp->bg_checksum));
4642                                        err = -EINVAL;
4643                                        goto restore_opts;
4644                                }
4645                        }
4646
4647                        /*
4648                         * If we have an unprocessed orphan list hanging
4649                         * around from a previously readonly bdev mount,
4650                         * require a full umount/remount for now.
4651                         */
4652                        if (es->s_last_orphan) {
4653                                ext4_msg(sb, KERN_WARNING, "Couldn't "
4654                                       "remount RDWR because of unprocessed "
4655                                       "orphan inode list.  Please "
4656                                       "umount/remount instead");
4657                                err = -EINVAL;
4658                                goto restore_opts;
4659                        }
4660
4661                        /*
4662                         * Mounting a RDONLY partition read-write, so reread
4663                         * and store the current valid flag.  (It may have
4664                         * been changed by e2fsck since we originally mounted
4665                         * the partition.)
4666                         */
4667                        if (sbi->s_journal)
4668                                ext4_clear_journal_err(sb, es);
4669                        sbi->s_mount_state = le16_to_cpu(es->s_state);
4670                        if (!ext4_setup_super(sb, es, 0))
4671                                sb->s_flags &= ~MS_RDONLY;
4672                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
4673                                                     EXT4_FEATURE_INCOMPAT_MMP))
4674                                if (ext4_multi_mount_protect(sb,
4675                                                le64_to_cpu(es->s_mmp_block))) {
4676                                        err = -EROFS;
4677                                        goto restore_opts;
4678                                }
4679                        enable_quota = 1;
4680                }
4681        }
4682
4683        /*
4684         * Reinitialize lazy itable initialization thread based on
4685         * current settings
4686         */
4687        if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
4688                ext4_unregister_li_request(sb);
4689        else {
4690                ext4_group_t first_not_zeroed;
4691                first_not_zeroed = ext4_has_uninit_itable(sb);
4692                ext4_register_li_request(sb, first_not_zeroed);
4693        }
4694
4695        ext4_setup_system_zone(sb);
4696        if (sbi->s_journal == NULL)
4697                ext4_commit_super(sb, 1);
4698
4699        unlock_super(sb);
4700#ifdef CONFIG_QUOTA
4701        /* Release old quota file names */
4702        for (i = 0; i < MAXQUOTAS; i++)
4703                if (old_opts.s_qf_names[i] &&
4704                    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4705                        kfree(old_opts.s_qf_names[i]);
4706        if (enable_quota) {
4707                if (sb_any_quota_suspended(sb))
4708                        dquot_resume(sb, -1);
4709                else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4710                                        EXT4_FEATURE_RO_COMPAT_QUOTA)) {
4711                        err = ext4_enable_quotas(sb);
4712                        if (err) {
4713                                lock_super(sb);
4714                                goto restore_opts;
4715                        }
4716                }
4717        }
4718#endif
4719
4720        ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
4721        kfree(orig_data);
4722        return 0;
4723
4724restore_opts:
4725        sb->s_flags = old_sb_flags;
4726        sbi->s_mount_opt = old_opts.s_mount_opt;
4727        sbi->s_mount_opt2 = old_opts.s_mount_opt2;
4728        sbi->s_resuid = old_opts.s_resuid;
4729        sbi->s_resgid = old_opts.s_resgid;
4730        sbi->s_commit_interval = old_opts.s_commit_interval;
4731        sbi->s_min_batch_time = old_opts.s_min_batch_time;
4732        sbi->s_max_batch_time = old_opts.s_max_batch_time;
4733#ifdef CONFIG_QUOTA
4734        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4735        for (i = 0; i < MAXQUOTAS; i++) {
4736                if (sbi->s_qf_names[i] &&
4737                    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4738                        kfree(sbi->s_qf_names[i]);
4739                sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4740        }
4741#endif
4742        unlock_super(sb);
4743        kfree(orig_data);
4744        return err;
4745}
4746
4747static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4748{
4749        struct super_block *sb = dentry->d_sb;
4750        struct ext4_sb_info *sbi = EXT4_SB(sb);
4751        struct ext4_super_block *es = sbi->s_es;
4752        ext4_fsblk_t overhead = 0;
4753        u64 fsid;
4754        s64 bfree;
4755
4756        if (!test_opt(sb, MINIX_DF))
4757                overhead = sbi->s_overhead;
4758
4759        buf->f_type = EXT4_SUPER_MAGIC;
4760        buf->f_bsize = sb->s_blocksize;
4761        buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
4762        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4763                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4764        /* prevent underflow in case that few free space is available */
4765        buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
4766        buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4767        if (buf->f_bfree < ext4_r_blocks_count(es))
4768                buf->f_bavail = 0;
4769        buf->f_files = le32_to_cpu(es->s_inodes_count);
4770        buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
4771        buf->f_namelen = EXT4_NAME_LEN;
4772        fsid = le64_to_cpup((void *)es->s_uuid) ^
4773               le64_to_cpup((void *)es->s_uuid + sizeof(u64));
4774        buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
4775        buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
4776
4777        return 0;
4778}
4779
4780/* Helper function for writing quotas on sync - we need to start transaction
4781 * before quota file is locked for write. Otherwise there are possible deadlocks:
4782 * Process 1                         Process 2
4783 * ext4_create()                     quota_sync()
4784 *   jbd2_journal_start()                  write_dquot()
4785 *   dquot_initialize()                         down(dqio_mutex)
4786 *     down(dqio_mutex)                    jbd2_journal_start()
4787 *
4788 */
4789
4790#ifdef CONFIG_QUOTA
4791
4792static inline struct inode *dquot_to_inode(struct dquot *dquot)
4793{
4794        return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
4795}
4796
4797static int ext4_write_dquot(struct dquot *dquot)
4798{
4799        int ret, err;
4800        handle_t *handle;
4801        struct inode *inode;
4802
4803        inode = dquot_to_inode(dquot);
4804        handle = ext4_journal_start(inode,
4805                                    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
4806        if (IS_ERR(handle))
4807                return PTR_ERR(handle);
4808        ret = dquot_commit(dquot);
4809        err = ext4_journal_stop(handle);
4810        if (!ret)
4811                ret = err;
4812        return ret;
4813}
4814
4815static int ext4_acquire_dquot(struct dquot *dquot)
4816{
4817        int ret, err;
4818        handle_t *handle;
4819
4820        handle = ext4_journal_start(dquot_to_inode(dquot),
4821                                    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
4822        if (IS_ERR(handle))
4823                return PTR_ERR(handle);
4824        ret = dquot_acquire(dquot);
4825        err = ext4_journal_stop(handle);
4826        if (!ret)
4827                ret = err;
4828        return ret;
4829}
4830
4831static int ext4_release_dquot(struct dquot *dquot)
4832{
4833        int ret, err;
4834        handle_t *handle;
4835
4836        handle = ext4_journal_start(dquot_to_inode(dquot),
4837                                    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
4838        if (IS_ERR(handle)) {
4839                /* Release dquot anyway to avoid endless cycle in dqput() */
4840                dquot_release(dquot);
4841                return PTR_ERR(handle);
4842        }
4843        ret = dquot_release(dquot);
4844        err = ext4_journal_stop(handle);
4845        if (!ret)
4846                ret = err;
4847        return ret;
4848}
4849
4850static int ext4_mark_dquot_dirty(struct dquot *dquot)
4851{
4852        /* Are we journaling quotas? */
4853        if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
4854            EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
4855                dquot_mark_dquot_dirty(dquot);
4856                return ext4_write_dquot(dquot);
4857        } else {
4858                return dquot_mark_dquot_dirty(dquot);
4859        }
4860}
4861
4862static int ext4_write_info(struct super_block *sb, int type)
4863{
4864        int ret, err;
4865        handle_t *handle;
4866
4867        /* Data block + inode block */
4868        handle = ext4_journal_start(sb->s_root->d_inode, 2);
4869        if (IS_ERR(handle))
4870                return PTR_ERR(handle);
4871        ret = dquot_commit_info(sb, type);
4872        err = ext4_journal_stop(handle);
4873        if (!ret)
4874                ret = err;
4875        return ret;
4876}
4877
4878/*
4879 * Turn on quotas during mount time - we need to find
4880 * the quota file and such...
4881 */
4882static int ext4_quota_on_mount(struct super_block *sb, int type)
4883{
4884        return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
4885                                        EXT4_SB(sb)->s_jquota_fmt, type);
4886}
4887
4888/*
4889 * Standard function to be called on quota_on
4890 */
4891static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4892                         struct path *path)
4893{
4894        int err;
4895
4896        if (!test_opt(sb, QUOTA))
4897                return -EINVAL;
4898
4899        /* Quotafile not on the same filesystem? */
4900        if (path->dentry->d_sb != sb)
4901                return -EXDEV;
4902        /* Journaling quota? */
4903        if (EXT4_SB(sb)->s_qf_names[type]) {
4904                /* Quotafile not in fs root? */
4905                if (path->dentry->d_parent != sb->s_root)
4906                        ext4_msg(sb, KERN_WARNING,
4907                                "Quota file not on filesystem root. "
4908                                "Journaled quota will not work");
4909        }
4910
4911        /*
4912         * When we journal data on quota file, we have to flush journal to see
4913         * all updates to the file when we bypass pagecache...
4914         */
4915        if (EXT4_SB(sb)->s_journal &&
4916            ext4_should_journal_data(path->dentry->d_inode)) {
4917                /*
4918                 * We don't need to lock updates but journal_flush() could
4919                 * otherwise be livelocked...
4920                 */
4921                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
4922                err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
4923                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4924                if (err)
4925                        return err;
4926        }
4927
4928        return dquot_quota_on(sb, type, format_id, path);
4929}
4930
4931static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
4932                             unsigned int flags)
4933{
4934        int err;
4935        struct inode *qf_inode;
4936        unsigned long qf_inums[MAXQUOTAS] = {
4937                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
4938                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
4939        };
4940
4941        BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
4942
4943        if (!qf_inums[type])
4944                return -EPERM;
4945
4946        qf_inode = ext4_iget(sb, qf_inums[type]);
4947        if (IS_ERR(qf_inode)) {
4948                ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
4949                return PTR_ERR(qf_inode);
4950        }
4951
4952        err = dquot_enable(qf_inode, type, format_id, flags);
4953        iput(qf_inode);
4954
4955        return err;
4956}
4957
4958/* Enable usage tracking for all quota types. */
4959static int ext4_enable_quotas(struct super_block *sb)
4960{
4961        int type, err = 0;
4962        unsigned long qf_inums[MAXQUOTAS] = {
4963                le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
4964                le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
4965        };
4966
4967        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
4968        for (type = 0; type < MAXQUOTAS; type++) {
4969                if (qf_inums[type]) {
4970                        err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
4971                                                DQUOT_USAGE_ENABLED);
4972                        if (err) {
4973                                ext4_warning(sb,
4974                                        "Failed to enable quota (type=%d) "
4975                                        "tracking. Please run e2fsck to fix.",
4976                                        type);
4977                                return err;
4978                        }
4979                }
4980        }
4981        return 0;
4982}
4983
4984/*
4985 * quota_on function that is used when QUOTA feature is set.
4986 */
4987static int ext4_quota_on_sysfile(struct super_block *sb, int type,
4988                                 int format_id)
4989{
4990        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
4991                return -EINVAL;
4992
4993        /*
4994         * USAGE was enabled at mount time. Only need to enable LIMITS now.
4995         */
4996        return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
4997}
4998
4999static int ext4_quota_off(struct super_block *sb, int type)
5000{
5001        struct inode *inode = sb_dqopt(sb)->files[type];
5002        handle_t *handle;
5003
5004        /* Force all delayed allocation blocks to be allocated.
5005         * Caller already holds s_umount sem */
5006        if (test_opt(sb, DELALLOC))
5007                sync_filesystem(sb);
5008
5009        if (!inode)
5010                goto out;
5011
5012        /* Update modification times of quota files when userspace can
5013         * start looking at them */
5014        handle = ext4_journal_start(inode, 1);
5015        if (IS_ERR(handle))
5016                goto out;
5017        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
5018        ext4_mark_inode_dirty(handle, inode);
5019        ext4_journal_stop(handle);
5020
5021out:
5022        return dquot_quota_off(sb, type);
5023}
5024
5025/*
5026 * quota_off function that is used when QUOTA feature is set.
5027 */
5028static int ext4_quota_off_sysfile(struct super_block *sb, int type)
5029{
5030        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
5031                return -EINVAL;
5032
5033        /* Disable only the limits. */
5034        return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
5035}
5036
5037/* Read data from quotafile - avoid pagecache and such because we cannot afford
5038 * acquiring the locks... As quota files are never truncated and quota code
5039 * itself serializes the operations (and no one else should touch the files)
5040 * we don't have to be afraid of races */
5041static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5042                               size_t len, loff_t off)
5043{
5044        struct inode *inode = sb_dqopt(sb)->files[type];
5045        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5046        int err = 0;
5047        int offset = off & (sb->s_blocksize - 1);
5048        int tocopy;
5049        size_t toread;
5050        struct buffer_head *bh;
5051        loff_t i_size = i_size_read(inode);
5052
5053        if (off > i_size)
5054                return 0;
5055        if (off+len > i_size)
5056                len = i_size-off;
5057        toread = len;
5058        while (toread > 0) {
5059                tocopy = sb->s_blocksize - offset < toread ?
5060                                sb->s_blocksize - offset : toread;
5061                bh = ext4_bread(NULL, inode, blk, 0, &err);
5062                if (err)
5063                        return err;
5064                if (!bh)        /* A hole? */
5065                        memset(data, 0, tocopy);
5066                else
5067                        memcpy(data, bh->b_data+offset, tocopy);
5068                brelse(bh);
5069                offset = 0;
5070                toread -= tocopy;
5071                data += tocopy;
5072                blk++;
5073        }
5074        return len;
5075}
5076
5077/* Write to quotafile (we know the transaction is already started and has
5078 * enough credits) */
5079static ssize_t ext4_quota_write(struct super_block *sb, int type,
5080                                const char *data, size_t len, loff_t off)
5081{
5082        struct inode *inode = sb_dqopt(sb)->files[type];
5083        ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5084        int err = 0;
5085        int offset = off & (sb->s_blocksize - 1);
5086        struct buffer_head *bh;
5087        handle_t *handle = journal_current_handle();
5088
5089        if (EXT4_SB(sb)->s_journal && !handle) {
5090                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5091                        " cancelled because transaction is not started",
5092                        (unsigned long long)off, (unsigned long long)len);
5093                return -EIO;
5094        }
5095        /*
5096         * Since we account only one data block in transaction credits,
5097         * then it is impossible to cross a block boundary.
5098         */
5099        if (sb->s_blocksize - offset < len) {
5100                ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
5101                        " cancelled because not block aligned",
5102                        (unsigned long long)off, (unsigned long long)len);
5103                return -EIO;
5104        }
5105
5106        bh = ext4_bread(handle, inode, blk, 1, &err);
5107        if (!bh)
5108                goto out;
5109        err = ext4_journal_get_write_access(handle, bh);
5110        if (err) {
5111                brelse(bh);
5112                goto out;
5113        }
5114        lock_buffer(bh);
5115        memcpy(bh->b_data+offset, data, len);
5116        flush_dcache_page(bh->b_page);
5117        unlock_buffer(bh);
5118        err = ext4_handle_dirty_metadata(handle, NULL, bh);
5119        brelse(bh);
5120out:
5121        if (err)
5122                return err;
5123        if (inode->i_size < off + len) {
5124                i_size_write(inode, off + len);
5125                EXT4_I(inode)->i_disksize = inode->i_size;
5126                ext4_mark_inode_dirty(handle, inode);
5127        }
5128        return len;
5129}
5130
5131#endif
5132
5133static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
5134                       const char *dev_name, void *data)
5135{
5136        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
5137}
5138
5139#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
5140static inline void register_as_ext2(void)
5141{
5142        int err = register_filesystem(&ext2_fs_type);
5143        if (err)
5144                printk(KERN_WARNING
5145                       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
5146}
5147
5148static inline void unregister_as_ext2(void)
5149{
5150        unregister_filesystem(&ext2_fs_type);
5151}
5152
5153static inline int ext2_feature_set_ok(struct super_block *sb)
5154{
5155        if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
5156                return 0;
5157        if (sb->s_flags & MS_RDONLY)
5158                return 1;
5159        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
5160                return 0;
5161        return 1;
5162}
5163MODULE_ALIAS("ext2");
5164#else
5165static inline void