linux/fs/ceph/super.c
<<
>>
Prefs
   1
   2#include <linux/ceph/ceph_debug.h>
   3
   4#include <linux/backing-dev.h>
   5#include <linux/ctype.h>
   6#include <linux/fs.h>
   7#include <linux/inet.h>
   8#include <linux/in6.h>
   9#include <linux/module.h>
  10#include <linux/mount.h>
  11#include <linux/parser.h>
  12#include <linux/sched.h>
  13#include <linux/seq_file.h>
  14#include <linux/slab.h>
  15#include <linux/statfs.h>
  16#include <linux/string.h>
  17
  18#include "super.h"
  19#include "mds_client.h"
  20#include "cache.h"
  21
  22#include <linux/ceph/ceph_features.h>
  23#include <linux/ceph/decode.h>
  24#include <linux/ceph/mon_client.h>
  25#include <linux/ceph/auth.h>
  26#include <linux/ceph/debugfs.h>
  27
  28/*
  29 * Ceph superblock operations
  30 *
  31 * Handle the basics of mounting, unmounting.
  32 */
  33
  34/*
  35 * super ops
  36 */
  37static void ceph_put_super(struct super_block *s)
  38{
  39        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
  40
  41        dout("put_super\n");
  42        ceph_mdsc_close_sessions(fsc->mdsc);
  43
  44        /*
  45         * ensure we release the bdi before put_anon_super releases
  46         * the device name.
  47         */
  48        if (s->s_bdi == &fsc->backing_dev_info) {
  49                bdi_unregister(&fsc->backing_dev_info);
  50                s->s_bdi = NULL;
  51        }
  52
  53        return;
  54}
  55
  56static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  57{
  58        struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
  59        struct ceph_monmap *monmap = fsc->client->monc.monmap;
  60        struct ceph_statfs st;
  61        u64 fsid;
  62        int err;
  63
  64        dout("statfs\n");
  65        err = ceph_monc_do_statfs(&fsc->client->monc, &st);
  66        if (err < 0)
  67                return err;
  68
  69        /* fill in kstatfs */
  70        buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
  71
  72        /*
  73         * express utilization in terms of large blocks to avoid
  74         * overflow on 32-bit machines.
  75         *
  76         * NOTE: for the time being, we make bsize == frsize to humor
  77         * not-yet-ancient versions of glibc that are broken.
  78         * Someday, we will probably want to report a real block
  79         * size...  whatever that may mean for a network file system!
  80         */
  81        buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  82        buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
  83        buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
  84        buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  85        buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  86
  87        buf->f_files = le64_to_cpu(st.num_objects);
  88        buf->f_ffree = -1;
  89        buf->f_namelen = NAME_MAX;
  90
  91        /* leave fsid little-endian, regardless of host endianness */
  92        fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
  93        buf->f_fsid.val[0] = fsid & 0xffffffff;
  94        buf->f_fsid.val[1] = fsid >> 32;
  95
  96        return 0;
  97}
  98
  99
 100static int ceph_sync_fs(struct super_block *sb, int wait)
 101{
 102        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 103
 104        if (!wait) {
 105                dout("sync_fs (non-blocking)\n");
 106                ceph_flush_dirty_caps(fsc->mdsc);
 107                dout("sync_fs (non-blocking) done\n");
 108                return 0;
 109        }
 110
 111        dout("sync_fs (blocking)\n");
 112        ceph_osdc_sync(&fsc->client->osdc);
 113        ceph_mdsc_sync(fsc->mdsc);
 114        dout("sync_fs (blocking) done\n");
 115        return 0;
 116}
 117
 118/*
 119 * mount options
 120 */
 121enum {
 122        Opt_wsize,
 123        Opt_rsize,
 124        Opt_rasize,
 125        Opt_caps_wanted_delay_min,
 126        Opt_caps_wanted_delay_max,
 127        Opt_cap_release_safety,
 128        Opt_readdir_max_entries,
 129        Opt_readdir_max_bytes,
 130        Opt_congestion_kb,
 131        Opt_last_int,
 132        /* int args above */
 133        Opt_snapdirname,
 134        Opt_last_string,
 135        /* string args above */
 136        Opt_dirstat,
 137        Opt_nodirstat,
 138        Opt_rbytes,
 139        Opt_norbytes,
 140        Opt_asyncreaddir,
 141        Opt_noasyncreaddir,
 142        Opt_dcache,
 143        Opt_nodcache,
 144        Opt_ino32,
 145        Opt_noino32,
 146        Opt_fscache,
 147        Opt_nofscache,
 148#ifdef CONFIG_CEPH_FS_POSIX_ACL
 149        Opt_acl,
 150#endif
 151        Opt_noacl
 152};
 153
 154static match_table_t fsopt_tokens = {
 155        {Opt_wsize, "wsize=%d"},
 156        {Opt_rsize, "rsize=%d"},
 157        {Opt_rasize, "rasize=%d"},
 158        {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 159        {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
 160        {Opt_cap_release_safety, "cap_release_safety=%d"},
 161        {Opt_readdir_max_entries, "readdir_max_entries=%d"},
 162        {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
 163        {Opt_congestion_kb, "write_congestion_kb=%d"},
 164        /* int args above */
 165        {Opt_snapdirname, "snapdirname=%s"},
 166        /* string args above */
 167        {Opt_dirstat, "dirstat"},
 168        {Opt_nodirstat, "nodirstat"},
 169        {Opt_rbytes, "rbytes"},
 170        {Opt_norbytes, "norbytes"},
 171        {Opt_asyncreaddir, "asyncreaddir"},
 172        {Opt_noasyncreaddir, "noasyncreaddir"},
 173        {Opt_dcache, "dcache"},
 174        {Opt_nodcache, "nodcache"},
 175        {Opt_ino32, "ino32"},
 176        {Opt_noino32, "noino32"},
 177        {Opt_fscache, "fsc"},
 178        {Opt_nofscache, "nofsc"},
 179#ifdef CONFIG_CEPH_FS_POSIX_ACL
 180        {Opt_acl, "acl"},
 181#endif
 182        {Opt_noacl, "noacl"},
 183        {-1, NULL}
 184};
 185
 186static int parse_fsopt_token(char *c, void *private)
 187{
 188        struct ceph_mount_options *fsopt = private;
 189        substring_t argstr[MAX_OPT_ARGS];
 190        int token, intval, ret;
 191
 192        token = match_token((char *)c, fsopt_tokens, argstr);
 193        if (token < 0)
 194                return -EINVAL;
 195
 196        if (token < Opt_last_int) {
 197                ret = match_int(&argstr[0], &intval);
 198                if (ret < 0) {
 199                        pr_err("bad mount option arg (not int) "
 200                               "at '%s'\n", c);
 201                        return ret;
 202                }
 203                dout("got int token %d val %d\n", token, intval);
 204        } else if (token > Opt_last_int && token < Opt_last_string) {
 205                dout("got string token %d val %s\n", token,
 206                     argstr[0].from);
 207        } else {
 208                dout("got token %d\n", token);
 209        }
 210
 211        switch (token) {
 212        case Opt_snapdirname:
 213                kfree(fsopt->snapdir_name);
 214                fsopt->snapdir_name = kstrndup(argstr[0].from,
 215                                               argstr[0].to-argstr[0].from,
 216                                               GFP_KERNEL);
 217                if (!fsopt->snapdir_name)
 218                        return -ENOMEM;
 219                break;
 220
 221                /* misc */
 222        case Opt_wsize:
 223                fsopt->wsize = intval;
 224                break;
 225        case Opt_rsize:
 226                fsopt->rsize = intval;
 227                break;
 228        case Opt_rasize:
 229                fsopt->rasize = intval;
 230                break;
 231        case Opt_caps_wanted_delay_min:
 232                fsopt->caps_wanted_delay_min = intval;
 233                break;
 234        case Opt_caps_wanted_delay_max:
 235                fsopt->caps_wanted_delay_max = intval;
 236                break;
 237        case Opt_readdir_max_entries:
 238                fsopt->max_readdir = intval;
 239                break;
 240        case Opt_readdir_max_bytes:
 241                fsopt->max_readdir_bytes = intval;
 242                break;
 243        case Opt_congestion_kb:
 244                fsopt->congestion_kb = intval;
 245                break;
 246        case Opt_dirstat:
 247                fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
 248                break;
 249        case Opt_nodirstat:
 250                fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
 251                break;
 252        case Opt_rbytes:
 253                fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
 254                break;
 255        case Opt_norbytes:
 256                fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
 257                break;
 258        case Opt_asyncreaddir:
 259                fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
 260                break;
 261        case Opt_noasyncreaddir:
 262                fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
 263                break;
 264        case Opt_dcache:
 265                fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
 266                break;
 267        case Opt_nodcache:
 268                fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
 269                break;
 270        case Opt_ino32:
 271                fsopt->flags |= CEPH_MOUNT_OPT_INO32;
 272                break;
 273        case Opt_noino32:
 274                fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
 275                break;
 276        case Opt_fscache:
 277                fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
 278                break;
 279        case Opt_nofscache:
 280                fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
 281                break;
 282#ifdef CONFIG_CEPH_FS_POSIX_ACL
 283        case Opt_acl:
 284                fsopt->sb_flags |= MS_POSIXACL;
 285                break;
 286#endif
 287        case Opt_noacl:
 288                fsopt->sb_flags &= ~MS_POSIXACL;
 289                break;
 290        default:
 291                BUG_ON(token);
 292        }
 293        return 0;
 294}
 295
 296static void destroy_mount_options(struct ceph_mount_options *args)
 297{
 298        dout("destroy_mount_options %p\n", args);
 299        kfree(args->snapdir_name);
 300        kfree(args);
 301}
 302
 303static int strcmp_null(const char *s1, const char *s2)
 304{
 305        if (!s1 && !s2)
 306                return 0;
 307        if (s1 && !s2)
 308                return -1;
 309        if (!s1 && s2)
 310                return 1;
 311        return strcmp(s1, s2);
 312}
 313
 314static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 315                                 struct ceph_options *new_opt,
 316                                 struct ceph_fs_client *fsc)
 317{
 318        struct ceph_mount_options *fsopt1 = new_fsopt;
 319        struct ceph_mount_options *fsopt2 = fsc->mount_options;
 320        int ofs = offsetof(struct ceph_mount_options, snapdir_name);
 321        int ret;
 322
 323        ret = memcmp(fsopt1, fsopt2, ofs);
 324        if (ret)
 325                return ret;
 326
 327        ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
 328        if (ret)
 329                return ret;
 330
 331        return ceph_compare_options(new_opt, fsc->client);
 332}
 333
 334static int parse_mount_options(struct ceph_mount_options **pfsopt,
 335                               struct ceph_options **popt,
 336                               int flags, char *options,
 337                               const char *dev_name,
 338                               const char **path)
 339{
 340        struct ceph_mount_options *fsopt;
 341        const char *dev_name_end;
 342        int err;
 343
 344        if (!dev_name || !*dev_name)
 345                return -EINVAL;
 346
 347        fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
 348        if (!fsopt)
 349                return -ENOMEM;
 350
 351        dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
 352
 353        fsopt->sb_flags = flags;
 354        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 355
 356        fsopt->rsize = CEPH_RSIZE_DEFAULT;
 357        fsopt->rasize = CEPH_RASIZE_DEFAULT;
 358        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 359        fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 360        fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
 361        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
 362        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
 363        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
 364        fsopt->congestion_kb = default_congestion_kb();
 365
 366        /*
 367         * Distinguish the server list from the path in "dev_name".
 368         * Internally we do not include the leading '/' in the path.
 369         *
 370         * "dev_name" will look like:
 371         *     <server_spec>[,<server_spec>...]:[<path>]
 372         * where
 373         *     <server_spec> is <ip>[:<port>]
 374         *     <path> is optional, but if present must begin with '/'
 375         */
 376        dev_name_end = strchr(dev_name, '/');
 377        if (dev_name_end) {
 378                /* skip over leading '/' for path */
 379                *path = dev_name_end + 1;
 380        } else {
 381                /* path is empty */
 382                dev_name_end = dev_name + strlen(dev_name);
 383                *path = dev_name_end;
 384        }
 385        err = -EINVAL;
 386        dev_name_end--;         /* back up to ':' separator */
 387        if (dev_name_end < dev_name || *dev_name_end != ':') {
 388                pr_err("device name is missing path (no : separator in %s)\n",
 389                                dev_name);
 390                goto out;
 391        }
 392        dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
 393        dout("server path '%s'\n", *path);
 394
 395        *popt = ceph_parse_options(options, dev_name, dev_name_end,
 396                                 parse_fsopt_token, (void *)fsopt);
 397        if (IS_ERR(*popt)) {
 398                err = PTR_ERR(*popt);
 399                goto out;
 400        }
 401
 402        /* success */
 403        *pfsopt = fsopt;
 404        return 0;
 405
 406out:
 407        destroy_mount_options(fsopt);
 408        return err;
 409}
 410
 411/**
 412 * ceph_show_options - Show mount options in /proc/mounts
 413 * @m: seq_file to write to
 414 * @root: root of that (sub)tree
 415 */
 416static int ceph_show_options(struct seq_file *m, struct dentry *root)
 417{
 418        struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
 419        struct ceph_mount_options *fsopt = fsc->mount_options;
 420        struct ceph_options *opt = fsc->client->options;
 421
 422        if (opt->flags & CEPH_OPT_FSID)
 423                seq_printf(m, ",fsid=%pU", &opt->fsid);
 424        if (opt->flags & CEPH_OPT_NOSHARE)
 425                seq_puts(m, ",noshare");
 426        if (opt->flags & CEPH_OPT_NOCRC)
 427                seq_puts(m, ",nocrc");
 428
 429        if (opt->name)
 430                seq_printf(m, ",name=%s", opt->name);
 431        if (opt->key)
 432                seq_puts(m, ",secret=<hidden>");
 433
 434        if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
 435                seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
 436        if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
 437                seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
 438        if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 439                seq_printf(m, ",osdkeepalivetimeout=%d",
 440                           opt->osd_keepalive_timeout);
 441
 442        if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
 443                seq_puts(m, ",dirstat");
 444        if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
 445                seq_puts(m, ",norbytes");
 446        if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 447                seq_puts(m, ",noasyncreaddir");
 448        if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
 449                seq_puts(m, ",dcache");
 450        else
 451                seq_puts(m, ",nodcache");
 452        if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
 453                seq_puts(m, ",fsc");
 454        else
 455                seq_puts(m, ",nofsc");
 456
 457#ifdef CONFIG_CEPH_FS_POSIX_ACL
 458        if (fsopt->sb_flags & MS_POSIXACL)
 459                seq_puts(m, ",acl");
 460        else
 461                seq_puts(m, ",noacl");
 462#endif
 463
 464        if (fsopt->wsize)
 465                seq_printf(m, ",wsize=%d", fsopt->wsize);
 466        if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
 467                seq_printf(m, ",rsize=%d", fsopt->rsize);
 468        if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
 469                seq_printf(m, ",rasize=%d", fsopt->rasize);
 470        if (fsopt->congestion_kb != default_congestion_kb())
 471                seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
 472        if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
 473                seq_printf(m, ",caps_wanted_delay_min=%d",
 474                         fsopt->caps_wanted_delay_min);
 475        if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
 476                seq_printf(m, ",caps_wanted_delay_max=%d",
 477                           fsopt->caps_wanted_delay_max);
 478        if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
 479                seq_printf(m, ",cap_release_safety=%d",
 480                           fsopt->cap_release_safety);
 481        if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
 482                seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
 483        if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
 484                seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
 485        if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
 486                seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
 487        return 0;
 488}
 489
 490/*
 491 * handle any mon messages the standard library doesn't understand.
 492 * return error if we don't either.
 493 */
 494static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 495{
 496        struct ceph_fs_client *fsc = client->private;
 497        int type = le16_to_cpu(msg->hdr.type);
 498
 499        switch (type) {
 500        case CEPH_MSG_MDS_MAP:
 501                ceph_mdsc_handle_map(fsc->mdsc, msg);
 502                return 0;
 503
 504        default:
 505                return -1;
 506        }
 507}
 508
 509/*
 510 * create a new fs client
 511 */
 512static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 513                                        struct ceph_options *opt)
 514{
 515        struct ceph_fs_client *fsc;
 516        const u64 supported_features =
 517                CEPH_FEATURE_FLOCK |
 518                CEPH_FEATURE_DIRLAYOUTHASH |
 519                CEPH_FEATURE_MDS_INLINE_DATA;
 520        const u64 required_features = 0;
 521        int page_count;
 522        size_t size;
 523        int err = -ENOMEM;
 524
 525        fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
 526        if (!fsc)
 527                return ERR_PTR(-ENOMEM);
 528
 529        fsc->client = ceph_create_client(opt, fsc, supported_features,
 530                                         required_features);
 531        if (IS_ERR(fsc->client)) {
 532                err = PTR_ERR(fsc->client);
 533                goto fail;
 534        }
 535        fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 536        fsc->client->monc.want_mdsmap = 1;
 537
 538        fsc->mount_options = fsopt;
 539
 540        fsc->sb = NULL;
 541        fsc->mount_state = CEPH_MOUNT_MOUNTING;
 542
 543        atomic_long_set(&fsc->writeback_count, 0);
 544
 545        err = bdi_init(&fsc->backing_dev_info);
 546        if (err < 0)
 547                goto fail_client;
 548
 549        err = -ENOMEM;
 550        /*
 551         * The number of concurrent works can be high but they don't need
 552         * to be processed in parallel, limit concurrency.
 553         */
 554        fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
 555        if (fsc->wb_wq == NULL)
 556                goto fail_bdi;
 557        fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
 558        if (fsc->pg_inv_wq == NULL)
 559                goto fail_wb_wq;
 560        fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
 561        if (fsc->trunc_wq == NULL)
 562                goto fail_pg_inv_wq;
 563
 564        /* set up mempools */
 565        err = -ENOMEM;
 566        page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
 567        size = sizeof (struct page *) * (page_count ? page_count : 1);
 568        fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
 569        if (!fsc->wb_pagevec_pool)
 570                goto fail_trunc_wq;
 571
 572        /* setup fscache */
 573        if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
 574            (ceph_fscache_register_fs(fsc) != 0))
 575                goto fail_fscache;
 576
 577        /* caps */
 578        fsc->min_caps = fsopt->max_readdir;
 579
 580        return fsc;
 581
 582fail_fscache:
 583        ceph_fscache_unregister_fs(fsc);
 584fail_trunc_wq:
 585        destroy_workqueue(fsc->trunc_wq);
 586fail_pg_inv_wq:
 587        destroy_workqueue(fsc->pg_inv_wq);
 588fail_wb_wq:
 589        destroy_workqueue(fsc->wb_wq);
 590fail_bdi:
 591        bdi_destroy(&fsc->backing_dev_info);
 592fail_client:
 593        ceph_destroy_client(fsc->client);
 594fail:
 595        kfree(fsc);
 596        return ERR_PTR(err);
 597}
 598
 599static void destroy_fs_client(struct ceph_fs_client *fsc)
 600{
 601        dout("destroy_fs_client %p\n", fsc);
 602
 603        ceph_fscache_unregister_fs(fsc);
 604
 605        destroy_workqueue(fsc->wb_wq);
 606        destroy_workqueue(fsc->pg_inv_wq);
 607        destroy_workqueue(fsc->trunc_wq);
 608
 609        bdi_destroy(&fsc->backing_dev_info);
 610
 611        mempool_destroy(fsc->wb_pagevec_pool);
 612
 613        destroy_mount_options(fsc->mount_options);
 614
 615        ceph_fs_debugfs_cleanup(fsc);
 616
 617        ceph_destroy_client(fsc->client);
 618
 619        kfree(fsc);
 620        dout("destroy_fs_client %p done\n", fsc);
 621}
 622
 623/*
 624 * caches
 625 */
 626struct kmem_cache *ceph_inode_cachep;
 627struct kmem_cache *ceph_cap_cachep;
 628struct kmem_cache *ceph_dentry_cachep;
 629struct kmem_cache *ceph_file_cachep;
 630
 631static void ceph_inode_init_once(void *foo)
 632{
 633        struct ceph_inode_info *ci = foo;
 634        inode_init_once(&ci->vfs_inode);
 635}
 636
 637static int __init init_caches(void)
 638{
 639        int error = -ENOMEM;
 640
 641        ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 642                                      sizeof(struct ceph_inode_info),
 643                                      __alignof__(struct ceph_inode_info),
 644                                      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
 645                                      ceph_inode_init_once);
 646        if (ceph_inode_cachep == NULL)
 647                return -ENOMEM;
 648
 649        ceph_cap_cachep = KMEM_CACHE(ceph_cap,
 650                                     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 651        if (ceph_cap_cachep == NULL)
 652                goto bad_cap;
 653
 654        ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
 655                                        SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 656        if (ceph_dentry_cachep == NULL)
 657                goto bad_dentry;
 658
 659        ceph_file_cachep = KMEM_CACHE(ceph_file_info,
 660                                      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 661        if (ceph_file_cachep == NULL)
 662                goto bad_file;
 663
 664        if ((error = ceph_fscache_register()))
 665                goto bad_file;
 666
 667        return 0;
 668bad_file:
 669        kmem_cache_destroy(ceph_dentry_cachep);
 670bad_dentry:
 671        kmem_cache_destroy(ceph_cap_cachep);
 672bad_cap:
 673        kmem_cache_destroy(ceph_inode_cachep);
 674        return error;
 675}
 676
 677static void destroy_caches(void)
 678{
 679        /*
 680         * Make sure all delayed rcu free inodes are flushed before we
 681         * destroy cache.
 682         */
 683        rcu_barrier();
 684
 685        kmem_cache_destroy(ceph_inode_cachep);
 686        kmem_cache_destroy(ceph_cap_cachep);
 687        kmem_cache_destroy(ceph_dentry_cachep);
 688        kmem_cache_destroy(ceph_file_cachep);
 689
 690        ceph_fscache_unregister();
 691}
 692
 693
 694/*
 695 * ceph_umount_begin - initiate forced umount.  Tear down down the
 696 * mount, skipping steps that may hang while waiting for server(s).
 697 */
 698static void ceph_umount_begin(struct super_block *sb)
 699{
 700        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 701
 702        dout("ceph_umount_begin - starting forced umount\n");
 703        if (!fsc)
 704                return;
 705        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
 706        return;
 707}
 708
 709static const struct super_operations ceph_super_ops = {
 710        .alloc_inode    = ceph_alloc_inode,
 711        .destroy_inode  = ceph_destroy_inode,
 712        .write_inode    = ceph_write_inode,
 713        .drop_inode     = ceph_drop_inode,
 714        .sync_fs        = ceph_sync_fs,
 715        .put_super      = ceph_put_super,
 716        .show_options   = ceph_show_options,
 717        .statfs         = ceph_statfs,
 718        .umount_begin   = ceph_umount_begin,
 719};
 720
 721/*
 722 * Bootstrap mount by opening the root directory.  Note the mount
 723 * @started time from caller, and time out if this takes too long.
 724 */
 725static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 726                                       const char *path,
 727                                       unsigned long started)
 728{
 729        struct ceph_mds_client *mdsc = fsc->mdsc;
 730        struct ceph_mds_request *req = NULL;
 731        int err;
 732        struct dentry *root;
 733
 734        /* open dir */
 735        dout("open_root_inode opening '%s'\n", path);
 736        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
 737        if (IS_ERR(req))
 738                return ERR_CAST(req);
 739        req->r_path1 = kstrdup(path, GFP_NOFS);
 740        req->r_ino1.ino = CEPH_INO_ROOT;
 741        req->r_ino1.snap = CEPH_NOSNAP;
 742        req->r_started = started;
 743        req->r_timeout = fsc->client->options->mount_timeout * HZ;
 744        req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 745        req->r_num_caps = 2;
 746        err = ceph_mdsc_do_request(mdsc, NULL, req);
 747        if (err == 0) {
 748                struct inode *inode = req->r_target_inode;
 749                req->r_target_inode = NULL;
 750                dout("open_root_inode success\n");
 751                if (ceph_ino(inode) == CEPH_INO_ROOT &&
 752                    fsc->sb->s_root == NULL) {
 753                        root = d_make_root(inode);
 754                        if (!root) {
 755                                root = ERR_PTR(-ENOMEM);
 756                                goto out;
 757                        }
 758                } else {
 759                        root = d_obtain_root(inode);
 760                }
 761                ceph_init_dentry(root);
 762                dout("open_root_inode success, root dentry is %p\n", root);
 763        } else {
 764                root = ERR_PTR(err);
 765        }
 766out:
 767        ceph_mdsc_put_request(req);
 768        return root;
 769}
 770
 771
 772
 773
 774/*
 775 * mount: join the ceph cluster, and open root directory.
 776 */
 777static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 778                      const char *path)
 779{
 780        int err;
 781        unsigned long started = jiffies;  /* note the start time */
 782        struct dentry *root;
 783        int first = 0;   /* first vfsmount for this super_block */
 784
 785        dout("mount start\n");
 786        mutex_lock(&fsc->client->mount_mutex);
 787
 788        err = __ceph_open_session(fsc->client, started);
 789        if (err < 0)
 790                goto out;
 791
 792        dout("mount opening root\n");
 793        root = open_root_dentry(fsc, "", started);
 794        if (IS_ERR(root)) {
 795                err = PTR_ERR(root);
 796                goto out;
 797        }
 798        if (fsc->sb->s_root) {
 799                dput(root);
 800        } else {
 801                fsc->sb->s_root = root;
 802                first = 1;
 803
 804                err = ceph_fs_debugfs_init(fsc);
 805                if (err < 0)
 806                        goto fail;
 807        }
 808
 809        if (path[0] == 0) {
 810                dget(root);
 811        } else {
 812                dout("mount opening base mountpoint\n");
 813                root = open_root_dentry(fsc, path, started);
 814                if (IS_ERR(root)) {
 815                        err = PTR_ERR(root);
 816                        goto fail;
 817                }
 818        }
 819
 820        fsc->mount_state = CEPH_MOUNT_MOUNTED;
 821        dout("mount success\n");
 822        mutex_unlock(&fsc->client->mount_mutex);
 823        return root;
 824
 825out:
 826        mutex_unlock(&fsc->client->mount_mutex);
 827        return ERR_PTR(err);
 828
 829fail:
 830        if (first) {
 831                dput(fsc->sb->s_root);
 832                fsc->sb->s_root = NULL;
 833        }
 834        goto out;
 835}
 836
 837static int ceph_set_super(struct super_block *s, void *data)
 838{
 839        struct ceph_fs_client *fsc = data;
 840        int ret;
 841
 842        dout("set_super %p data %p\n", s, data);
 843
 844        s->s_flags = fsc->mount_options->sb_flags;
 845        s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 846
 847        s->s_xattr = ceph_xattr_handlers;
 848        s->s_fs_info = fsc;
 849        fsc->sb = s;
 850
 851        s->s_op = &ceph_super_ops;
 852        s->s_export_op = &ceph_export_ops;
 853
 854        s->s_time_gran = 1000;  /* 1000 ns == 1 us */
 855
 856        ret = set_anon_super(s, NULL);  /* what is that second arg for? */
 857        if (ret != 0)
 858                goto fail;
 859
 860        return ret;
 861
 862fail:
 863        s->s_fs_info = NULL;
 864        fsc->sb = NULL;
 865        return ret;
 866}
 867
 868/*
 869 * share superblock if same fs AND options
 870 */
 871static int ceph_compare_super(struct super_block *sb, void *data)
 872{
 873        struct ceph_fs_client *new = data;
 874        struct ceph_mount_options *fsopt = new->mount_options;
 875        struct ceph_options *opt = new->client->options;
 876        struct ceph_fs_client *other = ceph_sb_to_client(sb);
 877
 878        dout("ceph_compare_super %p\n", sb);
 879
 880        if (compare_mount_options(fsopt, opt, other)) {
 881                dout("monitor(s)/mount options don't match\n");
 882                return 0;
 883        }
 884        if ((opt->flags & CEPH_OPT_FSID) &&
 885            ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
 886                dout("fsid doesn't match\n");
 887                return 0;
 888        }
 889        if (fsopt->sb_flags != other->mount_options->sb_flags) {
 890                dout("flags differ\n");
 891                return 0;
 892        }
 893        return 1;
 894}
 895
 896/*
 897 * construct our own bdi so we can control readahead, etc.
 898 */
 899static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 900
 901static int ceph_register_bdi(struct super_block *sb,
 902                             struct ceph_fs_client *fsc)
 903{
 904        int err;
 905
 906        /* set ra_pages based on rasize mount option? */
 907        if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
 908                fsc->backing_dev_info.ra_pages =
 909                        (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
 910                        >> PAGE_SHIFT;
 911        else
 912                fsc->backing_dev_info.ra_pages =
 913                        default_backing_dev_info.ra_pages;
 914
 915        err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 916                           atomic_long_inc_return(&bdi_seq));
 917        if (!err)
 918                sb->s_bdi = &fsc->backing_dev_info;
 919        return err;
 920}
 921
 922static struct dentry *ceph_mount(struct file_system_type *fs_type,
 923                       int flags, const char *dev_name, void *data)
 924{
 925        struct super_block *sb;
 926        struct ceph_fs_client *fsc;
 927        struct dentry *res;
 928        int err;
 929        int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 930        const char *path = NULL;
 931        struct ceph_mount_options *fsopt = NULL;
 932        struct ceph_options *opt = NULL;
 933
 934        dout("ceph_mount\n");
 935
 936#ifdef CONFIG_CEPH_FS_POSIX_ACL
 937        flags |= MS_POSIXACL;
 938#endif
 939        err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
 940        if (err < 0) {
 941                res = ERR_PTR(err);
 942                goto out_final;
 943        }
 944
 945        /* create client (which we may/may not use) */
 946        fsc = create_fs_client(fsopt, opt);
 947        if (IS_ERR(fsc)) {
 948                res = ERR_CAST(fsc);
 949                destroy_mount_options(fsopt);
 950                ceph_destroy_options(opt);
 951                goto out_final;
 952        }
 953
 954        err = ceph_mdsc_init(fsc);
 955        if (err < 0) {
 956                res = ERR_PTR(err);
 957                goto out;
 958        }
 959
 960        if (ceph_test_opt(fsc->client, NOSHARE))
 961                compare_super = NULL;
 962        sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
 963        if (IS_ERR(sb)) {
 964                res = ERR_CAST(sb);
 965                goto out;
 966        }
 967
 968        if (ceph_sb_to_client(sb) != fsc) {
 969                ceph_mdsc_destroy(fsc);
 970                destroy_fs_client(fsc);
 971                fsc = ceph_sb_to_client(sb);
 972                dout("get_sb got existing client %p\n", fsc);
 973        } else {
 974                dout("get_sb using new client %p\n", fsc);
 975                err = ceph_register_bdi(sb, fsc);
 976                if (err < 0) {
 977                        res = ERR_PTR(err);
 978                        goto out_splat;
 979                }
 980        }
 981
 982        res = ceph_real_mount(fsc, path);
 983        if (IS_ERR(res))
 984                goto out_splat;
 985        dout("root %p inode %p ino %llx.%llx\n", res,
 986             res->d_inode, ceph_vinop(res->d_inode));
 987        return res;
 988
 989out_splat:
 990        ceph_mdsc_close_sessions(fsc->mdsc);
 991        deactivate_locked_super(sb);
 992        goto out_final;
 993
 994out:
 995        ceph_mdsc_destroy(fsc);
 996        destroy_fs_client(fsc);
 997out_final:
 998        dout("ceph_mount fail %ld\n", PTR_ERR(res));
 999        return res;
1000}
1001
1002static void ceph_kill_sb(struct super_block *s)
1003{
1004        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
1005        dout("kill_sb %p\n", s);
1006        ceph_mdsc_pre_umount(fsc->mdsc);
1007        kill_anon_super(s);    /* will call put_super after sb is r/o */
1008        ceph_mdsc_destroy(fsc);
1009        destroy_fs_client(fsc);
1010}
1011
1012static struct file_system_type ceph_fs_type = {
1013        .owner          = THIS_MODULE,
1014        .name           = "ceph",
1015        .mount          = ceph_mount,
1016        .kill_sb        = ceph_kill_sb,
1017        .fs_flags       = FS_RENAME_DOES_D_MOVE,
1018};
1019MODULE_ALIAS_FS("ceph");
1020
1021static int __init init_ceph(void)
1022{
1023        int ret = init_caches();
1024        if (ret)
1025                goto out;
1026
1027        ceph_flock_init();
1028        ceph_xattr_init();
1029        ret = ceph_snap_init();
1030        if (ret)
1031                goto out_xattr;
1032        ret = register_filesystem(&ceph_fs_type);
1033        if (ret)
1034                goto out_snap;
1035
1036        pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1037
1038        return 0;
1039
1040out_snap:
1041        ceph_snap_exit();
1042out_xattr:
1043        ceph_xattr_exit();
1044        destroy_caches();
1045out:
1046        return ret;
1047}
1048
1049static void __exit exit_ceph(void)
1050{
1051        dout("exit_ceph\n");
1052        unregister_filesystem(&ceph_fs_type);
1053        ceph_snap_exit();
1054        ceph_xattr_exit();
1055        destroy_caches();
1056}
1057
1058module_init(init_ceph);
1059module_exit(exit_ceph);
1060
1061MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1062MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1063MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
1064MODULE_DESCRIPTION("Ceph filesystem for Linux");
1065MODULE_LICENSE("GPL");
1066
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.