linux/fs/ceph/export.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/exportfs.h>
#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <asm/unaligned.h>

#include "super.h"
#include "mds_client.h"
  10
  11/*
  12 * Basic fh
  13 */
  14struct ceph_nfs_fh {
  15        u64 ino;
  16} __attribute__ ((packed));
  17
  18/*
  19 * Larger fh that includes parent ino.
  20 */
  21struct ceph_nfs_confh {
  22        u64 ino, parent_ino;
  23} __attribute__ ((packed));
  24
  25/*
  26 * fh for snapped inode
  27 */
  28struct ceph_nfs_snapfh {
  29        u64 ino;
  30        u64 snapid;
  31        u64 parent_ino;
  32        u32 hash;
  33} __attribute__ ((packed));
  34
  35static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
  36                              struct inode *parent_inode)
  37{
  38        static const int snap_handle_length =
  39                sizeof(struct ceph_nfs_snapfh) >> 2;
  40        struct ceph_nfs_snapfh *sfh = (void *)rawfh;
  41        u64 snapid = ceph_snap(inode);
  42        int ret;
  43        bool no_parent = true;
  44
  45        if (*max_len < snap_handle_length) {
  46                *max_len = snap_handle_length;
  47                ret = FILEID_INVALID;
  48                goto out;
  49        }
  50
  51        ret =  -EINVAL;
  52        if (snapid != CEPH_SNAPDIR) {
  53                struct inode *dir;
  54                struct dentry *dentry = d_find_alias(inode);
  55                if (!dentry)
  56                        goto out;
  57
  58                rcu_read_lock();
  59                dir = d_inode_rcu(dentry->d_parent);
  60                if (ceph_snap(dir) != CEPH_SNAPDIR) {
  61                        sfh->parent_ino = ceph_ino(dir);
  62                        sfh->hash = ceph_dentry_hash(dir, dentry);
  63                        no_parent = false;
  64                }
  65                rcu_read_unlock();
  66                dput(dentry);
  67        }
  68
  69        if (no_parent) {
  70                if (!S_ISDIR(inode->i_mode))
  71                        goto out;
  72                sfh->parent_ino = sfh->ino;
  73                sfh->hash = 0;
  74        }
  75        sfh->ino = ceph_ino(inode);
  76        sfh->snapid = snapid;
  77
  78        *max_len = snap_handle_length;
  79        ret = FILEID_BTRFS_WITH_PARENT;
  80out:
  81        dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
  82        return ret;
  83}
  84
  85static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
  86                          struct inode *parent_inode)
  87{
  88        static const int handle_length =
  89                sizeof(struct ceph_nfs_fh) >> 2;
  90        static const int connected_handle_length =
  91                sizeof(struct ceph_nfs_confh) >> 2;
  92        int type;
  93
  94        if (ceph_snap(inode) != CEPH_NOSNAP)
  95                return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
  96
  97        if (parent_inode && (*max_len < connected_handle_length)) {
  98                *max_len = connected_handle_length;
  99                return FILEID_INVALID;
 100        } else if (*max_len < handle_length) {
 101                *max_len = handle_length;
 102                return FILEID_INVALID;
 103        }
 104
 105        if (parent_inode) {
 106                struct ceph_nfs_confh *cfh = (void *)rawfh;
 107                dout("encode_fh %llx with parent %llx\n",
 108                     ceph_ino(inode), ceph_ino(parent_inode));
 109                cfh->ino = ceph_ino(inode);
 110                cfh->parent_ino = ceph_ino(parent_inode);
 111                *max_len = connected_handle_length;
 112                type = FILEID_INO32_GEN_PARENT;
 113        } else {
 114                struct ceph_nfs_fh *fh = (void *)rawfh;
 115                dout("encode_fh %llx\n", ceph_ino(inode));
 116                fh->ino = ceph_ino(inode);
 117                *max_len = handle_length;
 118                type = FILEID_INO32_GEN;
 119        }
 120        return type;
 121}
 122
 123static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
 124{
 125        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 126        struct inode *inode;
 127        struct ceph_vino vino;
 128        int err;
 129
 130        vino.ino = ino;
 131        vino.snap = CEPH_NOSNAP;
 132
 133        if (ceph_vino_is_reserved(vino))
 134                return ERR_PTR(-ESTALE);
 135
 136        inode = ceph_find_inode(sb, vino);
 137        if (!inode) {
 138                struct ceph_mds_request *req;
 139                int mask;
 140
 141                req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
 142                                               USE_ANY_MDS);
 143                if (IS_ERR(req))
 144                        return ERR_CAST(req);
 145
 146                mask = CEPH_STAT_CAP_INODE;
 147                if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
 148                        mask |= CEPH_CAP_XATTR_SHARED;
 149                req->r_args.lookupino.mask = cpu_to_le32(mask);
 150
 151                req->r_ino1 = vino;
 152                req->r_num_caps = 1;
 153                err = ceph_mdsc_do_request(mdsc, NULL, req);
 154                inode = req->r_target_inode;
 155                if (inode)
 156                        ihold(inode);
 157                ceph_mdsc_put_request(req);
 158                if (!inode)
 159                        return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
 160        }
 161        return inode;
 162}
 163
 164struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
 165{
 166        struct inode *inode = __lookup_inode(sb, ino);
 167        if (IS_ERR(inode))
 168                return inode;
 169        if (inode->i_nlink == 0) {
 170                iput(inode);
 171                return ERR_PTR(-ESTALE);
 172        }
 173        return inode;
 174}
 175
 176static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 177{
 178        struct inode *inode = __lookup_inode(sb, ino);
 179        int err;
 180
 181        if (IS_ERR(inode))
 182                return ERR_CAST(inode);
 183        /* We need LINK caps to reliably check i_nlink */
 184        err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
 185        if (err) {
 186                iput(inode);
 187                return ERR_PTR(err);
 188        }
 189        /* -ESTALE if inode as been unlinked and no file is open */
 190        if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
 191                iput(inode);
 192                return ERR_PTR(-ESTALE);
 193        }
 194        return d_obtain_alias(inode);
 195}
 196
 197static struct dentry *__snapfh_to_dentry(struct super_block *sb,
 198                                          struct ceph_nfs_snapfh *sfh,
 199                                          bool want_parent)
 200{
 201        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 202        struct ceph_mds_request *req;
 203        struct inode *inode;
 204        struct ceph_vino vino;
 205        int mask;
 206        int err;
 207        bool unlinked = false;
 208
 209        if (want_parent) {
 210                vino.ino = sfh->parent_ino;
 211                if (sfh->snapid == CEPH_SNAPDIR)
 212                        vino.snap = CEPH_NOSNAP;
 213                else if (sfh->ino == sfh->parent_ino)
 214                        vino.snap = CEPH_SNAPDIR;
 215                else
 216                        vino.snap = sfh->snapid;
 217        } else {
 218                vino.ino = sfh->ino;
 219                vino.snap = sfh->snapid;
 220        }
 221
 222        if (ceph_vino_is_reserved(vino))
 223                return ERR_PTR(-ESTALE);
 224
 225        inode = ceph_find_inode(sb, vino);
 226        if (inode)
 227                return d_obtain_alias(inode);
 228
 229        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
 230                                       USE_ANY_MDS);
 231        if (IS_ERR(req))
 232                return ERR_CAST(req);
 233
 234        mask = CEPH_STAT_CAP_INODE;
 235        if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
 236                mask |= CEPH_CAP_XATTR_SHARED;
 237        req->r_args.lookupino.mask = cpu_to_le32(mask);
 238        if (vino.snap < CEPH_NOSNAP) {
 239                req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
 240                if (!want_parent && sfh->ino != sfh->parent_ino) {
 241                        req->r_args.lookupino.parent =
 242                                        cpu_to_le64(sfh->parent_ino);
 243                        req->r_args.lookupino.hash =
 244                                        cpu_to_le32(sfh->hash);
 245                }
 246        }
 247
 248        req->r_ino1 = vino;
 249        req->r_num_caps = 1;
 250        err = ceph_mdsc_do_request(mdsc, NULL, req);
 251        inode = req->r_target_inode;
 252        if (inode) {
 253                if (vino.snap == CEPH_SNAPDIR) {
 254                        if (inode->i_nlink == 0)
 255                                unlinked = true;
 256                        inode = ceph_get_snapdir(inode);
 257                } else if (ceph_snap(inode) == vino.snap) {
 258                        ihold(inode);
 259                } else {
 260                        /* mds does not support lookup snapped inode */
 261                        inode = ERR_PTR(-EOPNOTSUPP);
 262                }
 263        } else {
 264                inode = ERR_PTR(-ESTALE);
 265        }
 266        ceph_mdsc_put_request(req);
 267
 268        if (want_parent) {
 269                dout("snapfh_to_parent %llx.%llx\n err=%d\n",
 270                     vino.ino, vino.snap, err);
 271        } else {
 272                dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
 273                      vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
 274        }
 275        if (IS_ERR(inode))
 276                return ERR_CAST(inode);
 277        /* see comments in ceph_get_parent() */
 278        return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
 279}
 280
 281/*
 282 * convert regular fh to dentry
 283 */
 284static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
 285                                        struct fid *fid,
 286                                        int fh_len, int fh_type)
 287{
 288        struct ceph_nfs_fh *fh = (void *)fid->raw;
 289
 290        if (fh_type == FILEID_BTRFS_WITH_PARENT) {
 291                struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
 292                return __snapfh_to_dentry(sb, sfh, false);
 293        }
 294
 295        if (fh_type != FILEID_INO32_GEN  &&
 296            fh_type != FILEID_INO32_GEN_PARENT)
 297                return NULL;
 298        if (fh_len < sizeof(*fh) / 4)
 299                return NULL;
 300
 301        dout("fh_to_dentry %llx\n", fh->ino);
 302        return __fh_to_dentry(sb, fh->ino);
 303}
 304
 305static struct dentry *__get_parent(struct super_block *sb,
 306                                   struct dentry *child, u64 ino)
 307{
 308        struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 309        struct ceph_mds_request *req;
 310        struct inode *inode;
 311        int mask;
 312        int err;
 313
 314        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
 315                                       USE_ANY_MDS);
 316        if (IS_ERR(req))
 317                return ERR_CAST(req);
 318
 319        if (child) {
 320                req->r_inode = d_inode(child);
 321                ihold(d_inode(child));
 322        } else {
 323                req->r_ino1 = (struct ceph_vino) {
 324                        .ino = ino,
 325                        .snap = CEPH_NOSNAP,
 326                };
 327        }
 328
 329        mask = CEPH_STAT_CAP_INODE;
 330        if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
 331                mask |= CEPH_CAP_XATTR_SHARED;
 332        req->r_args.getattr.mask = cpu_to_le32(mask);
 333
 334        req->r_num_caps = 1;
 335        err = ceph_mdsc_do_request(mdsc, NULL, req);
 336        if (err) {
 337                ceph_mdsc_put_request(req);
 338                return ERR_PTR(err);
 339        }
 340
 341        inode = req->r_target_inode;
 342        if (inode)
 343                ihold(inode);
 344        ceph_mdsc_put_request(req);
 345        if (!inode)
 346                return ERR_PTR(-ENOENT);
 347
 348        return d_obtain_alias(inode);
 349}
 350
 351static struct dentry *ceph_get_parent(struct dentry *child)
 352{
 353        struct inode *inode = d_inode(child);
 354        struct dentry *dn;
 355
 356        if (ceph_snap(inode) != CEPH_NOSNAP) {
 357                struct inode* dir;
 358                bool unlinked = false;
 359                /* do not support non-directory */
 360                if (!d_is_dir(child)) {
 361                        dn = ERR_PTR(-EINVAL);
 362                        goto out;
 363                }
 364                dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
 365                if (IS_ERR(dir)) {
 366                        dn = ERR_CAST(dir);
 367                        goto out;
 368                }
 369                /* There can be multiple paths to access snapped inode.
 370                 * For simplicity, treat snapdir of head inode as parent */
 371                if (ceph_snap(inode) != CEPH_SNAPDIR) {
 372                        struct inode *snapdir = ceph_get_snapdir(dir);
 373                        if (dir->i_nlink == 0)
 374                                unlinked = true;
 375                        iput(dir);
 376                        if (IS_ERR(snapdir)) {
 377                                dn = ERR_CAST(snapdir);
 378                                goto out;
 379                        }
 380                        dir = snapdir;
 381                }
 382                /* If directory has already been deleted, futher get_parent
 383                 * will fail. Do not mark snapdir dentry as disconnected,
 384                 * this prevent exportfs from doing futher get_parent. */
 385                if (unlinked)
 386                        dn = d_obtain_root(dir);
 387                else
 388                        dn = d_obtain_alias(dir);
 389        } else {
 390                dn = __get_parent(child->d_sb, child, 0);
 391        }
 392out:
 393        dout("get_parent %p ino %llx.%llx err=%ld\n",
 394             child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
 395        return dn;
 396}
 397
 398/*
 399 * convert regular fh to parent
 400 */
 401static struct dentry *ceph_fh_to_parent(struct super_block *sb,
 402                                        struct fid *fid,
 403                                        int fh_len, int fh_type)
 404{
 405        struct ceph_nfs_confh *cfh = (void *)fid->raw;
 406        struct dentry *dentry;
 407
 408        if (fh_type == FILEID_BTRFS_WITH_PARENT) {
 409                struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
 410                return __snapfh_to_dentry(sb, sfh, true);
 411        }
 412
 413        if (fh_type != FILEID_INO32_GEN_PARENT)
 414                return NULL;
 415        if (fh_len < sizeof(*cfh) / 4)
 416                return NULL;
 417
 418        dout("fh_to_parent %llx\n", cfh->parent_ino);
 419        dentry = __get_parent(sb, NULL, cfh->ino);
 420        if (unlikely(dentry == ERR_PTR(-ENOENT)))
 421                dentry = __fh_to_dentry(sb, cfh->parent_ino);
 422        return dentry;
 423}
 424
 425static int __get_snap_name(struct dentry *parent, char *name,
 426                           struct dentry *child)
 427{
 428        struct inode *inode = d_inode(child);
 429        struct inode *dir = d_inode(parent);
 430        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 431        struct ceph_mds_request *req = NULL;
 432        char *last_name = NULL;
 433        unsigned next_offset = 2;
 434        int err = -EINVAL;
 435
 436        if (ceph_ino(inode) != ceph_ino(dir))
 437                goto out;
 438        if (ceph_snap(inode) == CEPH_SNAPDIR) {
 439                if (ceph_snap(dir) == CEPH_NOSNAP) {
 440                        strcpy(name, fsc->mount_options->snapdir_name);
 441                        err = 0;
 442                }
 443                goto out;
 444        }
 445        if (ceph_snap(dir) != CEPH_SNAPDIR)
 446                goto out;
 447
 448        while (1) {
 449                struct ceph_mds_reply_info_parsed *rinfo;
 450                struct ceph_mds_reply_dir_entry *rde;
 451                int i;
 452
 453                req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
 454                                               USE_AUTH_MDS);
 455                if (IS_ERR(req)) {
 456                        err = PTR_ERR(req);
 457                        req = NULL;
 458                        goto out;
 459                }
 460                err = ceph_alloc_readdir_reply_buffer(req, inode);
 461                if (err)
 462                        goto out;
 463
 464                req->r_direct_mode = USE_AUTH_MDS;
 465                req->r_readdir_offset = next_offset;
 466                req->r_args.readdir.flags =
 467                                cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
 468                if (last_name) {
 469                        req->r_path2 = last_name;
 470                        last_name = NULL;
 471                }
 472
 473                req->r_inode = dir;
 474                ihold(dir);
 475                req->r_dentry = dget(parent);
 476
 477                inode_lock(dir);
 478                err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
 479                inode_unlock(dir);
 480
 481                if (err < 0)
 482                        goto out;
 483
 484                rinfo = &req->r_reply_info;
 485                for (i = 0; i < rinfo->dir_nr; i++) {
 486                        rde = rinfo->dir_entries + i;
 487                        BUG_ON(!rde->inode.in);
 488                        if (ceph_snap(inode) ==
 489                            le64_to_cpu(rde->inode.in->snapid)) {
 490                                memcpy(name, rde->name, rde->name_len);
 491                                name[rde->name_len] = '\0';
 492                                err = 0;
 493                                goto out;
 494                        }
 495                }
 496
 497                if (rinfo->dir_end)
 498                        break;
 499
 500                BUG_ON(rinfo->dir_nr <= 0);
 501                rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
 502                next_offset += rinfo->dir_nr;
 503                last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
 504                if (!last_name) {
 505                        err = -ENOMEM;
 506                        goto out;
 507                }
 508
 509                ceph_mdsc_put_request(req);
 510                req = NULL;
 511        }
 512        err = -ENOENT;
 513out:
 514        if (req)
 515                ceph_mdsc_put_request(req);
 516        kfree(last_name);
 517        dout("get_snap_name %p ino %llx.%llx err=%d\n",
 518             child, ceph_vinop(inode), err);
 519        return err;
 520}
 521
 522static int ceph_get_name(struct dentry *parent, char *name,
 523                         struct dentry *child)
 524{
 525        struct ceph_mds_client *mdsc;
 526        struct ceph_mds_request *req;
 527        struct inode *inode = d_inode(child);
 528        int err;
 529
 530        if (ceph_snap(inode) != CEPH_NOSNAP)
 531                return __get_snap_name(parent, name, child);
 532
 533        mdsc = ceph_inode_to_client(inode)->mdsc;
 534        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
 535                                       USE_ANY_MDS);
 536        if (IS_ERR(req))
 537                return PTR_ERR(req);
 538
 539        inode_lock(d_inode(parent));
 540
 541        req->r_inode = inode;
 542        ihold(inode);
 543        req->r_ino2 = ceph_vino(d_inode(parent));
 544        req->r_parent = d_inode(parent);
 545        ihold(req->r_parent);
 546        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 547        req->r_num_caps = 2;
 548        err = ceph_mdsc_do_request(mdsc, NULL, req);
 549
 550        inode_unlock(d_inode(parent));
 551
 552        if (!err) {
 553                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
 554                memcpy(name, rinfo->dname, rinfo->dname_len);
 555                name[rinfo->dname_len] = 0;
 556                dout("get_name %p ino %llx.%llx name %s\n",
 557                     child, ceph_vinop(inode), name);
 558        } else {
 559                dout("get_name %p ino %llx.%llx err %d\n",
 560                     child, ceph_vinop(inode), err);
 561        }
 562
 563        ceph_mdsc_put_request(req);
 564        return err;
 565}
 566
 567const struct export_operations ceph_export_ops = {
 568        .encode_fh = ceph_encode_fh,
 569        .fh_to_dentry = ceph_fh_to_dentry,
 570        .fh_to_parent = ceph_fh_to_parent,
 571        .get_parent = ceph_get_parent,
 572        .get_name = ceph_get_name,
 573};
 574