   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  linux/fs/locks.c
   4 *
   5 * We implement four types of file locks: BSD locks, posix locks, open
   6 * file description locks, and leases.  For details about BSD locks,
   7 * see the flock(2) man page; for details about the other three, see
   8 * fcntl(2).
   9 *
  10 *
  11 * Locking conflicts and dependencies:
  12 * If multiple threads attempt to lock the same byte (or flock the same file)
   13 * only one can be granted the lock, and the others must wait their turn.
   14 * The first lock has been "applied" or "granted", the others are "waiting"
   15 * and are "blocked" by the "applied" lock.
  16 *
  17 * Waiting and applied locks are all kept in trees whose properties are:
  18 *
  19 *      - the root of a tree may be an applied or waiting lock.
  20 *      - every other node in the tree is a waiting lock that
  21 *        conflicts with every ancestor of that node.
  22 *
  23 * Every such tree begins life as a waiting singleton which obviously
  24 * satisfies the above properties.
  25 *
  26 * The only ways we modify trees preserve these properties:
  27 *
  28 *      1. We may add a new leaf node, but only after first verifying that it
  29 *         conflicts with all of its ancestors.
  30 *      2. We may remove the root of a tree, creating a new singleton
  31 *         tree from the root and N new trees rooted in the immediate
  32 *         children.
  33 *      3. If the root of a tree is not currently an applied lock, we may
  34 *         apply it (if possible).
  35 *      4. We may upgrade the root of the tree (either extend its range,
  36 *         or upgrade its entire range from read to write).
  37 *
  38 * When an applied lock is modified in a way that reduces or downgrades any
  39 * part of its range, we remove all its children (2 above).  This particularly
  40 * happens when a lock is unlocked.
  41 *
  42 * For each of those child trees we "wake up" the thread which is
  43 * waiting for the lock so it can continue handling as follows: if the
  44 * root of the tree applies, we do so (3).  If it doesn't, it must
  45 * conflict with some applied lock.  We remove (wake up) all of its children
   46 * (2), and add it as a new leaf to the tree rooted in the applied
  47 * lock (1).  We then repeat the process recursively with those
  48 * children.
  49 *
  50 */
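/*
 * A minimal user-space sketch of the "applied"/"waiting" states described
 * above (illustrative only; assumes a file "/tmp/demo" already exists).
 * The parent applies a write lock; the child's F_SETLKW request conflicts
 * with it, so the child blocks ("waiting") until the parent unlocks.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/wait.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct flock fl = {
 *			.l_type = F_WRLCK,	// exclusive byte-range lock
 *			.l_whence = SEEK_SET,
 *			.l_start = 0,
 *			.l_len = 1,		// lock a single byte
 *		};
 *		int fd = open("/tmp/demo", O_RDWR);
 *
 *		fcntl(fd, F_SETLK, &fl);	// "applied" immediately
 *		if (fork() == 0) {
 *			// A forked child is a different lock owner, so
 *			// this blocks until the parent's lock goes away:
 *			fcntl(fd, F_SETLKW, &fl);
 *			printf("child: lock granted\n");
 *			return 0;
 *		}
 *		sleep(1);			// child is now "waiting"
 *		fl.l_type = F_UNLCK;
 *		fcntl(fd, F_SETLK, &fl);	// wakes the waiting child
 *		wait(NULL);
 *		return 0;
 *	}
 */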
  51
  52#include <linux/capability.h>
  53#include <linux/file.h>
  54#include <linux/fdtable.h>
  55#include <linux/filelock.h>
  56#include <linux/fs.h>
  57#include <linux/init.h>
  58#include <linux/security.h>
  59#include <linux/slab.h>
  60#include <linux/syscalls.h>
  61#include <linux/time.h>
  62#include <linux/rcupdate.h>
  63#include <linux/pid_namespace.h>
  64#include <linux/hashtable.h>
  65#include <linux/percpu.h>
  66#include <linux/sysctl.h>
  67
  68#define CREATE_TRACE_POINTS
  69#include <trace/events/filelock.h>
  70
  71#include <linux/uaccess.h>
  72
  73#define IS_POSIX(fl)    (fl->fl_flags & FL_POSIX)
  74#define IS_FLOCK(fl)    (fl->fl_flags & FL_FLOCK)
  75#define IS_LEASE(fl)    (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
  76#define IS_OFDLCK(fl)   (fl->fl_flags & FL_OFDLCK)
  77#define IS_REMOTELCK(fl)        (fl->fl_pid <= 0)
  78
  79static bool lease_breaking(struct file_lock *fl)
  80{
  81        return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
  82}
  83
  84static int target_leasetype(struct file_lock *fl)
  85{
  86        if (fl->fl_flags & FL_UNLOCK_PENDING)
  87                return F_UNLCK;
  88        if (fl->fl_flags & FL_DOWNGRADE_PENDING)
  89                return F_RDLCK;
  90        return fl->fl_type;
  91}
  92
  93static int leases_enable = 1;
  94static int lease_break_time = 45;
  95
  96#ifdef CONFIG_SYSCTL
  97static struct ctl_table locks_sysctls[] = {
  98        {
  99                .procname       = "leases-enable",
 100                .data           = &leases_enable,
 101                .maxlen         = sizeof(int),
 102                .mode           = 0644,
 103                .proc_handler   = proc_dointvec,
 104        },
 105#ifdef CONFIG_MMU
 106        {
 107                .procname       = "lease-break-time",
 108                .data           = &lease_break_time,
 109                .maxlen         = sizeof(int),
 110                .mode           = 0644,
 111                .proc_handler   = proc_dointvec,
 112        },
 113#endif /* CONFIG_MMU */
 114        {}
 115};
 116
 117static int __init init_fs_locks_sysctls(void)
 118{
 119        register_sysctl_init("fs", locks_sysctls);
 120        return 0;
 121}
 122early_initcall(init_fs_locks_sysctls);
 123#endif /* CONFIG_SYSCTL */
 124
 125/*
 126 * The global file_lock_list is only used for displaying /proc/locks, so we
 127 * keep a list on each CPU, with each list protected by its own spinlock.
 128 * Global serialization is done using file_rwsem.
 129 *
 130 * Note that alterations to the list also require that the relevant flc_lock is
 131 * held.
 132 */
 133struct file_lock_list_struct {
 134        spinlock_t              lock;
 135        struct hlist_head       hlist;
 136};
 137static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 138DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 139
 140
 141/*
 142 * The blocked_hash is used to find POSIX lock loops for deadlock detection.
 143 * It is protected by blocked_lock_lock.
 144 *
 145 * We hash locks by lockowner in order to optimize searching for the lock a
 146 * particular lockowner is waiting on.
 147 *
 148 * FIXME: make this value scale via some heuristic? We generally will want more
 149 * buckets when we have more lockowners holding locks, but that's a little
 150 * difficult to determine without knowing what the workload will look like.
 151 */
 152#define BLOCKED_HASH_BITS       7
 153static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
 154
 155/*
 156 * This lock protects the blocked_hash. Generally, if you're accessing it, you
 157 * want to be holding this lock.
 158 *
 159 * In addition, it also protects the fl->fl_blocked_requests list, and the
 160 * fl->fl_blocker pointer for file_lock structures that are acting as lock
 161 * requests (in contrast to those that are acting as records of acquired locks).
 162 *
 163 * Note that when we acquire this lock in order to change the above fields,
 164 * we often hold the flc_lock as well. In certain cases, when reading the fields
 165 * protected by this lock, we can skip acquiring it iff we already hold the
 166 * flc_lock.
 167 */
 168static DEFINE_SPINLOCK(blocked_lock_lock);
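/*
 * Lock ordering, as implied by the acquisition order used throughout this
 * file (outermost first):
 *
 *	file_rwsem (percpu, read) -> ctx->flc_lock -> blocked_lock_lock
 *
 * The per-cpu file_lock_list spinlocks likewise nest inside the flc_lock.
 */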
 169
 170static struct kmem_cache *flctx_cache __ro_after_init;
 171static struct kmem_cache *filelock_cache __ro_after_init;
 172
 173static struct file_lock_context *
 174locks_get_lock_context(struct inode *inode, int type)
 175{
 176        struct file_lock_context *ctx;
 177
 178        /* paired with cmpxchg() below */
 179        ctx = locks_inode_context(inode);
 180        if (likely(ctx) || type == F_UNLCK)
 181                goto out;
 182
 183        ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
 184        if (!ctx)
 185                goto out;
 186
 187        spin_lock_init(&ctx->flc_lock);
 188        INIT_LIST_HEAD(&ctx->flc_flock);
 189        INIT_LIST_HEAD(&ctx->flc_posix);
 190        INIT_LIST_HEAD(&ctx->flc_lease);
 191
 192        /*
 193         * Assign the pointer if it's not already assigned. If it is, then
 194         * free the context we just allocated.
 195         */
 196        if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
 197                kmem_cache_free(flctx_cache, ctx);
 198                ctx = locks_inode_context(inode);
 199        }
 200out:
 201        trace_locks_get_lock_context(inode, type, ctx);
 202        return ctx;
 203}
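/*
 * The cmpxchg() above is the usual lockless lazy-initialization idiom:
 * allocate optimistically, publish with a single compare-and-swap, and
 * free the allocation if another thread won the race.  A user-space
 * analogue of the same pattern, sketched with C11 atomics (the names
 * here are illustrative, not kernel API):
 *
 *	#include <stdatomic.h>
 *	#include <stdlib.h>
 *
 *	struct ctx { long data; };
 *	static _Atomic(struct ctx *) global_ctx;
 *
 *	struct ctx *get_ctx(void)
 *	{
 *		struct ctx *expected = NULL, *new;
 *
 *		new = atomic_load(&global_ctx);
 *		if (new)
 *			return new;
 *		new = calloc(1, sizeof(*new));
 *		if (!new)
 *			return NULL;
 *		// Publish iff still NULL; on failure "expected" holds the
 *		// pointer installed by whoever beat us to it.
 *		if (!atomic_compare_exchange_strong(&global_ctx, &expected, new)) {
 *			free(new);
 *			new = expected;
 *		}
 *		return new;
 *	}
 */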
 204
 205static void
 206locks_dump_ctx_list(struct list_head *list, char *list_type)
 207{
 208        struct file_lock *fl;
 209
 210        list_for_each_entry(fl, list, fl_list) {
 211                pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
 212        }
 213}
 214
 215static void
 216locks_check_ctx_lists(struct inode *inode)
 217{
 218        struct file_lock_context *ctx = inode->i_flctx;
 219
 220        if (unlikely(!list_empty(&ctx->flc_flock) ||
 221                     !list_empty(&ctx->flc_posix) ||
 222                     !list_empty(&ctx->flc_lease))) {
 223                pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
 224                        MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
 225                        inode->i_ino);
 226                locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
 227                locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
 228                locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
 229        }
 230}
 231
 232static void
 233locks_check_ctx_file_list(struct file *filp, struct list_head *list,
 234                                char *list_type)
 235{
 236        struct file_lock *fl;
 237        struct inode *inode = file_inode(filp);
 238
 239        list_for_each_entry(fl, list, fl_list)
 240                if (fl->fl_file == filp)
  241                        pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx"
 242                                " fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
 243                                list_type, MAJOR(inode->i_sb->s_dev),
 244                                MINOR(inode->i_sb->s_dev), inode->i_ino,
 245                                fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
 246}
 247
 248void
 249locks_free_lock_context(struct inode *inode)
 250{
 251        struct file_lock_context *ctx = locks_inode_context(inode);
 252
 253        if (unlikely(ctx)) {
 254                locks_check_ctx_lists(inode);
 255                kmem_cache_free(flctx_cache, ctx);
 256        }
 257}
 258
 259static void locks_init_lock_heads(struct file_lock *fl)
 260{
 261        INIT_HLIST_NODE(&fl->fl_link);
 262        INIT_LIST_HEAD(&fl->fl_list);
 263        INIT_LIST_HEAD(&fl->fl_blocked_requests);
 264        INIT_LIST_HEAD(&fl->fl_blocked_member);
 265        init_waitqueue_head(&fl->fl_wait);
 266}
 267
 268/* Allocate an empty lock structure. */
 269struct file_lock *locks_alloc_lock(void)
 270{
 271        struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
 272
 273        if (fl)
 274                locks_init_lock_heads(fl);
 275
 276        return fl;
 277}
 278EXPORT_SYMBOL_GPL(locks_alloc_lock);
 279
 280void locks_release_private(struct file_lock *fl)
 281{
 282        BUG_ON(waitqueue_active(&fl->fl_wait));
 283        BUG_ON(!list_empty(&fl->fl_list));
 284        BUG_ON(!list_empty(&fl->fl_blocked_requests));
 285        BUG_ON(!list_empty(&fl->fl_blocked_member));
 286        BUG_ON(!hlist_unhashed(&fl->fl_link));
 287
 288        if (fl->fl_ops) {
 289                if (fl->fl_ops->fl_release_private)
 290                        fl->fl_ops->fl_release_private(fl);
 291                fl->fl_ops = NULL;
 292        }
 293
 294        if (fl->fl_lmops) {
 295                if (fl->fl_lmops->lm_put_owner) {
 296                        fl->fl_lmops->lm_put_owner(fl->fl_owner);
 297                        fl->fl_owner = NULL;
 298                }
 299                fl->fl_lmops = NULL;
 300        }
 301}
 302EXPORT_SYMBOL_GPL(locks_release_private);
 303
 304/**
 305 * locks_owner_has_blockers - Check for blocking lock requests
 306 * @flctx: file lock context
 307 * @owner: lock owner
 308 *
 309 * Return values:
 310 *   %true: @owner has at least one blocker
 311 *   %false: @owner has no blockers
 312 */
 313bool locks_owner_has_blockers(struct file_lock_context *flctx,
 314                fl_owner_t owner)
 315{
 316        struct file_lock *fl;
 317
 318        spin_lock(&flctx->flc_lock);
 319        list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
 320                if (fl->fl_owner != owner)
 321                        continue;
 322                if (!list_empty(&fl->fl_blocked_requests)) {
 323                        spin_unlock(&flctx->flc_lock);
 324                        return true;
 325                }
 326        }
 327        spin_unlock(&flctx->flc_lock);
 328        return false;
 329}
 330EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
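/*
 * A sketch of how a lock manager might use this helper, e.g. to wait for
 * all blocked requests against an owner to drain before tearing the owner
 * down (the wait queue "lm_wq" is hypothetical; lockd and nfsd do
 * something along these lines):
 *
 *	wait_event(lm_wq, !locks_owner_has_blockers(flctx, owner));
 */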
 331
 332/* Free a lock which is not in use. */
 333void locks_free_lock(struct file_lock *fl)
 334{
 335        locks_release_private(fl);
 336        kmem_cache_free(filelock_cache, fl);
 337}
 338EXPORT_SYMBOL(locks_free_lock);
 339
 340static void
 341locks_dispose_list(struct list_head *dispose)
 342{
 343        struct file_lock *fl;
 344
 345        while (!list_empty(dispose)) {
 346                fl = list_first_entry(dispose, struct file_lock, fl_list);
 347                list_del_init(&fl->fl_list);
 348                locks_free_lock(fl);
 349        }
 350}
 351
 352void locks_init_lock(struct file_lock *fl)
 353{
 354        memset(fl, 0, sizeof(struct file_lock));
 355        locks_init_lock_heads(fl);
 356}
 357EXPORT_SYMBOL(locks_init_lock);
 358
 359/*
 360 * Initialize a new lock from an existing file_lock structure.
 361 */
 362void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
 363{
 364        new->fl_owner = fl->fl_owner;
 365        new->fl_pid = fl->fl_pid;
 366        new->fl_file = NULL;
 367        new->fl_flags = fl->fl_flags;
 368        new->fl_type = fl->fl_type;
 369        new->fl_start = fl->fl_start;
 370        new->fl_end = fl->fl_end;
 371        new->fl_lmops = fl->fl_lmops;
 372        new->fl_ops = NULL;
 373
 374        if (fl->fl_lmops) {
 375                if (fl->fl_lmops->lm_get_owner)
 376                        fl->fl_lmops->lm_get_owner(fl->fl_owner);
 377        }
 378}
 379EXPORT_SYMBOL(locks_copy_conflock);
 380
 381void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 382{
 383        /* "new" must be a freshly-initialized lock */
 384        WARN_ON_ONCE(new->fl_ops);
 385
 386        locks_copy_conflock(new, fl);
 387
 388        new->fl_file = fl->fl_file;
 389        new->fl_ops = fl->fl_ops;
 390
 391        if (fl->fl_ops) {
 392                if (fl->fl_ops->fl_copy_lock)
 393                        fl->fl_ops->fl_copy_lock(new, fl);
 394        }
 395}
 396EXPORT_SYMBOL(locks_copy_lock);
 397
 398static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
 399{
 400        struct file_lock *f;
 401
 402        /*
 403         * As ctx->flc_lock is held, new requests cannot be added to
 404         * ->fl_blocked_requests, so we don't need a lock to check if it
 405         * is empty.
 406         */
 407        if (list_empty(&fl->fl_blocked_requests))
 408                return;
 409        spin_lock(&blocked_lock_lock);
 410        list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
 411        list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
 412                f->fl_blocker = new;
 413        spin_unlock(&blocked_lock_lock);
 414}
 415
 416static inline int flock_translate_cmd(int cmd) {
 417        switch (cmd) {
 418        case LOCK_SH:
 419                return F_RDLCK;
 420        case LOCK_EX:
 421                return F_WRLCK;
 422        case LOCK_UN:
 423                return F_UNLCK;
 424        }
 425        return -EINVAL;
 426}
 427
 428/* Fill in a file_lock structure with an appropriate FLOCK lock. */
 429static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
 430{
 431        locks_init_lock(fl);
 432
 433        fl->fl_file = filp;
 434        fl->fl_owner = filp;
 435        fl->fl_pid = current->tgid;
 436        fl->fl_flags = FL_FLOCK;
 437        fl->fl_type = type;
 438        fl->fl_end = OFFSET_MAX;
 439}
 440
 441static int assign_type(struct file_lock *fl, int type)
 442{
 443        switch (type) {
 444        case F_RDLCK:
 445        case F_WRLCK:
 446        case F_UNLCK:
 447                fl->fl_type = type;
 448                break;
 449        default:
 450                return -EINVAL;
 451        }
 452        return 0;
 453}
 454
 455static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 456                                 struct flock64 *l)
 457{
 458        switch (l->l_whence) {
 459        case SEEK_SET:
 460                fl->fl_start = 0;
 461                break;
 462        case SEEK_CUR:
 463                fl->fl_start = filp->f_pos;
 464                break;
 465        case SEEK_END:
 466                fl->fl_start = i_size_read(file_inode(filp));
 467                break;
 468        default:
 469                return -EINVAL;
 470        }
 471        if (l->l_start > OFFSET_MAX - fl->fl_start)
 472                return -EOVERFLOW;
 473        fl->fl_start += l->l_start;
 474        if (fl->fl_start < 0)
 475                return -EINVAL;
 476
 477        /* POSIX-1996 leaves the case l->l_len < 0 undefined;
 478           POSIX-2001 defines it. */
 479        if (l->l_len > 0) {
 480                if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
 481                        return -EOVERFLOW;
 482                fl->fl_end = fl->fl_start + (l->l_len - 1);
 483
 484        } else if (l->l_len < 0) {
 485                if (fl->fl_start + l->l_len < 0)
 486                        return -EINVAL;
 487                fl->fl_end = fl->fl_start - 1;
 488                fl->fl_start += l->l_len;
 489        } else
 490                fl->fl_end = OFFSET_MAX;
 491
 492        fl->fl_owner = current->files;
 493        fl->fl_pid = current->tgid;
 494        fl->fl_file = filp;
 495        fl->fl_flags = FL_POSIX;
 496        fl->fl_ops = NULL;
 497        fl->fl_lmops = NULL;
 498
 499        return assign_type(fl, l->l_type);
 500}
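/*
 * Worked examples of the mapping above, with l_whence = SEEK_SET so that
 * fl_start begins at l_start:
 *
 *	l_start = 100, l_len = 10   =>  fl_start = 100, fl_end = 109
 *	l_start = 100, l_len = -10  =>  fl_start = 90,  fl_end = 99
 *	l_start = 100, l_len = 0    =>  fl_start = 100, fl_end = OFFSET_MAX
 *					(to end of file, however it grows)
 */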
 501
 502/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
 503 * style lock.
 504 */
 505static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 506                               struct flock *l)
 507{
 508        struct flock64 ll = {
 509                .l_type = l->l_type,
 510                .l_whence = l->l_whence,
 511                .l_start = l->l_start,
 512                .l_len = l->l_len,
 513        };
 514
 515        return flock64_to_posix_lock(filp, fl, &ll);
 516}
 517
 518/* default lease lock manager operations */
 519static bool
 520lease_break_callback(struct file_lock *fl)
 521{
 522        kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
 523        return false;
 524}
 525
 526static void
 527lease_setup(struct file_lock *fl, void **priv)
 528{
 529        struct file *filp = fl->fl_file;
 530        struct fasync_struct *fa = *priv;
 531
 532        /*
 533         * fasync_insert_entry() returns the old entry if any. If there was no
 534         * old entry, then it used "priv" and inserted it into the fasync list.
 535         * Clear the pointer to indicate that it shouldn't be freed.
 536         */
 537        if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
 538                *priv = NULL;
 539
 540        __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
 541}
 542
 543static const struct lock_manager_operations lease_manager_ops = {
 544        .lm_break = lease_break_callback,
 545        .lm_change = lease_modify,
 546        .lm_setup = lease_setup,
 547};
 548
 549/*
 550 * Initialize a lease, use the default lock manager operations
 551 */
 552static int lease_init(struct file *filp, int type, struct file_lock *fl)
 553{
 554        if (assign_type(fl, type) != 0)
 555                return -EINVAL;
 556
 557        fl->fl_owner = filp;
 558        fl->fl_pid = current->tgid;
 559
 560        fl->fl_file = filp;
 561        fl->fl_flags = FL_LEASE;
 562        fl->fl_start = 0;
 563        fl->fl_end = OFFSET_MAX;
 564        fl->fl_ops = NULL;
 565        fl->fl_lmops = &lease_manager_ops;
 566        return 0;
 567}
 568
 569/* Allocate a file_lock initialised to this type of lease */
 570static struct file_lock *lease_alloc(struct file *filp, int type)
 571{
 572        struct file_lock *fl = locks_alloc_lock();
 573        int error = -ENOMEM;
 574
 575        if (fl == NULL)
 576                return ERR_PTR(error);
 577
 578        error = lease_init(filp, type, fl);
 579        if (error) {
 580                locks_free_lock(fl);
 581                return ERR_PTR(error);
 582        }
 583        return fl;
 584}
 585
 586/* Check if two locks overlap each other.
 587 */
 588static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 589{
 590        return ((fl1->fl_end >= fl2->fl_start) &&
 591                (fl2->fl_end >= fl1->fl_start));
 592}
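/*
 * E.g. [5,10] and [10,20] overlap (they share byte 10), while [5,9] and
 * [10,20] do not, since 9 >= 10 is false.
 */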
 593
 594/*
 595 * Check whether two locks have the same owner.
 596 */
 597static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 598{
 599        return fl1->fl_owner == fl2->fl_owner;
 600}
 601
 602/* Must be called with the flc_lock held! */
 603static void locks_insert_global_locks(struct file_lock *fl)
 604{
 605        struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
 606
 607        percpu_rwsem_assert_held(&file_rwsem);
 608
 609        spin_lock(&fll->lock);
 610        fl->fl_link_cpu = smp_processor_id();
 611        hlist_add_head(&fl->fl_link, &fll->hlist);
 612        spin_unlock(&fll->lock);
 613}
 614
 615/* Must be called with the flc_lock held! */
 616static void locks_delete_global_locks(struct file_lock *fl)
 617{
 618        struct file_lock_list_struct *fll;
 619
 620        percpu_rwsem_assert_held(&file_rwsem);
 621
 622        /*
 623         * Avoid taking lock if already unhashed. This is safe since this check
 624         * is done while holding the flc_lock, and new insertions into the list
 625         * also require that it be held.
 626         */
 627        if (hlist_unhashed(&fl->fl_link))
 628                return;
 629
 630        fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
 631        spin_lock(&fll->lock);
 632        hlist_del_init(&fl->fl_link);
 633        spin_unlock(&fll->lock);
 634}
 635
 636static unsigned long
 637posix_owner_key(struct file_lock *fl)
 638{
 639        return (unsigned long)fl->fl_owner;
 640}
 641
 642static void locks_insert_global_blocked(struct file_lock *waiter)
 643{
 644        lockdep_assert_held(&blocked_lock_lock);
 645
 646        hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
 647}
 648
 649static void locks_delete_global_blocked(struct file_lock *waiter)
 650{
 651        lockdep_assert_held(&blocked_lock_lock);
 652
 653        hash_del(&waiter->fl_link);
 654}
 655
 656/* Remove waiter from blocker's block list.
 657 * When blocker ends up pointing to itself then the list is empty.
 658 *
 659 * Must be called with blocked_lock_lock held.
 660 */
 661static void __locks_delete_block(struct file_lock *waiter)
 662{
 663        locks_delete_global_blocked(waiter);
 664        list_del_init(&waiter->fl_blocked_member);
 665}
 666
 667static void __locks_wake_up_blocks(struct file_lock *blocker)
 668{
 669        while (!list_empty(&blocker->fl_blocked_requests)) {
 670                struct file_lock *waiter;
 671
 672                waiter = list_first_entry(&blocker->fl_blocked_requests,
 673                                          struct file_lock, fl_blocked_member);
 674                __locks_delete_block(waiter);
 675                if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
 676                        waiter->fl_lmops->lm_notify(waiter);
 677                else
 678                        wake_up(&waiter->fl_wait);
 679
 680                /*
 681                 * The setting of fl_blocker to NULL marks the "done"
 682                 * point in deleting a block. Paired with acquire at the top
 683                 * of locks_delete_block().
 684                 */
 685                smp_store_release(&waiter->fl_blocker, NULL);
 686        }
 687}
 688
 689/**
 690 *      locks_delete_block - stop waiting for a file lock
 691 *      @waiter: the lock which was waiting
 692 *
 693 *      lockd/nfsd need to disconnect the lock while working on it.
 694 */
 695int locks_delete_block(struct file_lock *waiter)
 696{
 697        int status = -ENOENT;
 698
 699        /*
 700         * If fl_blocker is NULL, it won't be set again as this thread "owns"
 701         * the lock and is the only one that might try to claim the lock.
 702         *
 703         * We use acquire/release to manage fl_blocker so that we can
 704         * optimize away taking the blocked_lock_lock in many cases.
 705         *
 706         * The smp_load_acquire guarantees two things:
 707         *
 708         * 1/ that fl_blocked_requests can be tested locklessly. If something
 709         * was recently added to that list it must have been in a locked region
 710         * *before* the locked region when fl_blocker was set to NULL.
 711         *
 712         * 2/ that no other thread is accessing 'waiter', so it is safe to free
 713         * it.  __locks_wake_up_blocks is careful not to touch waiter after
 714         * fl_blocker is released.
 715         *
 716         * If a lockless check of fl_blocker shows it to be NULL, we know that
 717         * no new locks can be inserted into its fl_blocked_requests list, and
 718         * can avoid doing anything further if the list is empty.
 719         */
 720        if (!smp_load_acquire(&waiter->fl_blocker) &&
 721            list_empty(&waiter->fl_blocked_requests))
 722                return status;
 723
 724        spin_lock(&blocked_lock_lock);
 725        if (waiter->fl_blocker)
 726                status = 0;
 727        __locks_wake_up_blocks(waiter);
 728        __locks_delete_block(waiter);
 729
 730        /*
 731         * The setting of fl_blocker to NULL marks the "done" point in deleting
 732         * a block. Paired with acquire at the top of this function.
 733         */
 734        smp_store_release(&waiter->fl_blocker, NULL);
 735        spin_unlock(&blocked_lock_lock);
 736        return status;
 737}
 738EXPORT_SYMBOL(locks_delete_block);
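/*
 * The fl_blocker handshake above is the classic release/acquire
 * message-passing pattern: the waker publishes "done" with a release
 * store, and a waiter's acquire load guarantees that everything the waker
 * wrote beforehand (notably the emptied fl_blocked_requests list) is
 * visible before the lockless list_empty() check is trusted.  Sketched
 * with C11 atomics (user-space analogue; drain_list() and list_is_empty()
 * are illustrative stand-ins):
 *
 *	// Waker, with the list lock held:
 *	drain_list(&w->blocked);
 *	atomic_store_explicit(&w->blocker, NULL, memory_order_release);
 *
 *	// Waiter, lockless fast path:
 *	if (!atomic_load_explicit(&w->blocker, memory_order_acquire) &&
 *	    list_is_empty(&w->blocked))
 *		return;		// nothing to clean up, skip the lock
 */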
 739
 740/* Insert waiter into blocker's block list.
 741 * We use a circular list so that processes can be easily woken up in
 742 * the order they blocked. The documentation doesn't require this but
 743 * it seems like the reasonable thing to do.
 744 *
 745 * Must be called with both the flc_lock and blocked_lock_lock held. The
 746 * fl_blocked_requests list itself is protected by the blocked_lock_lock,
 747 * but by ensuring that the flc_lock is also held on insertions we can avoid
 748 * taking the blocked_lock_lock in some cases when we see that the
 749 * fl_blocked_requests list is empty.
 750 *
 751 * Rather than just adding to the list, we check for conflicts with any existing
 752 * waiters, and add beneath any waiter that blocks the new waiter.
 753 * Thus wakeups don't happen until needed.
 754 */
 755static void __locks_insert_block(struct file_lock *blocker,
 756                                 struct file_lock *waiter,
 757                                 bool conflict(struct file_lock *,
 758                                               struct file_lock *))
 759{
 760        struct file_lock *fl;
 761        BUG_ON(!list_empty(&waiter->fl_blocked_member));
 762
 763new_blocker:
 764        list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
 765                if (conflict(fl, waiter)) {
  766                        blocker = fl;
 767                        goto new_blocker;
 768                }
 769        waiter->fl_blocker = blocker;
 770        list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
 771        if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
 772                locks_insert_global_blocked(waiter);
 773
  774        /* The requests in waiter->fl_blocked_requests are known to conflict with
 775         * waiter, but might not conflict with blocker, or the requests
 776         * and lock which block it.  So they all need to be woken.
 777         */
 778        __locks_wake_up_blocks(waiter);
 779}
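/*
 * Example: applied lock A (write, bytes 0-99) already blocks waiter W1
 * (write, 0-49).  A new waiter W2 (read, 10-19) conflicts with W1, so the
 * loop above descends and inserts W2 beneath W1 rather than directly
 * beneath A:
 *
 *	A (applied, 0-99)
 *	`-- W1 (waiting, 0-49)
 *	    `-- W2 (waiting, 10-19)
 *
 * When A is released only W1 is woken; W2 keeps sleeping until W1 no
 * longer blocks it.
 */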
 780
 781/* Must be called with flc_lock held. */
 782static void locks_insert_block(struct file_lock *blocker,
 783                               struct file_lock *waiter,
 784                               bool conflict(struct file_lock *,
 785                                             struct file_lock *))
 786{
 787        spin_lock(&blocked_lock_lock);
 788        __locks_insert_block(blocker, waiter, conflict);
 789        spin_unlock(&blocked_lock_lock);
 790}
 791
 792/*
 793 * Wake up processes blocked waiting for blocker.
 794 *
 795 * Must be called with the inode->flc_lock held!
 796 */
 797static void locks_wake_up_blocks(struct file_lock *blocker)
 798{
 799        /*
 800         * Avoid taking global lock if list is empty. This is safe since new
 801         * blocked requests are only added to the list under the flc_lock, and
 802         * the flc_lock is always held here. Note that removal from the
 803         * fl_blocked_requests list does not require the flc_lock, so we must
 804         * recheck list_empty() after acquiring the blocked_lock_lock.
 805         */
 806        if (list_empty(&blocker->fl_blocked_requests))
 807                return;
 808
 809        spin_lock(&blocked_lock_lock);
 810        __locks_wake_up_blocks(blocker);
 811        spin_unlock(&blocked_lock_lock);
 812}
 813
 814static void
 815locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
 816{
 817        list_add_tail(&fl->fl_list, before);
 818        locks_insert_global_locks(fl);
 819}
 820
 821static void
 822locks_unlink_lock_ctx(struct file_lock *fl)
 823{
 824        locks_delete_global_locks(fl);
 825        list_del_init(&fl->fl_list);
 826        locks_wake_up_blocks(fl);
 827}
 828
 829static void
 830locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
 831{
 832        locks_unlink_lock_ctx(fl);
 833        if (dispose)
 834                list_add(&fl->fl_list, dispose);
 835        else
 836                locks_free_lock(fl);
 837}
 838
  839/* Determine if lock sys_fl blocks lock caller_fl. This is the common
  840 * check for the shared/exclusive status of overlapping locks.
 841 */
 842static bool locks_conflict(struct file_lock *caller_fl,
 843                           struct file_lock *sys_fl)
 844{
 845        if (sys_fl->fl_type == F_WRLCK)
 846                return true;
 847        if (caller_fl->fl_type == F_WRLCK)
 848                return true;
 849        return false;
 850}
 851
 852/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 853 * checking before calling the locks_conflict().
 854 */
 855static bool posix_locks_conflict(struct file_lock *caller_fl,
 856                                 struct file_lock *sys_fl)
 857{
 858        /* POSIX locks owned by the same process do not conflict with
 859         * each other.
 860         */
 861        if (posix_same_owner(caller_fl, sys_fl))
 862                return false;
 863
 864        /* Check whether they overlap */
 865        if (!locks_overlap(caller_fl, sys_fl))
 866                return false;
 867
 868        return locks_conflict(caller_fl, sys_fl);
 869}
 870
  871/* Determine if lock sys_fl blocks lock caller_fl. Used on the xx_GETLK
  872 * path, so it also checks for GETLK-specific things like F_UNLCK.
 873 */
 874static bool posix_test_locks_conflict(struct file_lock *caller_fl,
 875                                      struct file_lock *sys_fl)
 876{
 877        /* F_UNLCK checks any locks on the same fd. */
 878        if (caller_fl->fl_type == F_UNLCK) {
 879                if (!posix_same_owner(caller_fl, sys_fl))
 880                        return false;
 881                return locks_overlap(caller_fl, sys_fl);
 882        }
 883        return posix_locks_conflict(caller_fl, sys_fl);
 884}
 885
 886/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
 887 * checking before calling the locks_conflict().
 888 */
 889static bool flock_locks_conflict(struct file_lock *caller_fl,
 890                                 struct file_lock *sys_fl)
 891{
 892        /* FLOCK locks referring to the same filp do not conflict with
 893         * each other.
 894         */
 895        if (caller_fl->fl_file == sys_fl->fl_file)
 896                return false;
 897
 898        return locks_conflict(caller_fl, sys_fl);
 899}
 900
 901void
 902posix_test_lock(struct file *filp, struct file_lock *fl)
 903{
 904        struct file_lock *cfl;
 905        struct file_lock_context *ctx;
 906        struct inode *inode = file_inode(filp);
 907        void *owner;
 908        void (*func)(void);
 909
 910        ctx = locks_inode_context(inode);
 911        if (!ctx || list_empty_careful(&ctx->flc_posix)) {
 912                fl->fl_type = F_UNLCK;
 913                return;
 914        }
 915
 916retry:
 917        spin_lock(&ctx->flc_lock);
 918        list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
 919                if (!posix_test_locks_conflict(fl, cfl))
 920                        continue;
 921                if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
 922                        && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
 923                        owner = cfl->fl_lmops->lm_mod_owner;
 924                        func = cfl->fl_lmops->lm_expire_lock;
 925                        __module_get(owner);
 926                        spin_unlock(&ctx->flc_lock);
 927                        (*func)();
 928                        module_put(owner);
 929                        goto retry;
 930                }
 931                locks_copy_conflock(fl, cfl);
 932                goto out;
 933        }
 934        fl->fl_type = F_UNLCK;
 935out:
 936        spin_unlock(&ctx->flc_lock);
 937        return;
 938}
 939EXPORT_SYMBOL(posix_test_lock);
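/*
 * The user-space view of the above (a sketch; assumes an open "fd"):
 * F_GETLK fills in the first conflicting lock found, or sets l_type to
 * F_UNLCK if the probe lock could have been applied.
 *
 *	struct flock fl = {
 *		.l_type = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start = 0,
 *		.l_len = 0,		// probe the whole file
 *	};
 *
 *	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *		printf("conflicting lock held by pid %d\n", (int)fl.l_pid);
 */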
 940
 941/*
 942 * Deadlock detection:
 943 *
 944 * We attempt to detect deadlocks that are due purely to posix file
 945 * locks.
 946 *
 947 * We assume that a task can be waiting for at most one lock at a time.
 948 * So for any acquired lock, the process holding that lock may be
  949 * waiting on at most one other lock.  That lock in turn may be held by
 950 * someone waiting for at most one other lock.  Given a requested lock
 951 * caller_fl which is about to wait for a conflicting lock block_fl, we
 952 * follow this chain of waiters to ensure we are not about to create a
 953 * cycle.
 954 *
 955 * Since we do this before we ever put a process to sleep on a lock, we
 956 * are ensured that there is never a cycle; that is what guarantees that
 957 * the while() loop in posix_locks_deadlock() eventually completes.
 958 *
 959 * Note: the above assumption may not be true when handling lock
 960 * requests from a broken NFS client. It may also fail in the presence
 961 * of tasks (such as posix threads) sharing the same open file table.
 962 * To handle those cases, we just bail out after a few iterations.
 963 *
 964 * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
 965 * Because the owner is not even nominally tied to a thread of
 966 * execution, the deadlock detection below can't reasonably work well. Just
 967 * skip it for those.
 968 *
 969 * In principle, we could do a more limited deadlock detection on FL_OFDLCK
 970 * locks that just checks for the case where two tasks are attempting to
 971 * upgrade from read to write locks on the same inode.
 972 */
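/*
 * A user-space sketch that trips this detector: two processes each lock
 * one byte and then wait for the other's byte, so the second F_SETLKW
 * call fails with EDEADLK instead of sleeping forever.
 *
 *	// Process 1:			// Process 2:
 *	lock byte 0 (F_SETLK)		lock byte 1 (F_SETLK)
 *	lock byte 1 (F_SETLKW)		lock byte 0 (F_SETLKW)
 *
 * One of the two F_SETLKW calls returns -1 with errno == EDEADLK.
 */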
 973
 974#define MAX_DEADLK_ITERATIONS 10
 975
 976/* Find a lock that the owner of the given block_fl is blocking on. */
 977static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
 978{
 979        struct file_lock *fl;
 980
 981        hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
 982                if (posix_same_owner(fl, block_fl)) {
 983                        while (fl->fl_blocker)
 984                                fl = fl->fl_blocker;
 985                        return fl;
 986                }
 987        }
 988        return NULL;
 989}
 990
 991/* Must be called with the blocked_lock_lock held! */
 992static int posix_locks_deadlock(struct file_lock *caller_fl,
 993                                struct file_lock *block_fl)
 994{
 995        int i = 0;
 996
 997        lockdep_assert_held(&blocked_lock_lock);
 998
 999        /*
1000         * This deadlock detector can't reasonably detect deadlocks with
 1001         * FL_OFDLCK locks, since they aren't owned by a process, per se.
1002         */
1003        if (IS_OFDLCK(caller_fl))
1004                return 0;
1005
1006        while ((block_fl = what_owner_is_waiting_for(block_fl))) {
1007                if (i++ > MAX_DEADLK_ITERATIONS)
1008                        return 0;
1009                if (posix_same_owner(caller_fl, block_fl))
1010                        return 1;
1011        }
1012        return 0;
1013}
1014
1015/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
1016 * after any leases, but before any posix locks.
1017 *
1018 * Note that if called with an FL_EXISTS argument, the caller may determine
1019 * whether or not a lock was successfully freed by testing the return
1020 * value for -ENOENT.
1021 */
1022static int flock_lock_inode(struct inode *inode, struct file_lock *request)
1023{
1024        struct file_lock *new_fl = NULL;
1025        struct file_lock *fl;
1026        struct file_lock_context *ctx;
1027        int error = 0;
1028        bool found = false;
1029        LIST_HEAD(dispose);
1030
1031        ctx = locks_get_lock_context(inode, request->fl_type);
1032        if (!ctx) {
1033                if (request->fl_type != F_UNLCK)
1034                        return -ENOMEM;
1035                return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
1036        }
1037
1038        if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
1039                new_fl = locks_alloc_lock();
1040                if (!new_fl)
1041                        return -ENOMEM;
1042        }
1043
1044        percpu_down_read(&file_rwsem);
1045        spin_lock(&ctx->flc_lock);
1046        if (request->fl_flags & FL_ACCESS)
1047                goto find_conflict;
1048
1049        list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
1050                if (request->fl_file != fl->fl_file)
1051                        continue;
1052                if (request->fl_type == fl->fl_type)
1053                        goto out;
1054                found = true;
1055                locks_delete_lock_ctx(fl, &dispose);
1056                break;
1057        }
1058
1059        if (request->fl_type == F_UNLCK) {
1060                if ((request->fl_flags & FL_EXISTS) && !found)
1061                        error = -ENOENT;
1062                goto out;
1063        }
1064
1065find_conflict:
1066        list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
1067                if (!flock_locks_conflict(request, fl))
1068                        continue;
1069                error = -EAGAIN;
1070                if (!(request->fl_flags & FL_SLEEP))
1071                        goto out;
1072                error = FILE_LOCK_DEFERRED;
1073                locks_insert_block(fl, request, flock_locks_conflict);
1074                goto out;
1075        }
1076        if (request->fl_flags & FL_ACCESS)
1077                goto out;
1078        locks_copy_lock(new_fl, request);
1079        locks_move_blocks(new_fl, request);
1080        locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
1081        new_fl = NULL;
1082        error = 0;
1083
1084out:
1085        spin_unlock(&ctx->flc_lock);
1086        percpu_up_read(&file_rwsem);
1087        if (new_fl)
1088                locks_free_lock(new_fl);
1089        locks_dispose_list(&dispose);
1090        trace_flock_lock_inode(inode, request, error);
1091        return error;
1092}
1093
1094static int posix_lock_inode(struct inode *inode, struct file_lock *request,
1095                            struct file_lock *conflock)
1096{
1097        struct file_lock *fl, *tmp;
1098        struct file_lock *new_fl = NULL;
1099        struct file_lock *new_fl2 = NULL;
1100        struct file_lock *left = NULL;
1101        struct file_lock *right = NULL;
1102        struct file_lock_context *ctx;
1103        int error;
1104        bool added = false;
1105        LIST_HEAD(dispose);
1106        void *owner;
1107        void (*func)(void);
1108
1109        ctx = locks_get_lock_context(inode, request->fl_type);
1110        if (!ctx)
1111                return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
1112
1113        /*
1114         * We may need two file_lock structures for this operation,
1115         * so we get them in advance to avoid races.
1116         *
 1117         * In some cases we can be sure that no new locks will be needed.
1118         */
1119        if (!(request->fl_flags & FL_ACCESS) &&
1120            (request->fl_type != F_UNLCK ||
1121             request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
1122                new_fl = locks_alloc_lock();
1123                new_fl2 = locks_alloc_lock();
1124        }
1125
1126retry:
1127        percpu_down_read(&file_rwsem);
1128        spin_lock(&ctx->flc_lock);
1129        /*
1130         * New lock request. Walk all POSIX locks and look for conflicts. If
1131         * there are any, either return error or put the request on the
1132         * blocker's list of waiters and the global blocked_hash.
1133         */
1134        if (request->fl_type != F_UNLCK) {
1135                list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1136                        if (!posix_locks_conflict(request, fl))
1137                                continue;
1138                        if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
1139                                && (*fl->fl_lmops->lm_lock_expirable)(fl)) {
1140                                owner = fl->fl_lmops->lm_mod_owner;
1141                                func = fl->fl_lmops->lm_expire_lock;
1142                                __module_get(owner);
1143                                spin_unlock(&ctx->flc_lock);
1144                                percpu_up_read(&file_rwsem);
1145                                (*func)();
1146                                module_put(owner);
1147                                goto retry;
1148                        }
1149                        if (conflock)
1150                                locks_copy_conflock(conflock, fl);
1151                        error = -EAGAIN;
1152                        if (!(request->fl_flags & FL_SLEEP))
1153                                goto out;
1154                        /*
1155                         * Deadlock detection and insertion into the blocked
1156                         * locks list must be done while holding the same lock!
1157                         */
1158                        error = -EDEADLK;
1159                        spin_lock(&blocked_lock_lock);
1160                        /*
1161                         * Ensure that we don't find any locks blocked on this
1162                         * request during deadlock detection.
1163                         */
1164                        __locks_wake_up_blocks(request);
1165                        if (likely(!posix_locks_deadlock(request, fl))) {
1166                                error = FILE_LOCK_DEFERRED;
1167                                __locks_insert_block(fl, request,
1168                                                     posix_locks_conflict);
1169                        }
1170                        spin_unlock(&blocked_lock_lock);
1171                        goto out;
1172                }
1173        }
1174
1175        /* If we're just looking for a conflict, we're done. */
1176        error = 0;
1177        if (request->fl_flags & FL_ACCESS)
1178                goto out;
1179
1180        /* Find the first old lock with the same owner as the new lock */
1181        list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1182                if (posix_same_owner(request, fl))
1183                        break;
1184        }
1185
1186        /* Process locks with this owner. */
1187        list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1188                if (!posix_same_owner(request, fl))
1189                        break;
1190
1191                /* Detect adjacent or overlapping regions (if same lock type) */
1192                if (request->fl_type == fl->fl_type) {
1193                        /* In all comparisons of start vs end, use
1194                         * "start - 1" rather than "end + 1". If end
1195                         * is OFFSET_MAX, end + 1 will become negative.
1196                         */
1197                        if (fl->fl_end < request->fl_start - 1)
1198                                continue;
1199                        /* If the next lock in the list has entirely bigger
1200                         * addresses than the new one, insert the lock here.
1201                         */
1202                        if (fl->fl_start - 1 > request->fl_end)
1203                                break;
1204
1205                        /* If we come here, the new and old lock are of the
1206                         * same type and adjacent or overlapping. Make one
1207                         * lock yielding from the lower start address of both
1208                         * locks to the higher end address.
1209                         */
1210                        if (fl->fl_start > request->fl_start)
1211                                fl->fl_start = request->fl_start;
1212                        else
1213                                request->fl_start = fl->fl_start;
1214                        if (fl->fl_end < request->fl_end)
1215                                fl->fl_end = request->fl_end;
1216                        else
1217                                request->fl_end = fl->fl_end;
1218                        if (added) {
1219                                locks_delete_lock_ctx(fl, &dispose);
1220                                continue;
1221                        }
1222                        request = fl;
1223                        added = true;
1224                } else {
1225                        /* Processing for different lock types is a bit
1226                         * more complex.
1227                         */
1228                        if (fl->fl_end < request->fl_start)
1229                                continue;
1230                        if (fl->fl_start > request->fl_end)
1231                                break;
1232                        if (request->fl_type == F_UNLCK)
1233                                added = true;
1234                        if (fl->fl_start < request->fl_start)
1235                                left = fl;
1236                        /* If the next lock in the list has a higher end
1237                         * address than the new one, insert the new one here.
1238                         */
1239                        if (fl->fl_end > request->fl_end) {
1240                                right = fl;
1241                                break;
1242                        }
1243                        if (fl->fl_start >= request->fl_start) {
1244                                /* The new lock completely replaces an old
 1245                                 * one (this may happen several times).
1246                                 */
1247                                if (added) {
1248                                        locks_delete_lock_ctx(fl, &dispose);
1249                                        continue;
1250                                }
1251                                /*
1252                                 * Replace the old lock with new_fl, and
1253                                 * remove the old one. It's safe to do the
1254                                 * insert here since we know that we won't be
1255                                 * using new_fl later, and that the lock is
1256                                 * just replacing an existing lock.
1257                                 */
1258                                error = -ENOLCK;
1259                                if (!new_fl)
1260                                        goto out;
1261                                locks_copy_lock(new_fl, request);
1262                                locks_move_blocks(new_fl, request);
1263                                request = new_fl;
1264                                new_fl = NULL;
1265                                locks_insert_lock_ctx(request, &fl->fl_list);
1266                                locks_delete_lock_ctx(fl, &dispose);
1267                                added = true;
1268                        }
1269                }
1270        }
1271
1272        /*
1273         * The above code only modifies existing locks in case of merging or
1274         * replacing. If new lock(s) need to be inserted all modifications are
 1275 * done below this point, so it is still safe to bail out.
1276         */
1277        error = -ENOLCK; /* "no luck" */
1278        if (right && left == right && !new_fl2)
1279                goto out;
1280
1281        error = 0;
1282        if (!added) {
1283                if (request->fl_type == F_UNLCK) {
1284                        if (request->fl_flags & FL_EXISTS)
1285                                error = -ENOENT;
1286                        goto out;
1287                }
1288
1289                if (!new_fl) {
1290                        error = -ENOLCK;
1291                        goto out;
1292                }
1293                locks_copy_lock(new_fl, request);
1294                locks_move_blocks(new_fl, request);
1295                locks_insert_lock_ctx(new_fl, &fl->fl_list);
1296                fl = new_fl;
1297                new_fl = NULL;
1298        }
1299        if (right) {
1300                if (left == right) {
1301                        /* The new lock breaks the old one in two pieces,
1302                         * so we have to use the second new lock.
1303                         */
1304                        left = new_fl2;
1305                        new_fl2 = NULL;
1306                        locks_copy_lock(left, right);
1307                        locks_insert_lock_ctx(left, &fl->fl_list);
1308                }
1309                right->fl_start = request->fl_end + 1;
1310                locks_wake_up_blocks(right);
1311        }
1312        if (left) {
1313                left->fl_end = request->fl_start - 1;
1314                locks_wake_up_blocks(left);
1315        }
1316 out:
1317        spin_unlock(&ctx->flc_lock);
1318        percpu_up_read(&file_rwsem);
1319        trace_posix_lock_inode(inode, request, error);
1320        /*
1321         * Free any unused locks.
1322         */
1323        if (new_fl)
1324                locks_free_lock(new_fl);
1325        if (new_fl2)
1326                locks_free_lock(new_fl2);
1327        locks_dispose_list(&dispose);
1328
1329        return error;
1330}
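/*
 * Worked example of the split case above: an owner holds a write lock
 * over bytes 0-99 and unlocks 40-59.  The existing lock is found as both
 * "left" and "right", so the pre-allocated new_fl2 is copied from it to
 * become the left piece (0-39) while the original is trimmed to the
 * right piece (60-99).  This is why a partial unlock may need a second
 * pre-allocated file_lock even though it removes coverage.
 */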
1331
1332/**
1333 * posix_lock_file - Apply a POSIX-style lock to a file
1334 * @filp: The file to apply the lock to
1335 * @fl: The lock to be applied
1336 * @conflock: Place to return a copy of the conflicting lock, if found.
1337 *
1338 * Add a POSIX style lock to a file.
1339 * We merge adjacent & overlapping locks whenever possible.
 1340 * POSIX locks are sorted by owner task, then by starting address.
1341 *
1342 * Note that if called with an FL_EXISTS argument, the caller may determine
1343 * whether or not a lock was successfully freed by testing the return
1344 * value for -ENOENT.
1345 */
1346int posix_lock_file(struct file *filp, struct file_lock *fl,
1347                        struct file_lock *conflock)
1348{
1349        return posix_lock_inode(file_inode(filp), fl, conflock);
1350}
1351EXPORT_SYMBOL(posix_lock_file);
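/*
 * A sketch of typical use from a filesystem's ->lock method ("myfs" is
 * hypothetical), for a filesystem with no lock state of its own to
 * reconcile first:
 *
 *	static int myfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 *	{
 *		if (IS_GETLK(cmd)) {
 *			posix_test_lock(filp, fl);
 *			return 0;
 *		}
 *		return posix_lock_file(filp, fl, NULL);
 *	}
 */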
1352
1353/**
1354 * posix_lock_inode_wait - Apply a POSIX-style lock to a file
1355 * @inode: inode of file to which lock request should be applied
1356 * @fl: The lock to be applied
1357 *
1358 * Apply a POSIX style lock request to an inode.
1359 */
1360static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1361{
1362        int error;
 1363        might_sleep();
1364        for (;;) {
1365                error = posix_lock_inode(inode, fl, NULL);
1366                if (error != FILE_LOCK_DEFERRED)
1367                        break;
1368                error = wait_event_interruptible(fl->fl_wait,
1369                                        list_empty(&fl->fl_blocked_member));
1370                if (error)
1371                        break;
1372        }
1373        locks_delete_block(fl);
1374        return error;
1375}
1376
1377static void lease_clear_pending(struct file_lock *fl, int arg)
1378{
1379        switch (arg) {
1380        case F_UNLCK:
1381                fl->fl_flags &= ~FL_UNLOCK_PENDING;
1382                fallthrough;
1383        case F_RDLCK:
1384                fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1385        }
1386}
1387
1388/* We already had a lease on this file; just change its type */
1389int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1390{
1391        int error = assign_type(fl, arg);
1392
1393        if (error)
1394                return error;
1395        lease_clear_pending(fl, arg);
1396        locks_wake_up_blocks(fl);
1397        if (arg == F_UNLCK) {
1398                struct file *filp = fl->fl_file;
1399
1400                f_delown(filp);
1401                filp->f_owner.signum = 0;
1402                fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
1403                if (fl->fl_fasync != NULL) {
1404                        printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1405                        fl->fl_fasync = NULL;
1406                }
1407                locks_delete_lock_ctx(fl, dispose);
1408        }
1409        return 0;
1410}
1411EXPORT_SYMBOL(lease_modify);
1412
1413static bool past_time(unsigned long then)
1414{
1415        if (!then)
1416                /* 0 is a special value meaning "this never expires": */
1417                return false;
1418        return time_after(jiffies, then);
1419}
1420
1421static void time_out_leases(struct inode *inode, struct list_head *dispose)
1422{
1423        struct file_lock_context *ctx = inode->i_flctx;
1424        struct file_lock *fl, *tmp;
1425
1426        lockdep_assert_held(&ctx->flc_lock);
1427
1428        list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1429                trace_time_out_leases(inode, fl);
1430                if (past_time(fl->fl_downgrade_time))
1431                        lease_modify(fl, F_RDLCK, dispose);
1432                if (past_time(fl->fl_break_time))
1433                        lease_modify(fl, F_UNLCK, dispose);
1434        }
1435}
1436
1437static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1438{
1439        bool rc;
1440
1441        if (lease->fl_lmops->lm_breaker_owns_lease
1442                        && lease->fl_lmops->lm_breaker_owns_lease(lease))
1443                return false;
1444        if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
1445                rc = false;
1446                goto trace;
1447        }
1448        if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
1449                rc = false;
1450                goto trace;
1451        }
1452
1453        rc = locks_conflict(breaker, lease);
1454trace:
1455        trace_leases_conflict(rc, lease, breaker);
1456        return rc;
1457}
1458
1459static bool
1460any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1461{
1462        struct file_lock_context *ctx = inode->i_flctx;
1463        struct file_lock *fl;
1464
1465        lockdep_assert_held(&ctx->flc_lock);
1466
1467        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1468                if (leases_conflict(fl, breaker))
1469                        return true;
1470        }
1471        return false;
1472}
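/*
 * The user-space view of lease breaking (a sketch; "on_break" is a
 * hypothetical SIGIO handler): the lease holder is signalled when a
 * conflicting open() arrives and must release or downgrade the lease
 * within lease_break_time seconds, or the kernel does it for them.
 *
 *	int fd = open("/tmp/demo", O_RDONLY);
 *
 *	signal(SIGIO, on_break);		// break notification
 *	fcntl(fd, F_SETLEASE, F_RDLCK);		// take a read lease
 *	// Another process's open(..., O_WRONLY) now sleeps in
 *	// __break_lease() below until we do:
 *	fcntl(fd, F_SETLEASE, F_UNLCK);
 */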
1473
1474/**
1475 *      __break_lease   -       revoke all outstanding leases on file
1476 *      @inode: the inode of the file to return
1477 *      @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1478 *          break all leases
1479 *      @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1480 *          only delegations
1481 *
1482 *      break_lease (inlined for speed) has checked there already is at least
1483 *      some kind of lock (maybe a lease) on this file.  Leases are broken on
1484 *      a call to open() or truncate().  This function can sleep unless you
1485 *      specified %O_NONBLOCK to your open().
1486 */
1487int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1488{
1489        int error = 0;
1490        struct file_lock_context *ctx;
1491        struct file_lock *new_fl, *fl, *tmp;
1492        unsigned long break_time;
1493        int want_write = (mode & O_ACCMODE) != O_RDONLY;
1494        LIST_HEAD(dispose);
1495
1496        new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1497        if (IS_ERR(new_fl))
1498                return PTR_ERR(new_fl);
1499        new_fl->fl_flags = type;
1500
1501        /* typically we will check that ctx is non-NULL before calling */
1502        ctx = locks_inode_context(inode);
1503        if (!ctx) {
1504                WARN_ON_ONCE(1);
1505                goto free_lock;
1506        }
1507
1508        percpu_down_read(&file_rwsem);
1509        spin_lock(&ctx->flc_lock);
1510
1511        time_out_leases(inode, &dispose);
1512
1513        if (!any_leases_conflict(inode, new_fl))
1514                goto out;
1515
1516        break_time = 0;
1517        if (lease_break_time > 0) {
1518                break_time = jiffies + lease_break_time * HZ;
1519                if (break_time == 0)
1520                        break_time++;   /* so that 0 means no break time */
1521        }
1522
1523        list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1524                if (!leases_conflict(fl, new_fl))
1525                        continue;
1526                if (want_write) {
1527                        if (fl->fl_flags & FL_UNLOCK_PENDING)
1528                                continue;
1529                        fl->fl_flags |= FL_UNLOCK_PENDING;
1530                        fl->fl_break_time = break_time;
1531                } else {
1532                        if (lease_breaking(fl))
1533                                continue;
1534                        fl->fl_flags |= FL_DOWNGRADE_PENDING;
1535                        fl->fl_downgrade_time = break_time;
1536                }
1537                if (fl->fl_lmops->lm_break(fl))
1538                        locks_delete_lock_ctx(fl, &dispose);
1539        }
1540
1541        if (list_empty(&ctx->flc_lease))
1542                goto out;
1543
1544        if (mode & O_NONBLOCK) {
1545                trace_break_lease_noblock(inode, new_fl);
1546                error = -EWOULDBLOCK;
1547                goto out;
1548        }
1549
1550restart:
1551        fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1552        break_time = fl->fl_break_time;
1553        if (break_time != 0)
1554                break_time -= jiffies;
1555        if (break_time == 0)
1556                break_time++;
1557        locks_insert_block(fl, new_fl, leases_conflict);
1558        trace_break_lease_block(inode, new_fl);
1559        spin_unlock(&ctx->flc_lock);
1560        percpu_up_read(&file_rwsem);
1561
1562        locks_dispose_list(&dispose);
1563        error = wait_event_interruptible_timeout(new_fl->fl_wait,
1564                                        list_empty(&new_fl->fl_blocked_member),
1565                                        break_time);
1566
1567        percpu_down_read(&file_rwsem);
1568        spin_lock(&ctx->flc_lock);
1569        trace_break_lease_unblock(inode, new_fl);
1570        locks_delete_block(new_fl);
1571        if (error >= 0) {
1572                /*
1573                 * Wait for the next conflicting lease that has not been
1574                 * broken yet
1575                 */
1576                if (error == 0)
1577                        time_out_leases(inode, &dispose);
1578                if (any_leases_conflict(inode, new_fl))
1579                        goto restart;
1580                error = 0;
1581        }
1582out:
1583        spin_unlock(&ctx->flc_lock);
1584        percpu_up_read(&file_rwsem);
1585        locks_dispose_list(&dispose);
1586free_lock:
1587        locks_free_lock(new_fl);
1588        return error;
1589}
1590EXPORT_SYMBOL(__break_lease);
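/*
 * Illustrative sketch (not part of this file): callers normally reach
 * __break_lease() through the break_lease() wrapper in
 * <linux/filelock.h>, e.g. from an open-for-write path.  The function
 * name below is hypothetical and error handling is trimmed.
 *
 *	static int example_open_for_write(struct inode *inode, struct file *filp)
 *	{
 *		int error;
 *
 *		// Sleeps until conflicting leases are broken; a non-blocking
 *		// open gets -EWOULDBLOCK back instead.
 *		error = break_lease(inode, filp->f_flags);
 *		if (error)
 *			return error;
 *
 *		return 0;
 *	}
 */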
1591
1592/**
1593 *      lease_get_mtime - update modified time of an inode with exclusive lease
1594 *      @inode: the inode
1595 *      @time:  pointer to a timespec which contains the last modified time
1596 *
1597 * This is to force NFS clients to flush their caches for files with
1598 * exclusive leases.  The justification is that if someone has an
1599 * exclusive lease, then they could be modifying it.
1600 */
1601void lease_get_mtime(struct inode *inode, struct timespec64 *time)
1602{
1603        bool has_lease = false;
1604        struct file_lock_context *ctx;
1605        struct file_lock *fl;
1606
1607        ctx = locks_inode_context(inode);
1608        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1609                spin_lock(&ctx->flc_lock);
1610                fl = list_first_entry_or_null(&ctx->flc_lease,
1611                                              struct file_lock, fl_list);
1612                if (fl && (fl->fl_type == F_WRLCK))
1613                        has_lease = true;
1614                spin_unlock(&ctx->flc_lock);
1615        }
1616
1617        if (has_lease)
1618                *time = current_time(inode);
1619}
1620EXPORT_SYMBOL(lease_get_mtime);
1621
1622/**
1623 *      fcntl_getlease - Enquire what lease is currently active
1624 *      @filp: the file
1625 *
1626 *      The value returned by this function will be one of
1627 *      (if no lease break is pending):
1628 *
1629 *      %F_RDLCK to indicate a shared lease is held.
1630 *
1631 *      %F_WRLCK to indicate an exclusive lease is held.
1632 *
1633 *      %F_UNLCK to indicate no lease is held.
1634 *
1635 *      (if a lease break is pending):
1636 *
1637 *      %F_RDLCK to indicate an exclusive lease needs to be
1638 *              changed to a shared lease (or removed).
1639 *
1640 *      %F_UNLCK to indicate the lease needs to be removed.
1641 *
1642 *      XXX: sfr & willy disagree over whether F_INPROGRESS
1643 *      should be returned to userspace.
1644 */
1645int fcntl_getlease(struct file *filp)
1646{
1647        struct file_lock *fl;
1648        struct inode *inode = file_inode(filp);
1649        struct file_lock_context *ctx;
1650        int type = F_UNLCK;
1651        LIST_HEAD(dispose);
1652
1653        ctx = locks_inode_context(inode);
1654        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1655                percpu_down_read(&file_rwsem);
1656                spin_lock(&ctx->flc_lock);
1657                time_out_leases(inode, &dispose);
1658                list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1659                        if (fl->fl_file != filp)
1660                                continue;
1661                        type = target_leasetype(fl);
1662                        break;
1663                }
1664                spin_unlock(&ctx->flc_lock);
1665                percpu_up_read(&file_rwsem);
1666
1667                locks_dispose_list(&dispose);
1668        }
1669        return type;
1670}
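/*
 * Illustrative userspace usage (not part of this file): querying the
 * current lease state with F_GETLEASE; @fd is assumed to be an open
 * descriptor on the file of interest.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	static void show_lease(int fd)
 *	{
 *		switch (fcntl(fd, F_GETLEASE)) {
 *		case F_RDLCK:
 *			printf("shared lease held\n");
 *			break;
 *		case F_WRLCK:
 *			printf("exclusive lease held\n");
 *			break;
 *		case F_UNLCK:
 *			printf("no lease held\n");
 *			break;
 *		default:
 *			perror("F_GETLEASE");
 *		}
 *	}
 */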
1671
1672/**
1673 * check_conflicting_open - see if the given file points to an inode that has
1674 *                          an existing open that would conflict with the
1675 *                          desired lease.
1676 * @filp:       file to check
1677 * @arg:        type of lease that we're trying to acquire
1678 * @flags:      current lock flags
1679 *
1680 * Check to see if there's an existing open fd on this file that would
1681 * conflict with the lease we're trying to set.
1682 */
1683static int
1684check_conflicting_open(struct file *filp, const int arg, int flags)
1685{
1686        struct inode *inode = file_inode(filp);
1687        int self_wcount = 0, self_rcount = 0;
1688
1689        if (flags & FL_LAYOUT)
1690                return 0;
1691        if (flags & FL_DELEG)
1692                /* We leave these checks to the caller */
1693                return 0;
1694
1695        if (arg == F_RDLCK)
1696                return inode_is_open_for_write(inode) ? -EAGAIN : 0;
1697        else if (arg != F_WRLCK)
1698                return 0;
1699
1700        /*
1701         * Make sure that the only read/write counts are from the lease requestor.
1702         * Note that this will result in denying write leases when i_writecount
1703         * is negative, which is what we want.  (We shouldn't grant write leases
1704         * on files open for execution.)
1705         */
1706        if (filp->f_mode & FMODE_WRITE)
1707                self_wcount = 1;
1708        else if (filp->f_mode & FMODE_READ)
1709                self_rcount = 1;
1710
1711        if (atomic_read(&inode->i_writecount) != self_wcount ||
1712            atomic_read(&inode->i_readcount) != self_rcount)
1713                return -EAGAIN;
1714
1715        return 0;
1716}
1717
1718static int
1719generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv)
1720{
1721        struct file_lock *fl, *my_fl = NULL, *lease;
1722        struct inode *inode = file_inode(filp);
1723        struct file_lock_context *ctx;
1724        bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1725        int error;
1726        LIST_HEAD(dispose);
1727
1728        lease = *flp;
1729        trace_generic_add_lease(inode, lease);
1730
1731        /* Note that arg is never F_UNLCK here */
1732        ctx = locks_get_lock_context(inode, arg);
1733        if (!ctx)
1734                return -ENOMEM;
1735
1736        /*
1737         * In the delegation case we need mutual exclusion with
1738         * a number of operations that take the i_mutex.  We trylock
1739         * because delegations are an optional optimization, and if
1740         * there's some chance of a conflict we'd rather not bother;
1741         * maybe that's a sign this just isn't a good file to
1742         * hand out a delegation on.
1743         */
1744        if (is_deleg && !inode_trylock(inode))
1745                return -EAGAIN;
1746
1747        percpu_down_read(&file_rwsem);
1748        spin_lock(&ctx->flc_lock);
1749        time_out_leases(inode, &dispose);
1750        error = check_conflicting_open(filp, arg, lease->fl_flags);
1751        if (error)
1752                goto out;
1753
1754        /*
1755         * At this point, we know that if there is an exclusive
1756         * lease on this file, then we hold it on this filp
1757         * (otherwise our open of this file would have blocked).
1758         * And if we are trying to acquire an exclusive lease,
1759         * then the file is not open by anyone (including us)
1760         * except for this filp.
1761         */
1762        error = -EAGAIN;
1763        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1764                if (fl->fl_file == filp &&
1765                    fl->fl_owner == lease->fl_owner) {
1766                        my_fl = fl;
1767                        continue;
1768                }
1769
1770                /*
1771                 * No exclusive leases if someone else has a lease on
1772                 * this file:
1773                 */
1774                if (arg == F_WRLCK)
1775                        goto out;
1776                /*
1777                 * Modifying our existing lease is OK, but no getting a
1778                 * new lease if someone else is opening for write:
1779                 */
1780                if (fl->fl_flags & FL_UNLOCK_PENDING)
1781                        goto out;
1782        }
1783
1784        if (my_fl != NULL) {
1785                lease = my_fl;
1786                error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1787                if (error)
1788                        goto out;
1789                goto out_setup;
1790        }
1791
1792        error = -EINVAL;
1793        if (!leases_enable)
1794                goto out;
1795
1796        locks_insert_lock_ctx(lease, &ctx->flc_lease);
1797        /*
1798         * The check in break_lease() is lockless. It's possible for another
1799         * open to race in after we did the earlier check for a conflicting
1800         * open but before the lease was inserted. Check again for a
1801         * conflicting open and cancel the lease if there is one.
1802         *
1803         * We also add a barrier here to ensure that the insertion of the lock
1804         * precedes these checks.
1805         */
1806        smp_mb();
1807        error = check_conflicting_open(filp, arg, lease->fl_flags);
1808        if (error) {
1809                locks_unlink_lock_ctx(lease);
1810                goto out;
1811        }
1812
1813out_setup:
1814        if (lease->fl_lmops->lm_setup)
1815                lease->fl_lmops->lm_setup(lease, priv);
1816out:
1817        spin_unlock(&ctx->flc_lock);
1818        percpu_up_read(&file_rwsem);
1819        locks_dispose_list(&dispose);
1820        if (is_deleg)
1821                inode_unlock(inode);
1822        if (!error && !my_fl)
1823                *flp = NULL;
1824        return error;
1825}
1826
1827static int generic_delete_lease(struct file *filp, void *owner)
1828{
1829        int error = -EAGAIN;
1830        struct file_lock *fl, *victim = NULL;
1831        struct inode *inode = file_inode(filp);
1832        struct file_lock_context *ctx;
1833        LIST_HEAD(dispose);
1834
1835        ctx = locks_inode_context(inode);
1836        if (!ctx) {
1837                trace_generic_delete_lease(inode, NULL);
1838                return error;
1839        }
1840
1841        percpu_down_read(&file_rwsem);
1842        spin_lock(&ctx->flc_lock);
1843        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1844                if (fl->fl_file == filp &&
1845                    fl->fl_owner == owner) {
1846                        victim = fl;
1847                        break;
1848                }
1849        }
1850        trace_generic_delete_lease(inode, victim);
1851        if (victim)
1852                error = victim->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1853        spin_unlock(&ctx->flc_lock);
1854        percpu_up_read(&file_rwsem);
1855        locks_dispose_list(&dispose);
1856        return error;
1857}
1858
1859/**
1860 *      generic_setlease        -       sets a lease on an open file
1861 *      @filp:  file pointer
1862 *      @arg:   type of lease to obtain
1863 *      @flp:   input - file_lock to use, output - file_lock inserted
1864 *      @priv:  private data for lm_setup (may be NULL if lm_setup
1865 *              doesn't require it)
1866 *
1867 *      The (input) flp->fl_lmops->lm_break function is required
1868 *      by break_lease().
1869 */
1870int generic_setlease(struct file *filp, int arg, struct file_lock **flp,
1871                        void **priv)
1872{
1873        struct inode *inode = file_inode(filp);
1874        vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
1875        int error;
1876
1877        if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
1878                return -EACCES;
1879        if (!S_ISREG(inode->i_mode))
1880                return -EINVAL;
1881        error = security_file_lock(filp, arg);
1882        if (error)
1883                return error;
1884
1885        switch (arg) {
1886        case F_UNLCK:
1887                return generic_delete_lease(filp, *priv);
1888        case F_RDLCK:
1889        case F_WRLCK:
1890                if (!(*flp)->fl_lmops->lm_break) {
1891                        WARN_ON_ONCE(1);
1892                        return -ENOLCK;
1893                }
1894
1895                return generic_add_lease(filp, arg, flp, priv);
1896        default:
1897                return -EINVAL;
1898        }
1899}
1900EXPORT_SYMBOL(generic_setlease);
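/*
 * Illustrative sketch (not part of this file): a filesystem opts in to
 * leases by wiring ->setlease in its file_operations.  Most simply use
 * generic_setlease() (which is also the fallback when ->setlease is
 * NULL; see vfs_setlease() below), while filesystems that cannot
 * support leases use simple_nosetlease().  The surrounding fields are
 * hypothetical.
 *
 *	static const struct file_operations example_file_ops = {
 *		.read_iter	= generic_file_read_iter,
 *		.write_iter	= generic_file_write_iter,
 *		.setlease	= generic_setlease,
 *	};
 */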
1901
1902/*
1903 * Kernel subsystems can register to be notified on any attempt to set
1904 * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1905 * to close files that it may have cached when there is an attempt to set a
1906 * conflicting lease.
1907 */
1908static struct srcu_notifier_head lease_notifier_chain;
1909
1910static inline void
1911lease_notifier_chain_init(void)
1912{
1913        srcu_init_notifier_head(&lease_notifier_chain);
1914}
1915
1916static inline void
1917setlease_notifier(int arg, struct file_lock *lease)
1918{
1919        if (arg != F_UNLCK)
1920                srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
1921}
1922
1923int lease_register_notifier(struct notifier_block *nb)
1924{
1925        return srcu_notifier_chain_register(&lease_notifier_chain, nb);
1926}
1927EXPORT_SYMBOL_GPL(lease_register_notifier);
1928
1929void lease_unregister_notifier(struct notifier_block *nb)
1930{
1931        srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
1932}
1933EXPORT_SYMBOL_GPL(lease_unregister_notifier);
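/*
 * Illustrative sketch (not part of this file): how a subsystem such as
 * nfsd might watch for new leases.  The callback receives the lease
 * type (F_RDLCK or F_WRLCK) as @arg and the struct file_lock as @data,
 * matching setlease_notifier() above.  The names are hypothetical.
 *
 *	static int example_lease_event(struct notifier_block *nb,
 *				       unsigned long arg, void *data)
 *	{
 *		struct file_lock *lease = data;
 *
 *		// e.g. close any cached opens of lease->fl_file's inode
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_lease_nb = {
 *		.notifier_call = example_lease_event,
 *	};
 *
 *	// at init:  lease_register_notifier(&example_lease_nb);
 *	// at exit:  lease_unregister_notifier(&example_lease_nb);
 */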
1934
1935/**
1936 * vfs_setlease        -       sets a lease on an open file
1937 * @filp:       file pointer
1938 * @arg:        type of lease to obtain
1939 * @lease:      file_lock to use when adding a lease
1940 * @priv:       private info for lm_setup when adding a lease (may be
1941 *              NULL if lm_setup doesn't require it)
1942 *
1943 * Call this to establish a lease on the file. The "lease" argument is not
1944 * used for F_UNLCK requests and may be NULL. For commands that set or alter
1945 * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
1946 * set; if not, this function will return -ENOLCK (and generate a scary-looking
1947 * stack trace).
1948 *
1949 * The "priv" pointer is passed directly to the lm_setup function as-is. It
1950 * may be NULL if the lm_setup operation doesn't require it.
1951 */
1952int
1953vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv)
1954{
1955        if (lease)
1956                setlease_notifier(arg, *lease);
1957        if (filp->f_op->setlease)
1958                return filp->f_op->setlease(filp, arg, lease, priv);
1959        else
1960                return generic_setlease(filp, arg, lease, priv);
1961}
1962EXPORT_SYMBOL_GPL(vfs_setlease);
1963
1964static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
1965{
1966        struct file_lock *fl;
1967        struct fasync_struct *new;
1968        int error;
1969
1970        fl = lease_alloc(filp, arg);
1971        if (IS_ERR(fl))
1972                return PTR_ERR(fl);
1973
1974        new = fasync_alloc();
1975        if (!new) {
1976                locks_free_lock(fl);
1977                return -ENOMEM;
1978        }
1979        new->fa_fd = fd;
1980
1981        error = vfs_setlease(filp, arg, &fl, (void **)&new);
1982        if (fl)
1983                locks_free_lock(fl);
1984        if (new)
1985                fasync_free(new);
1986        return error;
1987}
1988
1989/**
1990 *      fcntl_setlease  -       sets a lease on an open file
1991 *      @fd: open file descriptor
1992 *      @filp: file pointer
1993 *      @arg: type of lease to obtain
1994 *
1995 *      Call this fcntl to establish a lease on the file.
1996 *      Note that you also need to call %F_SETSIG to
1997 *      receive a signal when the lease is broken.
1998 */
1999int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
2000{
2001        if (arg == F_UNLCK)
2002                return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2003        return do_fcntl_add_lease(fd, filp, arg);
2004}
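/*
 * Illustrative userspace usage (not part of this file): taking a read
 * lease and arranging to be told when it is broken.  As noted above,
 * F_SETSIG selects the signal; with a realtime signal plus SA_SIGINFO
 * the handler also learns which fd is affected.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <signal.h>
 *
 *	static void on_lease_break(int sig, siginfo_t *si, void *uc)
 *	{
 *		// si->si_fd is the descriptor whose lease is being broken;
 *		// downgrade or drop it within /proc/sys/fs/lease-break-time.
 *	}
 *
 *	static int take_lease(int fd)
 *	{
 *		struct sigaction sa;
 *
 *		sa.sa_sigaction = on_lease_break;
 *		sa.sa_flags = SA_SIGINFO;
 *		sigemptyset(&sa.sa_mask);
 *		if (sigaction(SIGRTMIN, &sa, NULL) == -1)
 *			return -1;
 *
 *		if (fcntl(fd, F_SETSIG, SIGRTMIN) == -1)
 *			return -1;
 *		return fcntl(fd, F_SETLEASE, F_RDLCK);
 *	}
 */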
2005
2006/**
2007 * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
2008 * @inode: inode of the file to apply to
2009 * @fl: The lock to be applied
2010 *
2011 * Apply a FLOCK style lock request to an inode.
2012 */
2013static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
2014{
2015        int error;
2016        might_sleep();
2017        for (;;) {
2018                error = flock_lock_inode(inode, fl);
2019                if (error != FILE_LOCK_DEFERRED)
2020                        break;
2021                error = wait_event_interruptible(fl->fl_wait,
2022                                list_empty(&fl->fl_blocked_member));
2023                if (error)
2024                        break;
2025        }
2026        locks_delete_block(fl);
2027        return error;
2028}
2029
2030/**
2031 * locks_lock_inode_wait - Apply a lock to an inode
2032 * @inode: inode of the file to apply to
2033 * @fl: The lock to be applied
2034 *
2035 * Apply a POSIX or FLOCK style lock request to an inode.
2036 */
2037int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
2038{
2039        int res = 0;
2040        switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
2041                case FL_POSIX:
2042                        res = posix_lock_inode_wait(inode, fl);
2043                        break;
2044                case FL_FLOCK:
2045                        res = flock_lock_inode_wait(inode, fl);
2046                        break;
2047                default:
2048                        BUG();
2049        }
2050        return res;
2051}
2052EXPORT_SYMBOL(locks_lock_inode_wait);
2053
2054/**
2055 *      sys_flock: - flock() system call.
2056 *      @fd: the file descriptor to lock.
2057 *      @cmd: the type of lock to apply.
2058 *
2059 *      Apply a %FL_FLOCK style lock to an open file descriptor.
2060 *      The @cmd can be one of:
2061 *
2062 *      - %LOCK_SH -- a shared lock.
2063 *      - %LOCK_EX -- an exclusive lock.
2064 *      - %LOCK_UN -- remove an existing lock.
2065 *      - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
2066 *
2067 *      %LOCK_MAND support has been removed from the kernel.
2068 */
2069SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2070{
2071        int can_sleep, error, type;
2072        struct file_lock fl;
2073        struct fd f;
2074
2075        /*
2076         * LOCK_MAND locks were broken for a long time in that they never
2077         * conflicted with one another and didn't prevent any sort of open,
2078         * read or write activity.
2079         *
2080         * Just ignore these requests now, to preserve legacy behavior, but
2081         * throw a warning to let people know that they don't actually work.
2082         */
2083        if (cmd & LOCK_MAND) {
2084                pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid);
2085                return 0;
2086        }
2087
2088        type = flock_translate_cmd(cmd & ~LOCK_NB);
2089        if (type < 0)
2090                return type;
2091
2092        error = -EBADF;
2093        f = fdget(fd);
2094        if (!f.file)
2095                return error;
2096
2097        if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
2098                goto out_putf;
2099
2100        flock_make_lock(f.file, &fl, type);
2101
2102        error = security_file_lock(f.file, fl.fl_type);
2103        if (error)
2104                goto out_putf;
2105
2106        can_sleep = !(cmd & LOCK_NB);
2107        if (can_sleep)
2108                fl.fl_flags |= FL_SLEEP;
2109
2110        if (f.file->f_op->flock)
2111                error = f.file->f_op->flock(f.file,
2112                                            (can_sleep) ? F_SETLKW : F_SETLK,
2113                                            &fl);
2114        else
2115                error = locks_lock_file_wait(f.file, &fl);
2116
2117        locks_release_private(&fl);
2118 out_putf:
2119        fdput(f);
2120
2121        return error;
2122}
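/*
 * Illustrative userspace usage (not part of this file): serializing
 * access to a file with flock(2).  LOCK_NB makes the request fail with
 * EWOULDBLOCK instead of sleeping, mirroring the !can_sleep path above.
 *
 *	#include <sys/file.h>
 *	#include <errno.h>
 *
 *	static int try_exclusive(int fd)
 *	{
 *		if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
 *			if (errno == EWOULDBLOCK)
 *				return 0;	// someone else holds it
 *			return -1;		// real error
 *		}
 *		// critical section goes here
 *		return flock(fd, LOCK_UN) == 0 ? 1 : -1;
 *	}
 */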
2123
2124/**
2125 * vfs_test_lock - test file byte range lock
2126 * @filp: The file to test lock for
2127 * @fl: The lock to test; also used to hold result
2128 *
2129 * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
2130 * setting fl->fl_type to something other than F_UNLCK.
2131 */
2132int vfs_test_lock(struct file *filp, struct file_lock *fl)
2133{
2134        WARN_ON_ONCE(filp != fl->fl_file);
2135        if (filp->f_op->lock)
2136                return filp->f_op->lock(filp, F_GETLK, fl);
2137        posix_test_lock(filp, fl);
2138        return 0;
2139}
2140EXPORT_SYMBOL_GPL(vfs_test_lock);
2141
2142/**
2143 * locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2144 * @fl: The file_lock whose fl_pid should be translated
2145 * @ns: The namespace into which the pid should be translated
2146 *
2147 * Used to translate a fl_pid into a namespace virtual pid number
2148 */
2149static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
2150{
2151        pid_t vnr;
2152        struct pid *pid;
2153
2154        if (IS_OFDLCK(fl))
2155                return -1;
2156        if (IS_REMOTELCK(fl))
2157                return fl->fl_pid;
2158        /*
2159         * If the flock owner process is dead and its pid has already been
2160         * freed, the translation below won't work, but we still want to show
2161         * flock owner pid number in init pidns.
2162         */
2163        if (ns == &init_pid_ns)
2164                return (pid_t)fl->fl_pid;
2165
2166        rcu_read_lock();
2167        pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
2168        vnr = pid_nr_ns(pid, ns);
2169        rcu_read_unlock();
2170        return vnr;
2171}
2172
2173static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2174{
2175        flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2176#if BITS_PER_LONG == 32
2177        /*
2178         * Make sure we can represent the posix lock via
2179         * legacy 32bit flock.
2180         */
2181        if (fl->fl_start > OFFT_OFFSET_MAX)
2182                return -EOVERFLOW;
2183        if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2184                return -EOVERFLOW;
2185#endif
2186        flock->l_start = fl->fl_start;
2187        flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2188                fl->fl_end - fl->fl_start + 1;
2189        flock->l_whence = 0;
2190        flock->l_type = fl->fl_type;
2191        return 0;
2192}
2193
2194#if BITS_PER_LONG == 32
2195static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
2196{
2197        flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2198        flock->l_start = fl->fl_start;
2199        flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2200                fl->fl_end - fl->fl_start + 1;
2201        flock->l_whence = 0;
2202        flock->l_type = fl->fl_type;
2203}
2204#endif
2205
2206/* Report the first existing lock that would conflict with the lock
2207 * described by @flock.  This implements the F_GETLK command of fcntl().
2208 */
2209int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
2210{
2211        struct file_lock *fl;
2212        int error;
2213
2214        fl = locks_alloc_lock();
2215        if (fl == NULL)
2216                return -ENOMEM;
2217        error = -EINVAL;
2218        if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2219                        && flock->l_type != F_WRLCK)
2220                goto out;
2221
2222        error = flock_to_posix_lock(filp, fl, flock);
2223        if (error)
2224                goto out;
2225
2226        if (cmd == F_OFD_GETLK) {
2227                error = -EINVAL;
2228                if (flock->l_pid != 0)
2229                        goto out;
2230
2231                fl->fl_flags |= FL_OFDLCK;
2232                fl->fl_owner = filp;
2233        }
2234
2235        error = vfs_test_lock(filp, fl);
2236        if (error)
2237                goto out;
2238
2239        flock->l_type = fl->fl_type;
2240        if (fl->fl_type != F_UNLCK) {
2241                error = posix_lock_to_flock(flock, fl);
2242                if (error)
2243                        goto out;
2244        }
2245out:
2246        locks_free_lock(fl);
2247        return error;
2248}
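/*
 * Illustrative userspace usage (not part of this file): probing for a
 * conflicting byte-range lock with F_OFD_GETLK.  Note that l_pid must
 * be 0 on input for the OFD variant, matching the -EINVAL check above.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int would_block(int fd)
 *	{
 *		struct flock fl;
 *
 *		memset(&fl, 0, sizeof(fl));	// l_pid = 0 is mandatory here
 *		fl.l_type = F_WRLCK;
 *		fl.l_whence = SEEK_SET;
 *		fl.l_start = 0;
 *		fl.l_len = 0;			// whole file
 *
 *		if (fcntl(fd, F_OFD_GETLK, &fl) == -1)
 *			return -1;
 *		return fl.l_type != F_UNLCK;	// conflicting holder described in fl
 *	}
 */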
2249
2250/**
2251 * vfs_lock_file - file byte range lock
2252 * @filp: The file to apply the lock to
2253 * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2254 * @fl: The lock to be applied
2255 * @conf: Place to return a copy of the conflicting lock, if found.
2256 *
2257 * A caller that doesn't care about the conflicting lock may pass NULL
2258 * as the final argument.
2259 *
2260 * If the filesystem defines a private ->lock() method, then @conf will
2261 * be left unchanged; so a caller that cares should initialize it to
2262 * some acceptable default.
2263 *
2264 * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2265 * locks, the ->lock() interface may return asynchronously, before the lock has
2266 * been granted or denied by the underlying filesystem, if (and only if)
2267 * lm_grant is set. Additionally, the EXPORT_OP_ASYNC_LOCK flag needs to be
2268 * set in the filesystem's export_operations.
2269 *
2270 * Callers expecting ->lock() to return asynchronously will only use F_SETLK,
2271 * not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a
2272 * blocking lock. When ->lock() does return asynchronously, it must return
2273 * FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes.
2274 * If the request is for a non-blocking lock the file system should return
2275 * FILE_LOCK_DEFERRED, then try to get the lock and call the callback routine
2276 * with the result. If the request timed out, the callback routine will return
2277 * a nonzero return code and the file system should release the lock. The file
2278 * system is also responsible for keeping a corresponding posix lock when it
2279 * grants a lock so that the VFS can find out which locks are locally held and
2280 * do the correct lock cleanup when required.
2281 * The underlying filesystem must not drop the kernel lock or call
2282 * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2283 * return code.
2284 */
2285int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2286{
2287        WARN_ON_ONCE(filp != fl->fl_file);
2288        if (filp->f_op->lock)
2289                return filp->f_op->lock(filp, cmd, fl);
2290        else
2291                return posix_lock_file(filp, fl, conf);
2292}
2293EXPORT_SYMBOL_GPL(vfs_lock_file);
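/*
 * Illustrative sketch (not part of this file): the rough shape of an
 * asynchronous ->lock() implementation as described above.  A real
 * filesystem would queue the request to its lock manager; the helper
 * example_queue_remote_lock() is hypothetical.
 *
 *	static int example_lock(struct file *filp, int cmd, struct file_lock *fl)
 *	{
 *		// Hand the request to the remote lock manager; the reply
 *		// handler below runs later, possibly in another context.
 *		if (example_queue_remote_lock(filp, fl))
 *			return FILE_LOCK_DEFERRED;
 *
 *		// Fall back to purely local locking.
 *		return posix_lock_file(filp, fl, NULL);
 *	}
 *
 *	// Called when the lock manager replies; @result is 0 on grant.
 *	static void example_lock_reply(struct file_lock *fl, int result)
 *	{
 *		fl->fl_lmops->lm_grant(fl, result);
 *	}
 */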
2294
2295static int do_lock_file_wait(struct file *filp, unsigned int cmd,
2296                             struct file_lock *fl)
2297{
2298        int error;
2299
2300        error = security_file_lock(filp, fl->fl_type);
2301        if (error)
2302                return error;
2303
2304        for (;;) {
2305                error = vfs_lock_file(filp, cmd, fl, NULL);
2306                if (error != FILE_LOCK_DEFERRED)
2307                        break;
2308                error = wait_event_interruptible(fl->fl_wait,
2309                                        list_empty(&fl->fl_blocked_member));
2310                if (error)
2311                        break;
2312        }
2313        locks_delete_block(fl);
2314
2315        return error;
2316}
2317
2318/* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
2319static int
2320check_fmode_for_setlk(struct file_lock *fl)
2321{
2322        switch (fl->fl_type) {
2323        case F_RDLCK:
2324                if (!(fl->fl_file->f_mode & FMODE_READ))
2325                        return -EBADF;
2326                break;
2327        case F_WRLCK:
2328                if (!(fl->fl_file->f_mode & FMODE_WRITE))
2329                        return -EBADF;
2330        }
2331        return 0;
2332}
2333
2334/* Apply the lock described by @flock to an open file descriptor.
2335 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2336 */
2337int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
2338                struct flock *flock)
2339{
2340        struct file_lock *file_lock = locks_alloc_lock();
2341        struct inode *inode = file_inode(filp);
2342        struct file *f;
2343        int error;
2344
2345        if (file_lock == NULL)
2346                return -ENOLCK;
2347
2348        error = flock_to_posix_lock(filp, file_lock, flock);
2349        if (error)
2350                goto out;
2351
2352        error = check_fmode_for_setlk(file_lock);
2353        if (error)
2354                goto out;
2355
2356        /*
2357         * If the cmd is requesting file-private locks, then set the
2358         * FL_OFDLCK flag and override the owner.
2359         */
2360        switch (cmd) {
2361        case F_OFD_SETLK:
2362                error = -EINVAL;
2363                if (flock->l_pid != 0)
2364                        goto out;
2365
2366                cmd = F_SETLK;
2367                file_lock->fl_flags |= FL_OFDLCK;
2368                file_lock->fl_owner = filp;
2369                break;
2370        case F_OFD_SETLKW:
2371                error = -EINVAL;
2372                if (flock->l_pid != 0)
2373                        goto out;
2374
2375                cmd = F_SETLKW;
2376                file_lock->fl_flags |= FL_OFDLCK;
2377                file_lock->fl_owner = filp;
2378                fallthrough;
2379        case F_SETLKW:
2380                file_lock->fl_flags |= FL_SLEEP;
2381        }
2382
2383        error = do_lock_file_wait(filp, cmd, file_lock);
2384
2385        /*
2386         * Attempt to detect a close/fcntl race and recover by releasing the
2387         * lock that was just acquired. There is no need to do that when we're
2388         * unlocking though, or for OFD locks.
2389         */
2390        if (!error && file_lock->fl_type != F_UNLCK &&
2391            !(file_lock->fl_flags & FL_OFDLCK)) {
2392                struct files_struct *files = current->files;
2393                /*
2394                 * We need that spin_lock here - it prevents reordering between
2395                 * update of i_flctx->flc_posix and check for it done in
2396                 * close(). rcu_read_lock() wouldn't do.
2397                 */
2398                spin_lock(&files->file_lock);
2399                f = files_lookup_fd_locked(files, fd);
2400                spin_unlock(&files->file_lock);
2401                if (f != filp) {
2402                        file_lock->fl_type = F_UNLCK;
2403                        error = do_lock_file_wait(filp, cmd, file_lock);
2404                        WARN_ON_ONCE(error);
2405                        error = -EBADF;
2406                }
2407        }
2408out:
2409        trace_fcntl_setlk(inode, file_lock, error);
2410        locks_free_lock(file_lock);
2411        return error;
2412}
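/*
 * Illustrative userspace usage (not part of this file): setting an
 * open-file-description lock.  OFD locks are owned by the open file
 * description rather than the process, so they sidestep the
 * close/fcntl race handled above and are shared by threads using the
 * same fd.  l_pid must again be 0 on input.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int lock_first_page(int fd)
 *	{
 *		struct flock fl;
 *
 *		memset(&fl, 0, sizeof(fl));
 *		fl.l_type = F_WRLCK;
 *		fl.l_whence = SEEK_SET;
 *		fl.l_start = 0;
 *		fl.l_len = 4096;
 *
 *		return fcntl(fd, F_OFD_SETLKW, &fl);	// blocks until granted
 *	}
 */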
2413
2414#if BITS_PER_LONG == 32
2415/* Report the first existing lock that would conflict with the lock
2416 * described by @flock.  This implements the F_GETLK command of fcntl().
2417 */
2418int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
2419{
2420        struct file_lock *fl;
2421        int error;
2422
2423        fl = locks_alloc_lock();
2424        if (fl == NULL)
2425                return -ENOMEM;
2426
2427        error = -EINVAL;
2428        if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
2429                        && flock->l_type != F_WRLCK)
2430                goto out;
2431
2432        error = flock64_to_posix_lock(filp, fl, flock);
2433        if (error)
2434                goto out;
2435
2436        if (cmd == F_OFD_GETLK) {
2437                error = -EINVAL;
2438                if (flock->l_pid != 0)
2439                        goto out;
2440
2441                fl->fl_flags |= FL_OFDLCK;
2442                fl->fl_owner = filp;
2443        }
2444
2445        error = vfs_test_lock(filp, fl);
2446        if (error)
2447                goto out;
2448
2449        flock->l_type = fl->fl_type;
2450        if (fl->fl_type != F_UNLCK)
2451                posix_lock_to_flock64(flock, fl);
2452
2453out:
2454        locks_free_lock(fl);
2455        return error;
2456}
2457
2458/* Apply the lock described by @flock to an open file descriptor.
2459 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2460 */
2461int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
2462                struct flock64 *flock)
2463{
2464        struct file_lock *file_lock = locks_alloc_lock();
2465        struct file *f;
2466        int error;
2467
2468        if (file_lock == NULL)
2469                return -ENOLCK;
2470
2471        error = flock64_to_posix_lock(filp, file_lock, flock);
2472        if (error)
2473                goto out;
2474
2475        error = check_fmode_for_setlk(file_lock);
2476        if (error)
2477                goto out;
2478
2479        /*
2480         * If the cmd is requesting file-private locks, then set the
2481         * FL_OFDLCK flag and override the owner.
2482         */
2483        switch (cmd) {
2484        case F_OFD_SETLK:
2485                error = -EINVAL;
2486                if (flock->l_pid != 0)
2487                        goto out;
2488
2489                cmd = F_SETLK64;
2490                file_lock->fl_flags |= FL_OFDLCK;
2491                file_lock->fl_owner = filp;
2492                break;
2493        case F_OFD_SETLKW:
2494                error = -EINVAL;
2495                if (flock->l_pid != 0)
2496                        goto out;
2497
2498                cmd = F_SETLKW64;
2499                file_lock->fl_flags |= FL_OFDLCK;
2500                file_lock->fl_owner = filp;
2501                fallthrough;
2502        case F_SETLKW64:
2503                file_lock->fl_flags |= FL_SLEEP;
2504        }
2505
2506        error = do_lock_file_wait(filp, cmd, file_lock);
2507
2508        /*
2509         * Attempt to detect a close/fcntl race and recover by releasing the
2510         * lock that was just acquired. There is no need to do that when we're
2511         * unlocking though, or for OFD locks.
2512         */
2513        if (!error && file_lock->fl_type != F_UNLCK &&
2514            !(file_lock->fl_flags & FL_OFDLCK)) {
2515                struct files_struct *files = current->files;
2516                /*
2517                 * We need that spin_lock here - it prevents reordering between
2518                 * update of i_flctx->flc_posix and check for it done in
2519                 * close(). rcu_read_lock() wouldn't do.
2520                 */
2521                spin_lock(&files->file_lock);
2522                f = files_lookup_fd_locked(files, fd);
2523                spin_unlock(&files->file_lock);
2524                if (f != filp) {
2525                        file_lock->fl_type = F_UNLCK;
2526                        error = do_lock_file_wait(filp, cmd, file_lock);
2527                        WARN_ON_ONCE(error);
2528                        error = -EBADF;
2529                }
2530        }
2531out:
2532        locks_free_lock(file_lock);
2533        return error;
2534}
2535#endif /* BITS_PER_LONG == 32 */
2536
2537/*
2538 * This function is called when the file is being removed
2539 * from the task's fd array.  POSIX locks belonging to this task
2540 * are deleted at this time.
2541 */
2542void locks_remove_posix(struct file *filp, fl_owner_t owner)
2543{
2544        int error;
2545        struct inode *inode = file_inode(filp);
2546        struct file_lock lock;
2547        struct file_lock_context *ctx;
2548
2549        /*
2550         * If there are no locks held on this file, we don't need to call
2551         * posix_lock_file().  Another process could be setting a lock on this
2552         * file at the same time, but we wouldn't remove that lock anyway.
2553         */
2554        ctx = locks_inode_context(inode);
2555        if (!ctx || list_empty(&ctx->flc_posix))
2556                return;
2557
2558        locks_init_lock(&lock);
2559        lock.fl_type = F_UNLCK;
2560        lock.fl_flags = FL_POSIX | FL_CLOSE;
2561        lock.fl_start = 0;
2562        lock.fl_end = OFFSET_MAX;
2563        lock.fl_owner = owner;
2564        lock.fl_pid = current->tgid;
2565        lock.fl_file = filp;
2566        lock.fl_ops = NULL;
2567        lock.fl_lmops = NULL;
2568
2569        error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2570
2571        if (lock.fl_ops && lock.fl_ops->fl_release_private)
2572                lock.fl_ops->fl_release_private(&lock);
2573        trace_locks_remove_posix(inode, &lock, error);
2574}
2575EXPORT_SYMBOL(locks_remove_posix);
2576
2577/* The i_flctx must be valid when calling into here */
2578static void
2579locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2580{
2581        struct file_lock fl;
2582        struct inode *inode = file_inode(filp);
2583
2584        if (list_empty(&flctx->flc_flock))
2585                return;
2586
2587        flock_make_lock(filp, &fl, F_UNLCK);
2588        fl.fl_flags |= FL_CLOSE;
2589
2590        if (filp->f_op->flock)
2591                filp->f_op->flock(filp, F_SETLKW, &fl);
2592        else
2593                flock_lock_inode(inode, &fl);
2594
2595        if (fl.fl_ops && fl.fl_ops->fl_release_private)
2596                fl.fl_ops->fl_release_private(&fl);
2597}
2598
2599/* The i_flctx must be valid when calling into here */
2600static void
2601locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
2602{
2603        struct file_lock *fl, *tmp;
2604        LIST_HEAD(dispose);
2605
2606        if (list_empty(&ctx->flc_lease))
2607                return;
2608
2609        percpu_down_read(&file_rwsem);
2610        spin_lock(&ctx->flc_lock);
2611        list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2612                if (filp == fl->fl_file)
2613                        lease_modify(fl, F_UNLCK, &dispose);
2614        spin_unlock(&ctx->flc_lock);
2615        percpu_up_read(&file_rwsem);
2616
2617        locks_dispose_list(&dispose);
2618}
2619
2620/*
2621 * This function is called on the last close of an open file.
2622 */
2623void locks_remove_file(struct file *filp)
2624{
2625        struct file_lock_context *ctx;
2626
2627        ctx = locks_inode_context(file_inode(filp));
2628        if (!ctx)
2629                return;
2630
2631        /* remove any OFD locks */
2632        locks_remove_posix(filp, filp);
2633
2634        /* remove flock locks */
2635        locks_remove_flock(filp, ctx);
2636
2637        /* remove any leases */
2638        locks_remove_lease(filp, ctx);
2639
2640        spin_lock(&ctx->flc_lock);
2641        locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX");
2642        locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK");
2643        locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE");
2644        spin_unlock(&ctx->flc_lock);
2645}
2646
2647/**
2648 * vfs_cancel_lock - file byte range unblock lock
2649 * @filp: The file to apply the unblock to
2650 * @fl: The lock to be unblocked
2651 *
2652 * Used by lock managers to cancel blocked requests
2653 */
2654int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2655{
2656        WARN_ON_ONCE(filp != fl->fl_file);
2657        if (filp->f_op->lock)
2658                return filp->f_op->lock(filp, F_CANCELLK, fl);
2659        return 0;
2660}
2661EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2662
2663/**
2664 * vfs_inode_has_locks - are any file locks held on @inode?
2665 * @inode: inode to check for locks
2666 *
2667 * Return true if there are any FL_POSIX or FL_FLOCK locks currently
2668 * set on @inode.
2669 */
2670bool vfs_inode_has_locks(struct inode *inode)
2671{
2672        struct file_lock_context *ctx;
2673        bool ret;
2674
2675        ctx = locks_inode_context(inode);
2676        if (!ctx)
2677                return false;
2678
2679        spin_lock(&ctx->flc_lock);
2680        ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
2681        spin_unlock(&ctx->flc_lock);
2682        return ret;
2683}
2684EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
2685
2686#ifdef CONFIG_PROC_FS
2687#include <linux/proc_fs.h>
2688#include <linux/seq_file.h>
2689
2690struct locks_iterator {
2691        int     li_cpu;
2692        loff_t  li_pos;
2693};
2694
2695static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2696                            loff_t id, char *pfx, int repeat)
2697{
2698        struct inode *inode = NULL;
2699        unsigned int fl_pid;
2700        struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2701        int type;
2702
2703        fl_pid = locks_translate_pid(fl, proc_pidns);
2704        /*
2705         * If the lock owner is dead (and its pid has been freed) or is not
2706         * visible in the current pidns, zero is shown as the pid value. Check
2707         * the lock info in init_pid_ns to get the saved lock pid value.
2708         */
2709
2710        if (fl->fl_file != NULL)
2711                inode = file_inode(fl->fl_file);
2712
2713        seq_printf(f, "%lld: ", id);
2714
2715        if (repeat)
2716                seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx);
2717
2718        if (IS_POSIX(fl)) {
2719                if (fl->fl_flags & FL_ACCESS)
2720                        seq_puts(f, "ACCESS");
2721                else if (IS_OFDLCK(fl))
2722                        seq_puts(f, "OFDLCK");
2723                else
2724                        seq_puts(f, "POSIX ");
2725
2726                seq_printf(f, " %s ",
2727                             (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
2728        } else if (IS_FLOCK(fl)) {
2729                seq_puts(f, "FLOCK  ADVISORY  ");
2730        } else if (IS_LEASE(fl)) {
2731                if (fl->fl_flags & FL_DELEG)
2732                        seq_puts(f, "DELEG  ");
2733                else
2734                        seq_puts(f, "LEASE  ");
2735
2736                if (lease_breaking(fl))
2737                        seq_puts(f, "BREAKING  ");
2738                else if (fl->fl_file)
2739                        seq_puts(f, "ACTIVE    ");
2740                else
2741                        seq_puts(f, "BREAKER   ");
2742        } else {
2743                seq_puts(f, "UNKNOWN UNKNOWN  ");
2744        }
2745        type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
2746
2747        seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
2748                             (type == F_RDLCK) ? "READ" : "UNLCK");
2749        if (inode) {
2750                /* userspace relies on this representation of dev_t */
2751                seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
2752                                MAJOR(inode->i_sb->s_dev),
2753                                MINOR(inode->i_sb->s_dev), inode->i_ino);
2754        } else {
2755                seq_printf(f, "%d <none>:0 ", fl_pid);
2756        }
2757        if (IS_POSIX(fl)) {
2758                if (fl->fl_end == OFFSET_MAX)
2759                        seq_printf(f, "%Ld EOF\n", fl->fl_start);
2760                else
2761                        seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2762        } else {
2763                seq_puts(f, "0 EOF\n");
2764        }
2765}
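/*
 * For reference, lock_get_status() produces /proc/locks lines like the
 * following (values illustrative):
 *
 *	1: POSIX  ADVISORY  WRITE 1234 08:01:5678 0 EOF
 *	2: FLOCK  ADVISORY  WRITE 1235 08:01:5679 0 EOF
 *	3: LEASE  ACTIVE    READ 1236 08:01:5680 0 EOF
 *
 * i.e. id, lock class, ADVISORY, type, pid, major:minor:inode, range.
 */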
2766
2767static struct file_lock *get_next_blocked_member(struct file_lock *node)
2768{
2769        struct file_lock *tmp;
2770
2771        /* NULL node or root node */
2772        if (node == NULL || node->fl_blocker == NULL)
2773                return NULL;
2774
2775        /* Next member in the linked list could be itself */
2776        tmp = list_next_entry(node, fl_blocked_member);
2777        if (list_entry_is_head(tmp, &node->fl_blocker->fl_blocked_requests, fl_blocked_member)
2778                || tmp == node) {
2779                return NULL;
2780        }
2781
2782        return tmp;
2783}
2784
2785static int locks_show(struct seq_file *f, void *v)
2786{
2787        struct locks_iterator *iter = f->private;
2788        struct file_lock *cur, *tmp;
2789        struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2790        int level = 0;
2791
2792        cur = hlist_entry(v, struct file_lock, fl_link);
2793
2794        if (locks_translate_pid(cur, proc_pidns) == 0)
2795                return 0;
2796
2797        /* View this crossed linked list as a binary tree: the first member
2798         * of fl_blocked_requests is the left child of the current node, the
2799         * next sibling in fl_blocked_member is the right child, and the parent
2800         * can be reached via fl_blocker, so this becomes a binary-tree traversal
2801         */
2802        while (cur != NULL) {
2803                if (level)
2804                        lock_get_status(f, cur, iter->li_pos, "-> ", level);
2805                else
2806                        lock_get_status(f, cur, iter->li_pos, "", level);
2807
2808                if (!list_empty(&cur->fl_blocked_requests)) {
2809                        /* Turn left */
2810                        cur = list_first_entry_or_null(&cur->fl_blocked_requests,
2811                                struct file_lock, fl_blocked_member);
2812                        level++;
2813                } else {
2814                        /* Turn right */
2815                        tmp = get_next_blocked_member(cur);
2816                        /* Fall back to parent node */
2817                        while (tmp == NULL && cur->fl_blocker != NULL) {
2818                                cur = cur->fl_blocker;
2819                                level--;
2820                                tmp = get_next_blocked_member(cur);
2821                        }
2822                        cur = tmp;
2823                }
2824        }
2825
2826        return 0;
2827}
2828
2829static void __show_fd_locks(struct seq_file *f,
2830                        struct list_head *head, int *id,
2831                        struct file *filp, struct files_struct *files)
2832{
2833        struct file_lock *fl;
2834
2835        list_for_each_entry(fl, head, fl_list) {
2836
2837                if (filp != fl->fl_file)
2838                        continue;
2839                if (fl->fl_owner != files &&
2840                    fl->fl_owner != filp)
2841                        continue;
2842
2843                (*id)++;
2844                seq_puts(f, "lock:\t");
2845                lock_get_status(f, fl, *id, "", 0);
2846        }
2847}
2848
2849void show_fd_locks(struct seq_file *f,
2850                  struct file *filp, struct files_struct *files)
2851{
2852        struct inode *inode = file_inode(filp);
2853        struct file_lock_context *ctx;
2854        int id = 0;
2855
2856        ctx = locks_inode_context(inode);
2857        if (!ctx)
2858                return;
2859
2860        spin_lock(&ctx->flc_lock);
2861        __show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2862        __show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2863        __show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2864        spin_unlock(&ctx->flc_lock);
2865}
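/*
 * These lines surface in /proc/<pid>/fdinfo/<fd>, e.g. (illustrative):
 *
 *	lock:	1: POSIX  ADVISORY  WRITE 1234 08:01:5678 0 EOF
 */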
2866
2867static void *locks_start(struct seq_file *f, loff_t *pos)
2868        __acquires(&blocked_lock_lock)
2869{
2870        struct locks_iterator *iter = f->private;
2871
2872        iter->li_pos = *pos + 1;
2873        percpu_down_write(&file_rwsem);
2874        spin_lock(&blocked_lock_lock);
2875        return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
2876}
2877
2878static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2879{
2880        struct locks_iterator *iter = f->private;
2881
2882        ++iter->li_pos;
2883        return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
2884}
2885
2886static void locks_stop(struct seq_file *f, void *v)
2887        __releases(&blocked_lock_lock)
2888{
2889        spin_unlock(&blocked_lock_lock);
2890        percpu_up_write(&file_rwsem);
2891}
2892
2893static const struct seq_operations locks_seq_operations = {
2894        .start  = locks_start,
2895        .next   = locks_next,
2896        .stop   = locks_stop,
2897        .show   = locks_show,
2898};
2899
2900static int __init proc_locks_init(void)
2901{
2902        proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
2903                        sizeof(struct locks_iterator), NULL);
2904        return 0;
2905}
2906fs_initcall(proc_locks_init);
2907#endif
2908
2909static int __init filelock_init(void)
2910{
2911        int i;
2912
2913        flctx_cache = kmem_cache_create("file_lock_ctx",
2914                        sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2915
2916        filelock_cache = kmem_cache_create("file_lock_cache",
2917                        sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2918
2919        for_each_possible_cpu(i) {
2920                struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2921
2922                spin_lock_init(&fll->lock);
2923                INIT_HLIST_HEAD(&fll->hlist);
2924        }
2925
2926        lease_notifier_chain_init();
2927        return 0;
2928}
2929core_initcall(filelock_init);
2930