linux-old/fs/locks.c
<<
>>
Prefs
   1#define MSNFS   /* HACK HACK */
   2/*
   3 *  linux/fs/locks.c
   4 *
   5 *  Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
   6 *  Doug Evans (dje@spiff.uucp), August 07, 1992
   7 *
   8 *  Deadlock detection added.
   9 *  FIXME: one thing isn't handled yet:
  10 *      - mandatory locks (requires lots of changes elsewhere)
  11 *  Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
  12 *
  13 *  Miscellaneous edits, and a total rewrite of posix_lock_file() code.
  14 *  Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
  15 *  
  16 *  Converted file_lock_table to a linked list from an array, which eliminates
  17 *  the limits on how many active file locks are open.
  18 *  Chad Page (pageone@netcom.com), November 27, 1994
  19 * 
  20 *  Removed dependency on file descriptors. dup()'ed file descriptors now
  21 *  get the same locks as the original file descriptors, and a close() on
  22 *  any file descriptor removes ALL the locks on the file for the current
  23 *  process. Since locks still depend on the process id, locks are inherited
  24 *  after an exec() but not after a fork(). This agrees with POSIX, and both
  25 *  BSD and SVR4 practice.
  26 *  Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
  27 *
  28 *  Scrapped free list which is redundant now that we allocate locks
  29 *  dynamically with kmalloc()/kfree().
  30 *  Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
  31 *
  32 *  Implemented two lock personalities - FL_FLOCK and FL_POSIX.
  33 *
  34 *  FL_POSIX locks are created with calls to fcntl() and lockf() through the
  35 *  fcntl() system call. They have the semantics described above.
  36 *
  37 *  FL_FLOCK locks are created with calls to flock(), through the flock()
  38 *  system call, which is new. Old C libraries implement flock() via fcntl()
  39 *  and will continue to use the old, broken implementation.
  40 *
  41 *  FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
  42 *  with a file pointer (filp). As a result they can be shared by a parent
  43 *  process and its children after a fork(). They are removed when the last
  44 *  file descriptor referring to the file pointer is closed (unless explicitly
  45 *  unlocked). 
  46 *
  47 *  FL_FLOCK locks never deadlock, an existing lock is always removed before
  48 *  upgrading from shared to exclusive (or vice versa). When this happens
  49 *  any processes blocked by the current lock are woken up and allowed to
  50 *  run before the new lock is applied.
  51 *  Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
  52 *
  53 *  Removed some race conditions in flock_lock_file(), marked other possible
  54 *  races. Just grep for FIXME to see them. 
  55 *  Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
  56 *
  57 *  Addressed Dmitry's concerns. Deadlock checking no longer recursive.
  58 *  Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
  59 *  once we've checked for blocking and deadlocking.
  60 *  Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
  61 *
  62 *  Initial implementation of mandatory locks. SunOS turned out to be
  63 *  a rotten model, so I implemented the "obvious" semantics.
  64 *  See 'linux/Documentation/mandatory.txt' for details.
  65 *  Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
  66 *
  67 *  Don't allow mandatory locks on mmap()'ed files. Added simple functions to
  68 *  check if a file has mandatory locks, used by mmap(), open() and creat() to
  69 *  see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
  70 *  Manual, Section 2.
  71 *  Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
  72 *
  73 *  Tidied up block list handling. Added '/proc/locks' interface.
  74 *  Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
  75 *
  76 *  Fixed deadlock condition for pathological code that mixes calls to
  77 *  flock() and fcntl().
  78 *  Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
  79 *
  80 *  Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
  81 *  for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
  82 *  guarantee sensible behaviour in the case where file system modules might
  83 *  be compiled with different options than the kernel itself.
  84 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
  85 *
  86 *  Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
  87 *  (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
  88 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
  89 *
  90 *  Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
  91 *  locks. Changed process synchronisation to avoid dereferencing locks that
  92 *  have already been freed.
  93 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
  94 *
  95 *  Made the block list a circular list to minimise searching in the list.
  96 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
  97 *
  98 *  Made mandatory locking a mount option. Default is not to allow mandatory
  99 *  locking.
 100 *  Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
 101 *
 102 *  Some adaptations for NFS support.
 103 *  Olaf Kirch (okir@monad.swb.de), Dec 1996,
 104 *
 105 *  Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
 106 *  Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
 107 *
 108 *  Use slab allocator instead of kmalloc/kfree.
 109 *  Use generic list implementation from <linux/list.h>.
 110 *  Sped up posix_locks_deadlock by only considering blocked locks.
 111 *  Matthew Wilcox <willy@thepuffingroup.com>, March, 2000.
 112 *
 113 *  Leases and LOCK_MAND
 114 *  Matthew Wilcox <willy@linuxcare.com>, June, 2000.
 115 *  Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
 116 */
 117
 118#include <linux/slab.h>
 119#include <linux/file.h>
 120#include <linux/smp_lock.h>
 121#include <linux/init.h>
 122#include <linux/capability.h>
 123#include <linux/sched.h>
 124#include <linux/timer.h>
 125
 126#include <asm/semaphore.h>
 127#include <asm/uaccess.h>
 128
 129int leases_enable = 1;
 130int lease_break_time = 45;
 131
 132LIST_HEAD(file_lock_list);
 133static LIST_HEAD(blocked_list);
 134
 135static kmem_cache_t *filelock_cache;
 136
 137/* Allocate an empty lock structure. */
 138static struct file_lock *locks_alloc_lock(int account)
 139{
 140        struct file_lock *fl;
 141        if (account && current->locks >= current->rlim[RLIMIT_LOCKS].rlim_cur)
 142                return NULL;
 143        fl = kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
 144        if (fl)
 145                current->locks++;
 146        return fl;
 147}
 148
 149/* Free a lock which is not in use. */
 150static inline void locks_free_lock(struct file_lock *fl)
 151{
 152        if (fl == NULL) {
 153                BUG();
 154                return;
 155        }
 156        current->locks--;
 157        if (waitqueue_active(&fl->fl_wait))
 158                panic("Attempting to free lock with active wait queue");
 159
 160        if (!list_empty(&fl->fl_block))
 161                panic("Attempting to free lock with active block list");
 162
 163        if (!list_empty(&fl->fl_link))
 164                panic("Attempting to free lock on active lock list");
 165
 166        kmem_cache_free(filelock_cache, fl);
 167}
 168
 169void locks_init_lock(struct file_lock *fl)
 170{
 171        INIT_LIST_HEAD(&fl->fl_link);
 172        INIT_LIST_HEAD(&fl->fl_block);
 173        init_waitqueue_head(&fl->fl_wait);
 174        fl->fl_next = NULL;
 175        fl->fl_fasync = NULL;
 176        fl->fl_owner = 0;
 177        fl->fl_pid = 0;
 178        fl->fl_file = NULL;
 179        fl->fl_flags = 0;
 180        fl->fl_type = 0;
 181        fl->fl_start = fl->fl_end = 0;
 182        fl->fl_notify = NULL;
 183        fl->fl_insert = NULL;
 184        fl->fl_remove = NULL;
 185}
 186
 187/*
 188 * Initialises the fields of the file lock which are invariant for
 189 * free file_locks.
 190 */
 191static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
 192{
 193        struct file_lock *lock = (struct file_lock *) foo;
 194
 195        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) !=
 196                                        SLAB_CTOR_CONSTRUCTOR)
 197                return;
 198
 199        locks_init_lock(lock);
 200}
 201
 202/*
 203 * Initialize a new lock from an existing file_lock structure.
 204 */
 205void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 206{
 207        new->fl_owner = fl->fl_owner;
 208        new->fl_pid = fl->fl_pid;
 209        new->fl_file = fl->fl_file;
 210        new->fl_flags = fl->fl_flags;
 211        new->fl_type = fl->fl_type;
 212        new->fl_start = fl->fl_start;
 213        new->fl_end = fl->fl_end;
 214        new->fl_notify = fl->fl_notify;
 215        new->fl_insert = fl->fl_insert;
 216        new->fl_remove = fl->fl_remove;
 217        new->fl_u = fl->fl_u;
 218}
 219
 220/* Fill in a file_lock structure with an appropriate FLOCK lock. */
 221static struct file_lock *flock_make_lock(struct file *filp, unsigned int type)
 222{
 223        struct file_lock *fl = locks_alloc_lock(1);
 224        if (fl == NULL)
 225                return NULL;
 226
 227        fl->fl_owner = NULL;
 228        fl->fl_file = filp;
 229        fl->fl_pid = current->pid;
 230        fl->fl_flags = FL_FLOCK;
 231        fl->fl_type = type;
 232        fl->fl_start = 0;
 233        fl->fl_end = OFFSET_MAX;
 234        fl->fl_notify = NULL;
 235        fl->fl_insert = NULL;
 236        fl->fl_remove = NULL;
 237        
 238        return fl;
 239}
 240
 241static int assign_type(struct file_lock *fl, int type)
 242{
 243        switch (type) {
 244        case F_RDLCK:
 245        case F_WRLCK:
 246        case F_UNLCK:
 247                fl->fl_type = type;
 248                break;
 249        default:
 250                return -EINVAL;
 251        }
 252        return 0;
 253}
 254
 255/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
 256 * style lock.
 257 */
 258static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 259                               struct flock *l)
 260{
 261        off_t start, end;
 262
 263        switch (l->l_whence) {
 264        case 0: /*SEEK_SET*/
 265                start = 0;
 266                break;
 267        case 1: /*SEEK_CUR*/
 268                start = filp->f_pos;
 269                break;
 270        case 2: /*SEEK_END*/
 271                start = filp->f_dentry->d_inode->i_size;
 272                break;
 273        default:
 274                return -EINVAL;
 275        }
 276
 277        if (((start += l->l_start) < 0) || (l->l_len < 0))
 278                return -EINVAL;
 279        end = start + l->l_len - 1;
 280        if (l->l_len > 0 && end < 0)
 281                return -EOVERFLOW;
 282        fl->fl_start = start;   /* we record the absolute position */
 283        fl->fl_end = end;
 284        if (l->l_len == 0)
 285                fl->fl_end = OFFSET_MAX;
 286        
 287        fl->fl_owner = current->files;
 288        fl->fl_pid = current->pid;
 289        fl->fl_file = filp;
 290        fl->fl_flags = FL_POSIX;
 291        fl->fl_notify = NULL;
 292        fl->fl_insert = NULL;
 293        fl->fl_remove = NULL;
 294
 295        return assign_type(fl, l->l_type);
 296}
 297
 298#if BITS_PER_LONG == 32
 299static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 300                                 struct flock64 *l)
 301{
 302        loff_t start;
 303
 304        switch (l->l_whence) {
 305        case 0: /*SEEK_SET*/
 306                start = 0;
 307                break;
 308        case 1: /*SEEK_CUR*/
 309                start = filp->f_pos;
 310                break;
 311        case 2: /*SEEK_END*/
 312                start = filp->f_dentry->d_inode->i_size;
 313                break;
 314        default:
 315                return -EINVAL;
 316        }
 317
 318        if (((start += l->l_start) < 0) || (l->l_len < 0))
 319                return -EINVAL;
 320        fl->fl_end = start + l->l_len - 1;
 321        if (l->l_len > 0 && fl->fl_end < 0)
 322                return -EOVERFLOW;
 323        fl->fl_start = start;   /* we record the absolute position */
 324        if (l->l_len == 0)
 325                fl->fl_end = OFFSET_MAX;
 326        
 327        fl->fl_owner = current->files;
 328        fl->fl_pid = current->pid;
 329        fl->fl_file = filp;
 330        fl->fl_flags = FL_POSIX;
 331        fl->fl_notify = NULL;
 332        fl->fl_insert = NULL;
 333        fl->fl_remove = NULL;
 334
 335        switch (l->l_type) {
 336        case F_RDLCK:
 337        case F_WRLCK:
 338        case F_UNLCK:
 339                fl->fl_type = l->l_type;
 340                break;
 341        default:
 342                return -EINVAL;
 343        }
 344
 345        return (0);
 346}
 347#endif
 348
 349/* Allocate a file_lock initialised to this type of lease */
 350static int lease_alloc(struct file *filp, int type, struct file_lock **flp)
 351{
 352        struct file_lock *fl = locks_alloc_lock(1);
 353        if (fl == NULL)
 354                return -ENOMEM;
 355
 356        fl->fl_owner = current->files;
 357        fl->fl_pid = current->pid;
 358
 359        fl->fl_file = filp;
 360        fl->fl_flags = FL_LEASE;
 361        if (assign_type(fl, type) != 0) {
 362                locks_free_lock(fl);
 363                return -EINVAL;
 364        }
 365        fl->fl_start = 0;
 366        fl->fl_end = OFFSET_MAX;
 367        fl->fl_notify = NULL;
 368        fl->fl_insert = NULL;
 369        fl->fl_remove = NULL;
 370
 371        *flp = fl;
 372        return 0;
 373}
 374
 375/* Check if two locks overlap each other.
 376 */
 377static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 378{
 379        return ((fl1->fl_end >= fl2->fl_start) &&
 380                (fl2->fl_end >= fl1->fl_start));
 381}
 382
 383/*
 384 * Check whether two locks have the same owner
 385 * N.B. Do we need the test on PID as well as owner?
 386 * (Clone tasks should be considered as one "owner".)
 387 */
 388static inline int
 389locks_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 390{
 391        return (fl1->fl_owner == fl2->fl_owner) &&
 392               (fl1->fl_pid   == fl2->fl_pid);
 393}
 394
 395/* Remove waiter from blocker's block list.
 396 * When blocker ends up pointing to itself then the list is empty.
 397 */
 398static void locks_delete_block(struct file_lock *waiter)
 399{
 400        list_del(&waiter->fl_block);
 401        INIT_LIST_HEAD(&waiter->fl_block);
 402        list_del(&waiter->fl_link);
 403        INIT_LIST_HEAD(&waiter->fl_link);
 404        waiter->fl_next = NULL;
 405}
 406
 407/* Insert waiter into blocker's block list.
 408 * We use a circular list so that processes can be easily woken up in
 409 * the order they blocked. The documentation doesn't require this but
 410 * it seems like the reasonable thing to do.
 411 */
 412static void locks_insert_block(struct file_lock *blocker, 
 413                               struct file_lock *waiter)
 414{
 415        if (!list_empty(&waiter->fl_block)) {
 416                printk(KERN_ERR "locks_insert_block: removing duplicated lock "
 417                        "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid,
 418                        waiter->fl_start, waiter->fl_end, waiter->fl_type);
 419                locks_delete_block(waiter);
 420        }
 421        list_add_tail(&waiter->fl_block, &blocker->fl_block);
 422        waiter->fl_next = blocker;
 423        list_add(&waiter->fl_link, &blocked_list);
 424}
 425
 426static inline
 427void locks_notify_blocked(struct file_lock *waiter)
 428{
 429        if (waiter->fl_notify)
 430                waiter->fl_notify(waiter);
 431        else
 432                wake_up(&waiter->fl_wait);
 433}
 434
 435/* Wake up processes blocked waiting for blocker.
 436 * If told to wait then schedule the processes until the block list
 437 * is empty, otherwise empty the block list ourselves.
 438 */
 439static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait)
 440{
 441        while (!list_empty(&blocker->fl_block)) {
 442                struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block);
 443
 444                if (wait) {
 445                        locks_notify_blocked(waiter);
 446                        /* Let the blocked process remove waiter from the
 447                         * block list when it gets scheduled.
 448                         */
 449                        yield();
 450                } else {
 451                        /* Remove waiter from the block list, because by the
 452                         * time it wakes up blocker won't exist any more.
 453                         */
 454                        locks_delete_block(waiter);
 455                        locks_notify_blocked(waiter);
 456                }
 457        }
 458}
 459
 460/* Insert file lock fl into an inode's lock list at the position indicated
 461 * by pos. At the same time add the lock to the global file lock list.
 462 */
 463static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
 464{
 465        list_add(&fl->fl_link, &file_lock_list);
 466
 467        /* insert into file's list */
 468        fl->fl_next = *pos;
 469        *pos = fl;
 470
 471        if (fl->fl_insert)
 472                fl->fl_insert(fl);
 473}
 474
 475/*
 476 * Remove lock from the lock lists
 477 */
 478static inline void _unhash_lock(struct file_lock **thisfl_p)
 479{
 480        struct file_lock *fl = *thisfl_p;
 481
 482        *thisfl_p = fl->fl_next;
 483        fl->fl_next = NULL;
 484
 485        list_del_init(&fl->fl_link);
 486}
 487
 488/*
 489 * Wake up processes that are blocked waiting for this lock,
 490 * notify the FS that the lock has been cleared and
 491 * finally free the lock.
 492 */
 493static inline void _delete_lock(struct file_lock *fl, unsigned int wait)
 494{
 495        fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
 496        if (fl->fl_fasync != NULL){
 497                printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
 498                fl->fl_fasync = NULL;
 499        }
 500
 501        if (fl->fl_remove)
 502                fl->fl_remove(fl);
 503
 504        locks_wake_up_blocks(fl, wait);
 505        locks_free_lock(fl);
 506}
 507
 508/*
 509 * Delete a lock and then free it.
 510 */
 511static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
 512{
 513        struct file_lock *fl = *thisfl_p;
 514
 515        _unhash_lock(thisfl_p);
 516        _delete_lock(fl, wait);
 517}
 518
 519/*
 520 * Call back client filesystem in order to get it to unregister a lock,
 521 * then delete lock. Essentially useful only in locks_remove_*().
 522 * Note: this must be called with the semaphore already held!
 523 */
 524static inline void locks_unlock_delete(struct file_lock **thisfl_p)
 525{
 526        struct file_lock *fl = *thisfl_p;
 527        int (*lock)(struct file *, int, struct file_lock *);
 528
 529        _unhash_lock(thisfl_p);
 530        if (fl->fl_file->f_op &&
 531            (lock = fl->fl_file->f_op->lock) != NULL) {
 532                fl->fl_type = F_UNLCK;
 533                lock(fl->fl_file, F_SETLK, fl);
 534        }
 535        _delete_lock(fl, 0);
 536}
 537
 538/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
 539 * checks for shared/exclusive status of overlapping locks.
 540 */
 541static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 542{
 543        switch (caller_fl->fl_type) {
 544        case F_RDLCK:
 545                return (sys_fl->fl_type == F_WRLCK);
 546
 547        case F_WRLCK:
 548                return (1);
 549
 550        default:
 551                printk(KERN_ERR "locks_conflict(): impossible lock type - %d\n",
 552                       caller_fl->fl_type);
 553                break;
 554        }
 555        return (0);     /* This should never happen */
 556}
 557
 558/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 559 * checking before calling the locks_conflict().
 560 */
 561static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 562{
 563        /* POSIX locks owned by the same process do not conflict with
 564         * each other.
 565         */
 566        if (!(sys_fl->fl_flags & FL_POSIX) ||
 567            locks_same_owner(caller_fl, sys_fl))
 568                return (0);
 569
 570        /* Check whether they overlap */
 571        if (!locks_overlap(caller_fl, sys_fl))
 572                return 0;
 573
 574        return (locks_conflict(caller_fl, sys_fl));
 575}
 576
 577/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
 578 * checking before calling the locks_conflict().
 579 */
 580static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 581{
 582        /* FLOCK locks referring to the same filp do not conflict with
 583         * each other.
 584         */
 585        if (!(sys_fl->fl_flags & FL_FLOCK) ||
 586            (caller_fl->fl_file == sys_fl->fl_file))
 587                return (0);
 588#ifdef MSNFS
 589        if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
 590                return 0;
 591#endif
 592
 593        return (locks_conflict(caller_fl, sys_fl));
 594}
 595
 596static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout)
 597{
 598        int result = 0;
 599        DECLARE_WAITQUEUE(wait, current);
 600
 601        current->state = TASK_INTERRUPTIBLE;
 602        add_wait_queue(fl_wait, &wait);
 603        if (timeout == 0)
 604                schedule();
 605        else
 606                result = schedule_timeout(timeout);
 607        if (signal_pending(current))
 608                result = -ERESTARTSYS;
 609        remove_wait_queue(fl_wait, &wait);
 610        current->state = TASK_RUNNING;
 611        return result;
 612}
 613
 614static int locks_block_on(struct file_lock *blocker, struct file_lock *waiter)
 615{
 616        int result;
 617        locks_insert_block(blocker, waiter);
 618        result = interruptible_sleep_on_locked(&waiter->fl_wait, 0);
 619        locks_delete_block(waiter);
 620        return result;
 621}
 622
 623static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time)
 624{
 625        int result;
 626        locks_insert_block(blocker, waiter);
 627        result = interruptible_sleep_on_locked(&waiter->fl_wait, time);
 628        locks_delete_block(waiter);
 629        return result;
 630}
 631
 632struct file_lock *
 633posix_test_lock(struct file *filp, struct file_lock *fl)
 634{
 635        struct file_lock *cfl;
 636
 637        lock_kernel();
 638        for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
 639                if (!(cfl->fl_flags & FL_POSIX))
 640                        continue;
 641                if (posix_locks_conflict(cfl, fl))
 642                        break;
 643        }
 644        unlock_kernel();
 645
 646        return (cfl);
 647}
 648
 649/* This function tests for deadlock condition before putting a process to
 650 * sleep. The detection scheme is no longer recursive. Recursive was neat,
 651 * but dangerous - we risked stack corruption if the lock data was bad, or
 652 * if the recursion was too deep for any other reason.
 653 *
 654 * We rely on the fact that a task can only be on one lock's wait queue
 655 * at a time. When we find blocked_task on a wait queue we can re-search
 656 * with blocked_task equal to that queue's owner, until either blocked_task
 657 * isn't found, or blocked_task is found on a queue owned by my_task.
 658 *
 659 * Note: the above assumption may not be true when handling lock requests
 660 * from a broken NFS client. But broken NFS clients have a lot more to
 661 * worry about than proper deadlock detection anyway... --okir
 662 */
 663int posix_locks_deadlock(struct file_lock *caller_fl,
 664                                struct file_lock *block_fl)
 665{
 666        struct list_head *tmp;
 667        fl_owner_t caller_owner, blocked_owner;
 668        unsigned int     caller_pid, blocked_pid;
 669
 670        caller_owner = caller_fl->fl_owner;
 671        caller_pid = caller_fl->fl_pid;
 672        blocked_owner = block_fl->fl_owner;
 673        blocked_pid = block_fl->fl_pid;
 674
 675next_task:
 676        if (caller_owner == blocked_owner && caller_pid == blocked_pid)
 677                return 1;
 678        list_for_each(tmp, &blocked_list) {
 679                struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
 680                if ((fl->fl_owner == blocked_owner)
 681                    && (fl->fl_pid == blocked_pid)) {
 682                        fl = fl->fl_next;
 683                        blocked_owner = fl->fl_owner;
 684                        blocked_pid = fl->fl_pid;
 685                        goto next_task;
 686                }
 687        }
 688        return 0;
 689}
 690
 691int locks_mandatory_locked(struct inode *inode)
 692{
 693        fl_owner_t owner = current->files;
 694        struct file_lock *fl;
 695
 696        /*
 697         * Search the lock list for this inode for any POSIX locks.
 698         */
 699        lock_kernel();
 700        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 701                if (!(fl->fl_flags & FL_POSIX))
 702                        continue;
 703                if (fl->fl_owner != owner)
 704                        break;
 705        }
 706        unlock_kernel();
 707        return fl ? -EAGAIN : 0;
 708}
 709
 710int locks_mandatory_area(int read_write, struct inode *inode,
 711                         struct file *filp, loff_t offset,
 712                         size_t count)
 713{
 714        struct file_lock *fl;
 715        struct file_lock *new_fl = locks_alloc_lock(0);
 716        int error;
 717
 718        if (new_fl == NULL)
 719                return -ENOMEM;
 720
 721        new_fl->fl_owner = current->files;
 722        new_fl->fl_pid = current->pid;
 723        new_fl->fl_file = filp;
 724        new_fl->fl_flags = FL_POSIX | FL_ACCESS;
 725        new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
 726        new_fl->fl_start = offset;
 727        new_fl->fl_end = offset + count - 1;
 728
 729        error = 0;
 730        lock_kernel();
 731
 732repeat:
 733        /* Search the lock list for this inode for locks that conflict with
 734         * the proposed read/write.
 735         */
 736        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 737                if (!(fl->fl_flags & FL_POSIX))
 738                        continue;
 739                if (fl->fl_start > new_fl->fl_end)
 740                        break;
 741                if (posix_locks_conflict(new_fl, fl)) {
 742                        error = -EAGAIN;
 743                        if (filp && (filp->f_flags & O_NONBLOCK))
 744                                break;
 745                        error = -EDEADLK;
 746                        if (posix_locks_deadlock(new_fl, fl))
 747                                break;
 748        
 749                        error = locks_block_on(fl, new_fl);
 750                        if (error != 0)
 751                                break;
 752        
 753                        /*
 754                         * If we've been sleeping someone might have
 755                         * changed the permissions behind our back.
 756                         */
 757                        if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID)
 758                                break;
 759                        goto repeat;
 760                }
 761        }
 762        locks_free_lock(new_fl);
 763        unlock_kernel();
 764        return error;
 765}
 766
 767/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
 768 * at the head of the list, but that's secret knowledge known only to
 769 * flock_lock_file and posix_lock_file.
 770 */
 771static int flock_lock_file(struct file *filp, unsigned int lock_type,
 772                           unsigned int wait)
 773{
 774        struct file_lock *fl;
 775        struct file_lock *new_fl = NULL;
 776        struct file_lock **before;
 777        struct inode * inode = filp->f_dentry->d_inode;
 778        int error, change;
 779        int unlock = (lock_type == F_UNLCK);
 780
 781        /*
 782         * If we need a new lock, get it in advance to avoid races.
 783         */
 784        if (!unlock) {
 785                error = -ENOLCK;
 786                new_fl = flock_make_lock(filp, lock_type);
 787                if (!new_fl)
 788                        return error;
 789        }
 790
 791        error = 0;
 792search:
 793        change = 0;
 794        before = &inode->i_flock;
 795        while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) {
 796                if (filp == fl->fl_file) {
 797                        if (lock_type == fl->fl_type)
 798                                goto out;
 799                        change = 1;
 800                        break;
 801                }
 802                before = &fl->fl_next;
 803        }
 804        /* change means that we are changing the type of an existing lock,
 805         * or else unlocking it.
 806         */
 807        if (change) {
 808                /* N.B. What if the wait argument is false? */
 809                locks_delete_lock(before, !unlock);
 810                /*
 811                 * If we waited, another lock may have been added ...
 812                 */
 813                if (!unlock)
 814                        goto search;
 815        }
 816        if (unlock)
 817                goto out;
 818
 819repeat:
 820        for (fl = inode->i_flock; (fl != NULL) && (fl->fl_flags & FL_FLOCK);
 821             fl = fl->fl_next) {
 822                if (!flock_locks_conflict(new_fl, fl))
 823                        continue;
 824                error = -EAGAIN;
 825                if (!wait)
 826                        goto out;
 827                error = locks_block_on(fl, new_fl);
 828                if (error != 0)
 829                        goto out;
 830                goto repeat;
 831        }
 832        locks_insert_lock(&inode->i_flock, new_fl);
 833        new_fl = NULL;
 834        error = 0;
 835
 836out:
 837        if (new_fl)
 838                locks_free_lock(new_fl);
 839        return error;
 840}
 841
 842/**
 843 *      posix_lock_file:
 844 *      @filp: The file to apply the lock to
 845 *      @caller: The lock to be applied
 846 *      @wait: 1 to retry automatically, 0 to return -EAGAIN
 847 *
 848 * Add a POSIX style lock to a file.
 849 * We merge adjacent locks whenever possible. POSIX locks are sorted by owner
 850 * task, then by starting address
 851 *
 852 * Kai Petzke writes:
 853 * To make freeing a lock much faster, we keep a pointer to the lock before the
 854 * actual one. But the real gain of the new coding was, that lock_it() and
 855 * unlock_it() became one function.
 856 *
 857 * To all purists: Yes, I use a few goto's. Just pass on to the next function.
 858 */
 859
 860int posix_lock_file(struct file *filp, struct file_lock *caller,
 861                           unsigned int wait)
 862{
 863        struct file_lock *fl;
 864        struct file_lock *new_fl, *new_fl2;
 865        struct file_lock *left = NULL;
 866        struct file_lock *right = NULL;
 867        struct file_lock **before;
 868        struct inode * inode = filp->f_dentry->d_inode;
 869        int error, added = 0;
 870
 871        /*
 872         * We may need two file_lock structures for this operation,
 873         * so we get them in advance to avoid races.
 874         */
 875        new_fl = locks_alloc_lock(0);
 876        new_fl2 = locks_alloc_lock(0);
 877        error = -ENOLCK; /* "no luck" */
 878        if (!(new_fl && new_fl2))
 879                goto out_nolock;
 880
 881        lock_kernel();
 882        if (caller->fl_type != F_UNLCK) {
 883  repeat:
 884                for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 885                        if (!(fl->fl_flags & FL_POSIX))
 886                                continue;
 887                        if (!posix_locks_conflict(caller, fl))
 888                                continue;
 889                        error = -EAGAIN;
 890                        if (!wait)
 891                                goto out;
 892                        error = -EDEADLK;
 893                        if (posix_locks_deadlock(caller, fl))
 894                                goto out;
 895
 896                        error = locks_block_on(fl, caller);
 897                        if (error != 0)
 898                                goto out;
 899                        goto repeat;
 900                }
 901        }
 902
 903        /*
 904         * We've allocated the new locks in advance, so there are no
 905         * errors possible (and no blocking operations) from here on.
 906         * 
 907         * Find the first old lock with the same owner as the new lock.
 908         */
 909        
 910        before = &inode->i_flock;
 911
 912        /* First skip locks owned by other processes.
 913         */
 914        while ((fl = *before) && (!(fl->fl_flags & FL_POSIX) ||
 915                                  !locks_same_owner(caller, fl))) {
 916                before = &fl->fl_next;
 917        }
 918
 919        /* Process locks with this owner.
 920         */
 921        while ((fl = *before) && locks_same_owner(caller, fl)) {
 922                /* Detect adjacent or overlapping regions (if same lock type)
 923                 */
 924                if (caller->fl_type == fl->fl_type) {
 925                        if (fl->fl_end < caller->fl_start - 1)
 926                                goto next_lock;
 927                        /* If the next lock in the list has entirely bigger
 928                         * addresses than the new one, insert the lock here.
 929                         */
 930                        if (fl->fl_start > caller->fl_end + 1)
 931                                break;
 932
 933                        /* If we come here, the new and old lock are of the
 934                         * same type and adjacent or overlapping. Make one
 935                         * lock yielding from the lower start address of both
 936                         * locks to the higher end address.
 937                         */
 938                        if (fl->fl_start > caller->fl_start)
 939                                fl->fl_start = caller->fl_start;
 940                        else
 941                                caller->fl_start = fl->fl_start;
 942                        if (fl->fl_end < caller->fl_end)
 943                                fl->fl_end = caller->fl_end;
 944                        else
 945                                caller->fl_end = fl->fl_end;
 946                        if (added) {
 947                                locks_delete_lock(before, 0);
 948                                continue;
 949                        }
 950                        caller = fl;
 951                        added = 1;
 952                }
 953                else {
 954                        /* Processing for different lock types is a bit
 955                         * more complex.
 956                         */
 957                        if (fl->fl_end < caller->fl_start)
 958                                goto next_lock;
 959                        if (fl->fl_start > caller->fl_end)
 960                                break;
 961                        if (caller->fl_type == F_UNLCK)
 962                                added = 1;
 963                        if (fl->fl_start < caller->fl_start)
 964                                left = fl;
 965                        /* If the next lock in the list has a higher end
 966                         * address than the new one, insert the new one here.
 967                         */
 968                        if (fl->fl_end > caller->fl_end) {
 969                                right = fl;
 970                                break;
 971                        }
 972                        if (fl->fl_start >= caller->fl_start) {
 973                                /* The new lock completely replaces an old
 974                                 * one (This may happen several times).
 975                                 */
 976                                if (added) {
 977                                        locks_delete_lock(before, 0);
 978                                        continue;
 979                                }
 980                                /* Replace the old lock with the new one.
 981                                 * Wake up anybody waiting for the old one,
 982                                 * as the change in lock type might satisfy
 983                                 * their needs.
 984                                 */
 985                                locks_wake_up_blocks(fl, 0);    /* This cannot schedule()! */
 986                                fl->fl_start = caller->fl_start;
 987                                fl->fl_end = caller->fl_end;
 988                                fl->fl_type = caller->fl_type;
 989                                fl->fl_u = caller->fl_u;
 990                                caller = fl;
 991                                added = 1;
 992                        }
 993                }
 994                /* Go on to next lock.
 995                 */
 996        next_lock:
 997                before = &fl->fl_next;
 998        }
 999
1000        error = 0;
1001        if (!added) {
1002                if (caller->fl_type == F_UNLCK)
1003                        goto out;
1004                locks_copy_lock(new_fl, caller);
1005                locks_insert_lock(before, new_fl);
1006                new_fl = NULL;
1007        }
1008        if (right) {
1009                if (left == right) {
1010                        /* The new lock breaks the old one in two pieces,
1011                         * so we have to use the second new lock.
1012                         */
1013                        left = new_fl2;
1014                        new_fl2 = NULL;
1015                        locks_copy_lock(left, right);
1016                        locks_insert_lock(before, left);
1017                }
1018                right->fl_start = caller->fl_end + 1;
1019                locks_wake_up_blocks(right, 0);
1020        }
1021        if (left) {
1022                left->fl_end = caller->fl_start - 1;
1023                locks_wake_up_blocks(left, 0);
1024        }
1025out:
1026        unlock_kernel();
1027out_nolock:
1028        /*
1029         * Free any unused locks.
1030         */
1031        if (new_fl)
1032                locks_free_lock(new_fl);
1033        if (new_fl2)
1034                locks_free_lock(new_fl2);
1035        return error;
1036}
1037
/*
 * Translate a flock() command into the lock type used internally.
 * LOCK_NB is ignored for the purpose of the translation.  Returns
 * F_RDLCK, F_WRLCK or F_UNLCK, or -EINVAL for an unknown command.
 * With MSNFS defined, a LOCK_MAND command is passed through reduced
 * to its LOCK_MAND and LOCK_RW bits.
 */
static inline int flock_translate_cmd(int cmd)
{
	int op;

#ifdef MSNFS
	if (cmd & LOCK_MAND)
		return cmd & (LOCK_MAND | LOCK_RW);
#endif
	op = cmd & ~LOCK_NB;
	if (op == LOCK_SH)
		return F_RDLCK;
	if (op == LOCK_EX)
		return F_WRLCK;
	if (op == LOCK_UN)
		return F_UNLCK;
	return -EINVAL;
}
1053
1054/* We already had a lease on this file; just change its type */
1055static int lease_modify(struct file_lock **before, int arg)
1056{
1057        struct file_lock *fl = *before;
1058        int error = assign_type(fl, arg);
1059
1060        if (error)
1061                return error;
1062        locks_wake_up_blocks(fl, 0);
1063        if (arg == F_UNLCK) {
1064                struct file *filp = fl->fl_file;
1065
1066                filp->f_owner.pid = 0;
1067                filp->f_owner.uid = 0;
1068                filp->f_owner.euid = 0;
1069                filp->f_owner.signum = 0;
1070                locks_delete_lock(before, 0);
1071        }
1072        return 0;
1073}
1074
1075static void time_out_leases(struct inode *inode)
1076{
1077        struct file_lock **before;
1078        struct file_lock *fl;
1079
1080        before = &inode->i_flock;
1081        while ((fl = *before) && (fl->fl_flags & FL_LEASE)
1082                        && (fl->fl_type & F_INPROGRESS)) {
1083                if ((fl->fl_break_time == 0)
1084                                || time_before(jiffies, fl->fl_break_time)) {
1085                        before = &fl->fl_next;
1086                        continue;
1087                }
1088                printk(KERN_INFO "lease broken - owner pid = %d\n", fl->fl_pid);
1089                lease_modify(before, fl->fl_type & ~F_INPROGRESS);
1090                if (fl == *before)      /* lease_modify may have freed fl */
1091                        before = &fl->fl_next;
1092        }
1093}
1094
1095/**
1096 *      __get_lease     -       revoke all outstanding leases on file
1097 *      @inode: the inode of the file to return
1098 *      @mode: the open mode (read or write)
1099 *
1100 *      get_lease (inlined for speed) has checked there already
1101 *      is a lease on this file.  Leases are broken on a call to open()
1102 *      or truncate().  This function can sleep unless you
1103 *      specified %O_NONBLOCK to your open().
 *
 *      Returns 0 when no lease stands in the way of @mode or once no
 *      lease is left marked F_INPROGRESS; -EWOULDBLOCK when a break is
 *      pending but the caller holds one of the leases itself or passed
 *      %O_NONBLOCK; the lease_alloc() error when the waiter could not be
 *      allocated and sleeping would have been required; otherwise the
 *      negative value returned by locks_block_on_timeout().
1104 */
1105int __get_lease(struct inode *inode, unsigned int mode)
1106{
1107        int error = 0, future;
1108        struct file_lock *new_fl, *flock;
1109        struct file_lock *fl;
1110        int alloc_err;
1111        unsigned long break_time;
1112        int i_have_this_lease = 0;
1113
        /*
         * Allocate the lock we will wait with before taking the kernel
         * lock.  A failure is only remembered here; it is reported below
         * solely if we would actually have to sleep.
         */
1114        alloc_err = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK,
1115                        &new_fl);
1116
1117        lock_kernel();
1118
        /* Finish off any break whose deadline has already passed. */
1119        time_out_leases(inode);
1120
        /*
         * Only the head of i_flock is examined: if it is not a lease there
         * is nothing to break (presumably leases are kept at the front of
         * the list -- confirm against the insertion code).
         */
1121        flock = inode->i_flock;
1122        if ((flock == NULL) || (flock->fl_flags & FL_LEASE) == 0)
1123                goto out;
1124
        /* Does the current task (by its files struct) own one of the leases? */
1125        for (fl = flock; fl && (fl->fl_flags & FL_LEASE); fl = fl->fl_next)
1126                if (fl->fl_owner == current->files)
1127                        i_have_this_lease = 1;
1128
        /* Decide which type the leases must be moved to. */
1129        if (mode & FMODE_WRITE) {
1130                /* If we want write access, we have to revoke any lease. */
1131                future = F_UNLCK | F_INPROGRESS;
1132        } else if (flock->fl_type & F_INPROGRESS) {
1133                /* If the lease is already being broken, we just leave it */
1134                future = flock->fl_type;
1135        } else if (flock->fl_type & F_WRLCK) {
1136                /* Downgrade the exclusive lease to a read-only lease. */
1137                future = F_RDLCK | F_INPROGRESS;
1138        } else {
1139                /* the existing lease was read-only, so we can read too. */
1140                goto out;
1141        }
1142
        /*
         * This is exactly the case in which we would go on to sleep below;
         * without a waiter lock that is impossible, so fail now, before
         * any lease has been touched.
         */
1143        if (alloc_err && !i_have_this_lease && ((mode & O_NONBLOCK) == 0)) {
1144                error = alloc_err;
1145                goto out;
1146        }
1147
        /* Absolute deadline (in jiffies) for the holders to give way. */
1148        break_time = 0;
1149        if (lease_break_time > 0) {
1150                break_time = jiffies + lease_break_time * HZ;
1151                if (break_time == 0)
1152                        break_time++;   /* so that 0 means no break time */
1153        }
1154
        /*
         * Mark every lease not yet of the target type, record its deadline
         * and notify its fasync list with SIGIO.
         */
1155        for (fl = flock; fl && (fl->fl_flags & FL_LEASE); fl = fl->fl_next) {
1156                if (fl->fl_type != future) {
1157                        fl->fl_type = future;
1158                        fl->fl_break_time = break_time;
1159                        kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
1160                }
1161        }
1162
        /* Never sleep on our own lease, nor when asked not to block. */
1163        if (i_have_this_lease || (mode & O_NONBLOCK)) {
1164                error = -EWOULDBLOCK;
1165                goto out;
1166        }
1167
1168restart:
        /*
         * Turn the absolute deadline into a relative timeout.  A remainder
         * of exactly 0 is bumped to 1 so it is not confused with the
         * "no break time" value 0.
         */
1169        break_time = flock->fl_break_time;
1170        if (break_time != 0) {
1171                break_time -= jiffies;
1172                if (break_time == 0)
1173                        break_time++;
1174        }
1175        error = locks_block_on_timeout(flock, new_fl, break_time);
1176        if (error >= 0) {
                /*
                 * NOTE(review): a return of 0 is presumably "timed out" --
                 * confirm against locks_block_on_timeout().  In that case
                 * forcibly expire the overdue leases.
                 */
1177                if (error == 0)
1178                        time_out_leases(inode);
1179                /* Wait for the next lease that has not been broken yet */
                /* The list is re-read from the inode; it may have changed. */
1180                for (flock = inode->i_flock;
1181                                flock && (flock->fl_flags & FL_LEASE);
1182                                flock = flock->fl_next) {
1183                        if (flock->fl_type & F_INPROGRESS)
1184                                goto restart;
1185                }
1186                error = 0;
1187        }
1188
1189out:
1190        unlock_kernel();
        /* new_fl is only valid when lease_alloc() succeeded. */
1191        if (!alloc_err)
1192                locks_free_lock(new_fl);
1193        return error;
1194}
1195
1196/**
1197 *      lease_get_mtime
1198 *      @inode: the inode
1199 *
1200 * This is to force NFS clients to flush their caches for files with
1201 * exclusive leases.  The justification is that if someone has an
1202 * exclusive lease, then they could be modifiying it.
1203 */
1204time_t lease_get_mtime(struct inode *inode)
1205{
1206        struct file_lock *flock = inode->i_flock;
1207        if (flock && (flock->fl_flags & FL_LEASE) && (flock->fl_type & F_WRLCK))
1208                return CURRENT_TIME;
1209        return inode->i_mtime;
1210}
1211
1212/**
1213 *      fcntl_getlease - Enquire what lease is currently active
1214 *      @filp: the file
1215 *
1216 *      The value returned by this function will be one of
1217 *      (if no lease break is pending):
1218 *
1219 *      %F_RDLCK to indicate a shared lease is held.
1220 *
1221 *      %F_WRLCK to indicate an exclusive lease is held.
1222 *
1223 *      %F_UNLCK to indicate no lease is held.
1224 *
1225 *      (if a lease break is pending):
1226 *
1227 *      %F_RDLCK to indicate an exclusive lease needs to be
1228 *              changed to a shared lease (or removed).
1229 *
1230 *      %F_UNLCK to indicate the lease needs to be removed.
1231 *
1232 *      XXX: sfr & willy disagree over whether F_INPROGRESS
1233 *      should be returned to userspace.
1234 */
1235int fcntl_getlease(struct file *filp)
1236{
1237        struct file_lock *fl;
1238        int type = F_UNLCK;
1239
1240        lock_kernel();
1241        time_out_leases(filp->f_dentry->d_inode);
1242        for (fl = filp->f_dentry->d_inode->i_flock;
1243                        fl && (fl->fl_flags & FL_LEASE);
1244                        fl = fl->fl_next) {
1245                if (fl->fl_file == filp) {
1246                        type = fl->fl_type & ~F_INPROGRESS;
1247                        break;
1248                }
1249        }
1250        unlock_kernel();
1251        return type;
1252}
1253
1254/**
1255 *      fcntl_setlease  -       sets a lease on an open file
1256 *      @fd: open file descriptor
1257 *      @filp: file pointer
1258 *      @arg: type of lease to obtain
1259 *
1260 *      Call this fcntl to establish a lease on the file.
1261 *      Note that you also need to call %F_SETSIG to
1262 *      receive a signal when the lease is broken.
1263 */
1264int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1265{
1266        struct file_lock *fl, **before, **my_before = NULL;
1267        struct dentry *dentry;
1268        struct inode *inode;
1269        int error, rdlease_count = 0, wrlease_count = 0;
1270
1271        dentry = filp->f_dentry;
1272        inode = dentry->d_inode;
1273
1274        if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE))
1275                return -EACCES;
1276        if (!S_ISREG(inode->i_mode))
1277                return -EINVAL;
1278
1279        lock_kernel();
1280
1281        time_out_leases(inode);
1282
1283        /*
1284         * FIXME: What about F_RDLCK and files open for writing?
1285         */
1286        error = -EAGAIN;
1287        if ((arg == F_WRLCK)
1288            && ((atomic_read(&dentry->d_count) > 1)
1289                || (atomic_read(&inode->i_count) > 1)))
1290                goto out_unlock;
1291
1292        /*
1293         * At this point, we know that if there is an exclusive
1294         * lease on this file, then we hold it on this filp
1295         * (otherwise our open of this file would have blocked).
1296         * And if we are trying to acquire an exclusive lease,
1297         * then the file is not open by anyone (including us)
1298         * except for this filp.
1299         */
1300        for (before = &inode->i_flock;
1301                        ((fl = *before) != NULL) && (fl->fl_flags & FL_LEASE);
1302                        before = &fl->fl_next) {
1303                if (fl->fl_file == filp)
1304                        my_before = before;
1305                else if (fl->fl_type == (F_INPROGRESS | F_UNLCK))
1306                        /*
1307                         * Someone is in the process of opening this
1308                         * file for writing so we may not take an
1309                         * exclusive lease on it.
1310                         */
1311                        wrlease_count++;
1312                else
1313                        rdlease_count++;
1314        }
1315
1316        if ((arg == F_RDLCK && (wrlease_count > 0)) ||
1317            (arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0)))
1318                goto out_unlock;
1319
1320        if (my_before != NULL) {
1321                error = lease_modify(my_before, arg);
1322                goto out_unlock;
1323        }
1324
1325        error = 0;
1326        if (arg == F_UNLCK)
1327                goto out_unlock;
1328
1329        error = -EINVAL;
1330        if (!leases_enable)
1331                goto out_unlock;
1332
1333        error = lease_alloc(filp, arg, &fl);
1334        if (error)
1335                goto out_unlock;
1336
1337        error = fasync_helper(fd, filp, 1, &fl->fl_fasync);
1338        if (error < 0) {
1339                locks_free_lock(fl);
1340                goto out_unlock;
1341        }
1342        fl->fl_next = *before;
1343        *before = fl;
1344        list_add(&fl->fl_link, &file_lock_list);
1345        filp->f_owner.pid = current->pid;
1346        filp->f_owner.uid = current->uid;
1347        filp->f_owner.euid = current->euid;
1348out_unlock:
1349        unlock_kernel();
1350        return error;
1351}
1352
1353/**
1354 *      sys_flock: - flock() system call.
1355 *      @fd: the file descriptor to lock.
1356 *      @cmd: the type of lock to apply.
1357 *
1358 *      Apply a %FL_FLOCK style lock to an open file descriptor.
1359 *      The @cmd can be one of
1360 *
1361 *      %LOCK_SH -- a shared lock.
1362 *
1363 *      %LOCK_EX -- an exclusive lock.
1364 *
1365 *      %LOCK_UN -- remove an existing lock.
1366 *
1367 *      %LOCK_MAND -- a `mandatory' flock.  This exists to emulate Windows Share Modes.
1368 *
1369 *      %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
1370 *      processes read and write access respectively.
1371 */
1372asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1373{
1374        struct file *filp;
1375        int error, type;
1376
1377        error = -EBADF;
1378        filp = fget(fd);
1379        if (!filp)
1380                goto out;
1381
1382        error = flock_translate_cmd(cmd);
1383        if (error < 0)
1384                goto out_putf;
1385        type = error;
1386
1387        error = -EBADF;
1388        if ((type != F_UNLCK)
1389#ifdef MSNFS
1390                && !(type & LOCK_MAND)
1391#endif
1392                && !(filp->f_mode & 3))
1393                goto out_putf;
1394
1395        lock_kernel();
1396        error = flock_lock_file(filp, type,
1397                                (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1);
1398        unlock_kernel();
1399
1400out_putf:
1401        fput(filp);
1402out:
1403        return error;
1404}
1405
1406/* Report the first existing lock that would conflict with l.
1407 * This implements the F_GETLK command of fcntl().
1408 */
1409int fcntl_getlk(unsigned int fd, struct flock *l)
1410{
1411        struct file *filp;
1412        struct file_lock *fl, file_lock;
1413        struct flock flock;
1414        int error;
1415
1416        error = -EFAULT;
1417        if (copy_from_user(&flock, l, sizeof(flock)))
1418                goto out;
1419        error = -EINVAL;
1420        if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
1421                goto out;
1422
1423        error = -EBADF;
1424        filp = fget(fd);
1425        if (!filp)
1426                goto out;
1427
1428        error = flock_to_posix_lock(filp, &file_lock, &flock);
1429        if (error)
1430                goto out_putf;
1431
1432        if (filp->f_op && filp->f_op->lock) {
1433                error = filp->f_op->lock(filp, F_GETLK, &file_lock);
1434                if (error < 0)
1435                        goto out_putf;
1436                else if (error == LOCK_USE_CLNT)
1437                  /* Bypass for NFS with no locking - 2.0.36 compat */
1438                  fl = posix_test_lock(filp, &file_lock);
1439                else
1440                  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
1441        } else {
1442                fl = posix_test_lock(filp, &file_lock);
1443        }
1444 
1445        flock.l_type = F_UNLCK;
1446        if (fl != NULL) {
1447                flock.l_pid = fl->fl_pid;
1448#if BITS_PER_LONG == 32
1449                /*
1450                 * Make sure we can represent the posix lock via
1451                 * legacy 32bit flock.
1452                 */
1453                error = -EOVERFLOW;
1454                if (fl->fl_start > OFFT_OFFSET_MAX)
1455                        goto out_putf;
1456                if ((fl->fl_end != OFFSET_MAX)
1457                    && (fl->fl_end > OFFT_OFFSET_MAX))
1458                        goto out_putf;
1459#endif
1460                flock.l_start = fl->fl_start;
1461                flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
1462                        fl->fl_end - fl->fl_start + 1;
1463                flock.l_whence = 0;
1464                flock.l_type = fl->fl_type;
1465        }
1466        error = -EFAULT;
1467        if (!copy_to_user(l, &flock, sizeof(flock)))
1468                error = 0;
1469  
1470out_putf:
1471        fput(filp);
1472out:
1473        return error;
1474}
1475
1476/* Apply the lock described by l to an open file descriptor.
1477 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1478 */
1479int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
1480{
1481        struct file *filp;
1482        struct file_lock *file_lock = locks_alloc_lock(0);
1483        struct flock flock;
1484        struct inode *inode;
1485        int error;
1486
1487        if (file_lock == NULL)
1488                return -ENOLCK;
1489
1490        /*
1491         * This might block, so we do it before checking the inode.
1492         */
1493        error = -EFAULT;
1494        if (copy_from_user(&flock, l, sizeof(flock)))
1495                goto out;
1496
1497        /* Get arguments and validate them ...
1498         */
1499
1500        error = -EBADF;
1501        filp = fget(fd);
1502        if (!filp)
1503                goto out;
1504
1505        error = -EINVAL;
1506        inode = filp->f_dentry->d_inode;
1507
1508        /* Don't allow mandatory locks on files that may be memory mapped
1509         * and shared.
1510         */
1511        if (IS_MANDLOCK(inode) &&
1512            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
1513                struct address_space *mapping = inode->i_mapping;
1514
1515                if (mapping->i_mmap_shared != NULL) {
1516                        error = -EAGAIN;
1517                        goto out_putf;
1518                }
1519        }
1520
1521        error = flock_to_posix_lock(filp, file_lock, &flock);
1522        if (error)
1523                goto out_putf;
1524        
1525        error = -EBADF;
1526        switch (flock.l_type) {
1527        case F_RDLCK:
1528                if (!(filp->f_mode & FMODE_READ))
1529                        goto out_putf;
1530                break;
1531        case F_WRLCK:
1532                if (!(filp->f_mode & FMODE_WRITE))
1533                        goto out_putf;
1534                break;
1535        case F_UNLCK:
1536                break;
1537        case F_SHLCK:
1538        case F_EXLCK:
1539#ifdef __sparc__
1540/* warn a bit for now, but don't overdo it */
1541{
1542        static int count = 0;
1543        if (!count) {
1544                count=1;
1545                printk(KERN_WARNING
1546                       "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n",
1547                       current->pid, current->comm);
1548        }
1549}
1550                if (!(filp->f_mode & 3))
1551                        goto out_putf;
1552                break;
1553#endif
1554        default:
1555                error = -EINVAL;
1556                goto out_putf;
1557        }
1558
1559        if (filp->f_op && filp->f_op->lock != NULL) {
1560                error = filp->f_op->lock(filp, cmd, file_lock);
1561                if (error < 0)
1562                        goto out_putf;
1563        }
1564        error = posix_lock_file(filp, file_lock, cmd == F_SETLKW);
1565
1566out_putf:
1567        fput(filp);
1568out:
1569        locks_free_lock(file_lock);
1570        return error;
1571}
1572
1573#if BITS_PER_LONG == 32
1574/* Report the first existing lock that would conflict with l.
1575 * This implements the F_GETLK command of fcntl().
1576 */
1577int fcntl_getlk64(unsigned int fd, struct flock64 *l)
1578{
1579        struct file *filp;
1580        struct file_lock *fl, file_lock;
1581        struct flock64 flock;
1582        int error;
1583
1584        error = -EFAULT;
1585        if (copy_from_user(&flock, l, sizeof(flock)))
1586                goto out;
1587        error = -EINVAL;
1588        if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
1589                goto out;
1590
1591        error = -EBADF;
1592        filp = fget(fd);
1593        if (!filp)
1594                goto out;
1595
1596        error = flock64_to_posix_lock(filp, &file_lock, &flock);
1597        if (error)
1598                goto out_putf;
1599
1600        if (filp->f_op && filp->f_op->lock) {
1601                error = filp->f_op->lock(filp, F_GETLK, &file_lock);
1602                if (error < 0)
1603                        goto out_putf;
1604                else if (error == LOCK_USE_CLNT)
1605                  /* Bypass for NFS with no locking - 2.0.36 compat */
1606                  fl = posix_test_lock(filp, &file_lock);
1607                else
1608                  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
1609        } else {
1610                fl = posix_test_lock(filp, &file_lock);
1611        }
1612 
1613        flock.l_type = F_UNLCK;
1614        if (fl != NULL) {
1615                flock.l_pid = fl->fl_pid;
1616                flock.l_start = fl->fl_start;
1617                flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
1618                        fl->fl_end - fl->fl_start + 1;
1619                flock.l_whence = 0;
1620                flock.l_type = fl->fl_type;
1621        }
1622        error = -EFAULT;
1623        if (!copy_to_user(l, &flock, sizeof(flock)))
1624                error = 0;
1625  
1626out_putf:
1627        fput(filp);
1628out:
1629        return error;
1630}
1631
1632/* Apply the lock described by l to an open file descriptor.
1633 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1634 */
1635int fcntl_setlk64(unsigned int fd, unsigned int cmd, struct flock64 *l)
1636{
1637        struct file *filp;
1638        struct file_lock *file_lock = locks_alloc_lock(0);
1639        struct flock64 flock;
1640        struct inode *inode;
1641        int error;
1642
1643        if (file_lock == NULL)
1644                return -ENOLCK;
1645
1646        /*
1647         * This might block, so we do it before checking the inode.
1648         */
1649        error = -EFAULT;
1650        if (copy_from_user(&flock, l, sizeof(flock)))
1651                goto out;
1652
1653        /* Get arguments and validate them ...
1654         */
1655
1656        error = -EBADF;
1657        filp = fget(fd);
1658        if (!filp)
1659                goto out;
1660
1661        error = -EINVAL;
1662        inode = filp->f_dentry->d_inode;
1663
1664        /* Don't allow mandatory locks on files that may be memory mapped
1665         * and shared.
1666         */
1667        if (IS_MANDLOCK(inode) &&
1668            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
1669                struct address_space *mapping = inode->i_mapping;
1670
1671                if (mapping->i_mmap_shared != NULL) {
1672                        error = -EAGAIN;
1673                        goto out_putf;
1674                }
1675        }
1676
1677        error = flock64_to_posix_lock(filp, file_lock, &flock);
1678        if (error)
1679                goto out_putf;
1680        
1681        error = -EBADF;
1682        switch (flock.l_type) {
1683        case F_RDLCK:
1684                if (!(filp->f_mode & FMODE_READ))
1685                        goto out_putf;
1686                break;
1687        case F_WRLCK:
1688                if (!(filp->f_mode & FMODE_WRITE))
1689                        goto out_putf;
1690                break;
1691        case F_UNLCK:
1692                break;
1693        case F_SHLCK:
1694        case F_EXLCK:
1695        default:
1696                error = -EINVAL;
1697                goto out_putf;
1698        }
1699
1700        if (filp->f_op && filp->f_op->lock != NULL) {
1701                error = filp->f_op->lock(filp, cmd, file_lock);
1702                if (error < 0)
1703                        goto out_putf;
1704        }
1705        error = posix_lock_file(filp, file_lock, cmd == F_SETLKW64);
1706
1707out_putf:
1708        fput(filp);
1709out:
1710        locks_free_lock(file_lock);
1711        return error;
1712}
1713#endif /* BITS_PER_LONG == 32 */
1714
1715/*
1716 * This function is called when the file is being removed
1717 * from the task's fd array.
1718 */
1719void locks_remove_posix(struct file *filp, fl_owner_t owner)
1720{
1721        struct inode * inode = filp->f_dentry->d_inode;
1722        struct file_lock *fl;
1723        struct file_lock **before;
1724
1725        /*
1726         * For POSIX locks we free all locks on this file for the given task.
1727         */
1728        if (!inode->i_flock) {
1729                /*
1730                 * Notice that something might be grabbing a lock right now.
1731                 * Consider it as a race won by us - event is async, so even if
1732                 * we miss the lock added we can trivially consider it as added
1733                 * after we went through this call.
1734                 */
1735                return;
1736        }
1737        lock_kernel();
1738        before = &inode->i_flock;
1739        while ((fl = *before) != NULL) {
1740                if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) {
1741                        locks_unlock_delete(before);
1742                        before = &inode->i_flock;
1743                        continue;
1744                }
1745                before = &fl->fl_next;
1746        }
1747        unlock_kernel();
1748}
1749
1750/*
1751 * This function is called on the last close of an open file.
1752 */
1753void locks_remove_flock(struct file *filp)
1754{
1755        struct inode * inode = filp->f_dentry->d_inode; 
1756        struct file_lock *fl;
1757        struct file_lock **before;
1758
1759        if (!inode->i_flock)
1760                return;
1761
1762        lock_kernel();
1763        before = &inode->i_flock;
1764
1765        while ((fl = *before) != NULL) {
1766                if (fl->fl_file == filp) {
1767                        if (fl->fl_flags & FL_FLOCK) {
1768                                locks_delete_lock(before, 0);
1769                                continue;
1770                        }
1771                        if (fl->fl_flags & FL_LEASE) {
1772                                lease_modify(before, F_UNLCK);
1773                                continue;
1774                        }
1775                }
1776                before = &fl->fl_next;
1777        }
1778        unlock_kernel();
1779}
1780
/**
 *	posix_block_lock - queue a lock behind the lock that blocks it
 *	@blocker: the lock which is blocking
 *	@waiter: the lock which conflicts and has to wait
 *
 *	Exported wrapper around locks_insert_block(); lockd needs to block
 *	waiting for locks.
 */
void posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
{
	locks_insert_block(blocker, waiter);
}
1793
1794/**
1795 *      posix_unblock_lock - stop waiting for a file lock
1796 *      @waiter: the lock which was waiting
1797 *
1798 *      lockd needs to block waiting for locks.
1799 */
1800void
1801posix_unblock_lock(struct file_lock *waiter)
1802{
1803        if (!list_empty(&waiter->fl_block))
1804                locks_delete_block(waiter);
1805}
1806
1807static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
1808{
1809        struct inode *inode = NULL;
1810
1811        if (fl->fl_file != NULL)
1812                inode = fl->fl_file->f_dentry->d_inode;
1813
1814        out += sprintf(out, "%d:%s ", id, pfx);
1815        if (fl->fl_flags & FL_POSIX) {
1816                out += sprintf(out, "%6s %s ",
1817                             (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
1818                             (inode == NULL) ? "*NOINODE*" :
1819                             (IS_MANDLOCK(inode) &&
1820                              (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ?
1821                             "MANDATORY" : "ADVISORY ");
1822        } else if (fl->fl_flags & FL_FLOCK) {
1823#ifdef MSNFS
1824                if (fl->fl_type & LOCK_MAND) {
1825                        out += sprintf(out, "FLOCK  MSNFS     ");
1826                } else
1827#endif
1828                        out += sprintf(out, "FLOCK  ADVISORY  ");
1829        } else if (fl->fl_flags & FL_LEASE) {
1830                out += sprintf(out, "LEASE  ");
1831                if (fl->fl_type & F_INPROGRESS)
1832                        out += sprintf(out, "BREAKING  ");
1833                else if (fl->fl_file)
1834                        out += sprintf(out, "ACTIVE    ");
1835                else
1836                        out += sprintf(out, "BREAKER   ");
1837        } else {
1838                out += sprintf(out, "UNKNOWN UNKNOWN  ");
1839        }
1840#ifdef MSNFS
1841        if (fl->fl_type & LOCK_MAND) {
1842                out += sprintf(out, "%s ",
1843                               (fl->fl_type & LOCK_READ)
1844                               ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
1845                               : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
1846        } else
1847#endif
1848                out += sprintf(out, "%s ",
1849                               (fl->fl_type & F_INPROGRESS)
1850                               ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ "
1851                               : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ ");
1852        out += sprintf(out, "%d %s:%ld ",
1853                     fl->fl_pid,
1854                     inode ? kdevname(inode->i_dev) : "<none>",
1855                     inode ? inode->i_ino : 0);
1856        out += sprintf(out, "%Ld ", fl->fl_start);
1857        if (fl->fl_end == OFFSET_MAX)
1858                out += sprintf(out, "EOF ");
1859        else
1860                out += sprintf(out, "%Ld ", fl->fl_end);
1861        sprintf(out, "%08lx %08lx %08lx %08lx %08lx\n",
1862                (long)fl, (long)fl->fl_link.prev, (long)fl->fl_link.next,
1863                (long)fl->fl_next, (long)fl->fl_block.next);
1864}
1865
/*
 * Account for the NUL-terminated line that was just formatted at *p.
 *
 * @p:      in/out cursor into the output buffer; on entry it points at the
 *          new line, on return at the place the next line should go.
 * @pos:    in/out logical position of the start of that line in the whole
 *          listing; always advanced by the line's length.
 * @offset: logical position at which the caller's window begins.
 *
 * A line that lies entirely before @offset is dropped (the cursor stays
 * put so the next line overwrites it), a line that straddles @offset is
 * trimmed to its tail, and a line at or after @offset is kept whole.
 */
static void move_lock_status(char **p, off_t *pos, off_t offset)
{
	char *line = *p;
	int len = strlen(line);
	off_t line_pos = *pos;

	/* Every case moves the logical position past this line. */
	*pos = line_pos + len;

	if (line_pos >= offset) {
		/* the complete line is valid */
		*p = line + len;
	} else if (line_pos + len > offset) {
		/* keep only the part of the line at or after offset */
		int skip = offset - line_pos;
		int keep = len - skip;

		memmove(line, line + skip, keep);
		*p = line + keep;
	}
	/* otherwise: discard the complete line, cursor unchanged */
}
1887
1888/**
1889 *      get_locks_status        -       reports lock usage in /proc/locks
1890 *      @buffer: address in userspace to write into
1891 *      @start: ?
1892 *      @offset: how far we are through the buffer
1893 *      @length: how much to read
1894 */
1895
1896int get_locks_status(char *buffer, char **start, off_t offset, int length)
1897{
1898        struct list_head *tmp;
1899        char *q = buffer;
1900        off_t pos = 0;
1901        int i = 0;
1902
1903        lock_kernel();
1904        list_for_each(tmp, &file_lock_list) {
1905                struct list_head *btmp;
1906                struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
1907                lock_get_status(q, fl, ++i, "");
1908                move_lock_status(&q, &pos, offset);
1909
1910                if(pos >= offset+length)
1911                        goto done;
1912
1913                list_for_each(btmp, &fl->fl_block) {
1914                        struct file_lock *bfl = list_entry(btmp,
1915                                        struct file_lock, fl_block);
1916                        lock_get_status(q, bfl, i, " ->");
1917                        move_lock_status(&q, &pos, offset);
1918
1919                        if(pos >= offset+length)
1920                                goto done;
1921                }
1922        }
1923done:
1924        unlock_kernel();
1925        *start = buffer;
1926        if(q-buffer < length)
1927                return (q-buffer);
1928        return length;
1929}
1930
1931#ifdef MSNFS
1932/**
1933 *      lock_may_read - checks that the region is free of locks
1934 *      @inode: the inode that is being read
1935 *      @start: the first byte to read
1936 *      @len: the number of bytes to read
1937 *
1938 *      Emulates Windows locking requirements.  Whole-file
1939 *      mandatory locks (share modes) can prohibit a read and
1940 *      byte-range POSIX locks can prohibit a read if they overlap.
1941 *
1942 *      N.B. this function is only ever called
1943 *      from knfsd and ownership of locks is never checked.
1944 */
1945int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
1946{
1947        struct file_lock *fl;
1948        int result = 1;
1949        lock_kernel();
1950        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1951                if (fl->fl_flags == FL_POSIX) {
1952                        if (fl->fl_type == F_RDLCK)
1953                                continue;
1954                        if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
1955                                continue;
1956                } else if (fl->fl_flags == FL_FLOCK) {
1957                        if (!(fl->fl_type & LOCK_MAND))
1958                                continue;
1959                        if (fl->fl_type & LOCK_READ)
1960                                continue;
1961                } else
1962                        continue;
1963                result = 0;
1964                break;
1965        }
1966        unlock_kernel();
1967        return result;
1968}
1969
1970/**
1971 *      lock_may_write - checks that the region is free of locks
1972 *      @inode: the inode that is being written
1973 *      @start: the first byte to write
1974 *      @len: the number of bytes to write
1975 *
1976 *      Emulates Windows locking requirements.  Whole-file
1977 *      mandatory locks (share modes) can prohibit a write and
1978 *      byte-range POSIX locks can prohibit a write if they overlap.
1979 *
1980 *      N.B. this function is only ever called
1981 *      from knfsd and ownership of locks is never checked.
1982 */
1983int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
1984{
1985        struct file_lock *fl;
1986        int result = 1;
1987        lock_kernel();
1988        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1989                if (fl->fl_flags == FL_POSIX) {
1990                        if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
1991                                continue;
1992                } else if (fl->fl_flags == FL_FLOCK) {
1993                        if (!(fl->fl_type & LOCK_MAND))
1994                                continue;
1995                        if (fl->fl_type & LOCK_WRITE)
1996                                continue;
1997                } else
1998                        continue;
1999                result = 0;
2000                break;
2001        }
2002        unlock_kernel();
2003        return result;
2004}
2005#endif
2006
2007static int __init filelock_init(void)
2008{
2009        filelock_cache = kmem_cache_create("file_lock_cache",
2010                        sizeof(struct file_lock), 0, 0, init_once, NULL);
2011        if (!filelock_cache)
2012                panic("cannot create file lock slab cache");
2013        return 0;
2014}
2015
2016module_init(filelock_init)
2017
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.