linux-old/fs/locks.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/locks.c
   3 *
   4 *  Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
   5 *  Doug Evans (dje@spiff.uucp), August 07, 1992
   6 *
   7 *  Deadlock detection added.
   8 *  FIXME: one thing isn't handled yet:
   9 *      - mandatory locks (requires lots of changes elsewhere)
  10 *  Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
  11 *
  12 *  Miscellaneous edits, and a total rewrite of posix_lock_file() code.
  13 *  Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
  14 *  
  15 *  Converted file_lock_table to a linked list from an array, which eliminates
  16 *  the limits on how many active file locks are open.
  17 *  Chad Page (pageone@netcom.com), November 27, 1994
  18 * 
  19 *  Removed dependency on file descriptors. dup()'ed file descriptors now
  20 *  get the same locks as the original file descriptors, and a close() on
  21 *  any file descriptor removes ALL the locks on the file for the current
  22 *  process. Since locks still depend on the process id, locks are inherited
  23 *  after an exec() but not after a fork(). This agrees with POSIX, and both
  24 *  BSD and SVR4 practice.
  25 *  Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
  26 *
  27 *  Scrapped free list which is redundant now that we allocate locks
  28 *  dynamically with kmalloc()/kfree().
  29 *  Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
  30 *
  31 *  Implemented two lock personalities - FL_FLOCK and FL_POSIX.
  32 *
  33 *  FL_POSIX locks are created with calls to fcntl() and lockf() through the
  34 *  fcntl() system call. They have the semantics described above.
  35 *
  36 *  FL_FLOCK locks are created with calls to flock(), through the flock()
  37 *  system call, which is new. Old C libraries implement flock() via fcntl()
  38 *  and will continue to use the old, broken implementation.
  39 *
  40 *  FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
  41 *  with a file pointer (filp). As a result they can be shared by a parent
  42 *  process and its children after a fork(). They are removed when the last
  43 *  file descriptor referring to the file pointer is closed (unless explicitly
  44 *  unlocked). 
  45 *
  46 *  FL_FLOCK locks never deadlock, an existing lock is always removed before
  47 *  upgrading from shared to exclusive (or vice versa). When this happens
  48 *  any processes blocked by the current lock are woken up and allowed to
  49 *  run before the new lock is applied.
  50 *  Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
  51 *
  52 *  Removed some race conditions in flock_lock_file(), marked other possible
  53 *  races. Just grep for FIXME to see them. 
  54 *  Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
  55 *
  56 *  Addressed Dmitry's concerns. Deadlock checking no longer recursive.
  57 *  Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
  58 *  once we've checked for blocking and deadlocking.
  59 *  Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
  60 *
  61 *  Initial implementation of mandatory locks. SunOS turned out to be
  62 *  a rotten model, so I implemented the "obvious" semantics.
  63 *  See 'linux/Documentation/mandatory.txt' for details.
  64 *  Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
  65 *
  66 *  Don't allow mandatory locks on mmap()'ed files. Added simple functions to
  67 *  check if a file has mandatory locks, used by mmap(), open() and creat() to
  68 *  see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
  69 *  Manual, Section 2.
  70 *  Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
  71 *
  72 *  Tidied up block list handling. Added '/proc/locks' interface.
  73 *  Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
  74 *
  75 *  Fixed deadlock condition for pathological code that mixes calls to
  76 *  flock() and fcntl().
  77 *  Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
  78 *
  79 *  Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
  80 *  for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
  81 *  guarantee sensible behaviour in the case where file system modules might
  82 *  be compiled with different options than the kernel itself.
  83 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
  84 *
  85 *  Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
  86 *  (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
  87 *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
  88 *
  89 *  Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
  90 *  locks. Changed process synchronisation to avoid dereferencing locks that
  91 *  have already been freed.
  92 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
  93 *
  94 *  Made the block list a circular list to minimise searching in the list.
  95 *  Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
  96 *
  97 *  Made mandatory locking a mount option. Default is not to allow mandatory
  98 *  locking.
  99 *  Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
 100 *
 101 *  Some adaptations for NFS support.
  102 *  Olaf Kirch (okir@monad.swb.de), Dec 1996.
 103 *
 104 *  Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
 105 *  Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
 106 */
 107
 108#include <linux/malloc.h>
 109#include <linux/file.h>
 110#include <linux/smp_lock.h>
 111
 112#include <asm/uaccess.h>
 113
 114#define OFFSET_MAX      ((off_t)LONG_MAX)       /* FIXME: move elsewhere? */
 115
 116static int flock_make_lock(struct file *filp, struct file_lock *fl,
 117                               unsigned int cmd);
 118static int posix_make_lock(struct file *filp, struct file_lock *fl,
 119                               struct flock *l);
 120static int flock_locks_conflict(struct file_lock *caller_fl,
 121                                struct file_lock *sys_fl);
 122static int posix_locks_conflict(struct file_lock *caller_fl,
 123                                struct file_lock *sys_fl);
 124static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
 125static int flock_lock_file(struct file *filp, struct file_lock *caller,
 126                           unsigned int wait);
 127static int posix_locks_deadlock(struct file_lock *caller,
 128                                struct file_lock *blocker);
 129
 130static struct file_lock *locks_empty_lock(void);
 131static struct file_lock *locks_init_lock(struct file_lock *,
 132                                         struct file_lock *);
 133static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl);
 134static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait);
 135static char *lock_get_status(struct file_lock *fl, int id, char *pfx);
 136
 137static void locks_insert_block(struct file_lock *blocker, struct file_lock *waiter);
 138static void locks_delete_block(struct file_lock *blocker, struct file_lock *waiter);
 139static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait);
 140
 141struct file_lock *file_lock_table = NULL;
 142
 143/* Allocate a new lock, and initialize its fields from fl.
 144 * The lock is not inserted into any lists until locks_insert_lock() or 
 145 * locks_insert_block() are called.
 146 */
 147static inline struct file_lock *locks_alloc_lock(struct file_lock *fl)
 148{
 149        return locks_init_lock(locks_empty_lock(), fl);
 150}
 151
 152/* Free lock not inserted in any queue.
 153 */
 154static inline void locks_free_lock(struct file_lock *fl)
 155{
 156        if (waitqueue_active(&fl->fl_wait))
 157                panic("Attempting to free lock with active wait queue");
 158
 159        if (fl->fl_nextblock != NULL || fl->fl_prevblock != NULL)
 160                panic("Attempting to free lock with active block list");
 161                
 162        kfree(fl);
 163        return;
 164}
 165
 166/* Check if two locks overlap each other.
 167 */
 168static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 169{
 170        return ((fl1->fl_end >= fl2->fl_start) &&
 171                (fl2->fl_end >= fl1->fl_start));
 172}
 173
 174/*
 175 * Check whether two locks have the same owner
 176 * N.B. Do we need the test on PID as well as owner?
 177 * (Clone tasks should be considered as one "owner".)
 178 */
 179static inline int
 180locks_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 181{
 182        return (fl1->fl_owner == fl2->fl_owner) &&
 183               (fl1->fl_pid   == fl2->fl_pid);
 184}
 185
 186/* Insert waiter into blocker's block list.
 187 * We use a circular list so that processes can be easily woken up in
 188 * the order they blocked. The documentation doesn't require this but
 189 * it seems like the reasonable thing to do.
 190 */
 191static void locks_insert_block(struct file_lock *blocker, 
 192                               struct file_lock *waiter)
 193{
 194        struct file_lock *prevblock;
 195
 196        if (waiter->fl_prevblock) {
 197                printk(KERN_ERR "locks_insert_block: remove duplicated lock "
 198                        "(pid=%d %ld-%ld type=%d)\n",
 199                        waiter->fl_pid, waiter->fl_start,
 200                        waiter->fl_end, waiter->fl_type);
 201                locks_delete_block(waiter->fl_prevblock, waiter);
 202        }
 203
 204        if (blocker->fl_prevblock == NULL)
 205                /* No previous waiters - list is empty */
 206                prevblock = blocker;
 207        else
 208                /* Previous waiters exist - add to end of list */
 209                prevblock = blocker->fl_prevblock;
 210
 211        prevblock->fl_nextblock = waiter;
 212        blocker->fl_prevblock = waiter;
 213        waiter->fl_nextblock = blocker;
 214        waiter->fl_prevblock = prevblock;
 215        
 216        return;
 217}
 218
 219/* Remove waiter from blocker's block list.
 220 * When blocker ends up pointing to itself then the list is empty.
 221 */
 222static void locks_delete_block(struct file_lock *blocker,
 223                               struct file_lock *waiter)
 224{
 225        struct file_lock *nextblock;
 226        struct file_lock *prevblock;
 227        
 228        nextblock = waiter->fl_nextblock;
 229        prevblock = waiter->fl_prevblock;
 230
 231        if (nextblock == NULL)
 232                return;
 233        
 234        nextblock->fl_prevblock = prevblock;
 235        prevblock->fl_nextblock = nextblock;
 236
 237        waiter->fl_prevblock = waiter->fl_nextblock = NULL;
 238        if (blocker->fl_nextblock == blocker)
 239                /* No more locks on blocker's blocked list */
 240                blocker->fl_prevblock = blocker->fl_nextblock = NULL;
 241        return;
 242}
 243
 244/* The following two are for the benefit of lockd.
 245 */
 246void
 247posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
 248{
 249        locks_insert_block(blocker, waiter);
 250        return;
 251}
 252
 253void
 254posix_unblock_lock(struct file_lock *waiter)
 255{
 256        if (waiter->fl_prevblock)
 257                locks_delete_block(waiter->fl_prevblock, waiter);
 258        return;
 259}
 260
 261/* Wake up processes blocked waiting for blocker.
 262 * If told to wait then schedule the processes until the block list
 263 * is empty, otherwise empty the block list ourselves.
 264 */
 265static void locks_wake_up_blocks(struct file_lock *blocker, unsigned int wait)
 266{
 267        struct file_lock *waiter;
 268
 269        while ((waiter = blocker->fl_nextblock) != NULL) {
 270                /* N.B. Is it possible for the notify function to block?? */
 271                if (!wait) {
 272                        /* Remove waiter from the block list, because by the
 273                         * time it wakes up blocker won't exist any more.
 274                         */
 275                        locks_delete_block(blocker, waiter);
 276                }
 277                if (waiter->fl_notify)
 278                        waiter->fl_notify(waiter);
 279                else
 280                        wake_up(&waiter->fl_wait);
 281                if (wait) {
 282                        /* Let the blocked process remove waiter from the
 283                         * block list when it gets scheduled.
 284                         */
 285                        current->policy |= SCHED_YIELD;
 286                        schedule();
 287                }
 288        }
 289        return;
 290}
 291
 292/* flock() system call entry point. Apply a FL_FLOCK style lock to
 293 * an open file descriptor.
 294 */
 295asmlinkage int sys_flock(unsigned int fd, unsigned int cmd)
 296{
 297        struct file_lock file_lock;
 298        struct file *filp;
 299        int error;
 300
 301        lock_kernel();
 302        error = -EBADF;
 303        filp = fget(fd);
 304        if (!filp)
 305                goto out;
 306        error = -EINVAL;
 307        if (!flock_make_lock(filp, &file_lock, cmd))
 308                goto out_putf;
 309        error = -EBADF;
 310        if ((file_lock.fl_type != F_UNLCK) && !(filp->f_mode & 3))
 311                goto out_putf;
 312        error = flock_lock_file(filp, &file_lock,
 313                                (cmd & (LOCK_UN | LOCK_NB)) ? 0 : 1);
 314out_putf:
 315        fput(filp);
 316out:
 317        unlock_kernel();
 318        return (error);
 319}
 320
 321/* Report the first existing lock that would conflict with l.
 322 * This implements the F_GETLK command of fcntl().
 323 */
 324int fcntl_getlk(unsigned int fd, struct flock *l)
 325{
 326        struct file *filp;
 327        struct file_lock *fl,file_lock;
 328        struct flock flock;
 329        int error;
 330
 331        error = -EFAULT;
 332        if (copy_from_user(&flock, l, sizeof(flock)))
 333                goto out;
 334        error = -EINVAL;
 335        if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK))
 336                goto out;
 337
 338        error = -EBADF;
 339        filp = fget(fd);
 340        if (!filp)
 341                goto out;
 342
 343        error = -EINVAL;
 344        if (!filp->f_dentry || !filp->f_dentry->d_inode || !filp->f_op)
 345                goto out_putf;
 346
 347        if (!posix_make_lock(filp, &file_lock, &flock))
 348                goto out_putf;
 349
 350        if (filp->f_op->lock) {
 351                error = filp->f_op->lock(filp, F_GETLK, &file_lock);
 352                if (error < 0)
 353                        goto out_putf;
 354                else if (error == LOCK_USE_CLNT)
 355                  /* Bypass for NFS with no locking - 2.0.36 compat */
 356                  fl = posix_test_lock(filp, &file_lock);
 357                else
 358                  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 359        } else {
 360                fl = posix_test_lock(filp, &file_lock);
 361        }
 362 
 363        flock.l_type = F_UNLCK;
 364        if (fl != NULL) {
 365                flock.l_pid = fl->fl_pid;
 366                flock.l_start = fl->fl_start;
 367                flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
 368                        fl->fl_end - fl->fl_start + 1;
 369                flock.l_whence = 0;
 370                flock.l_type = fl->fl_type;
 371        }
 372        error = -EFAULT;
 373        if (!copy_to_user(l, &flock, sizeof(flock)))
 374                error = 0;
 375  
 376out_putf:
 377        fput(filp);
 378out:
 379        return error;
 380}
 381
 382/* Apply the lock described by l to an open file descriptor.
 383 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 384 */
 385int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
 386{
 387        struct file *filp;
 388        struct file_lock file_lock;
 389        struct flock flock;
 390        struct dentry * dentry;
 391        struct inode *inode;
 392        int error;
 393
 394        /*
 395         * This might block, so we do it before checking the inode.
 396         */
 397        error = -EFAULT;
 398        if (copy_from_user(&flock, l, sizeof(flock)))
 399                goto out;
 400
 401        /* Get arguments and validate them ...
 402         */
 403
 404        error = -EBADF;
 405        filp = fget(fd);
 406        if (!filp)
 407                goto out;
 408
 409        error = -EINVAL;
 410        if (!(dentry = filp->f_dentry))
 411                goto out_putf;
 412        if (!(inode = dentry->d_inode))
 413                goto out_putf;
 414        if (!filp->f_op)
 415                goto out_putf;
 416
 417        /* Don't allow mandatory locks on files that may be memory mapped
 418         * and shared.
 419         */
 420        if (IS_MANDLOCK(inode) &&
 421            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
 422            inode->i_mmap_shared) {
 423                error = -EAGAIN;
 424                goto out_putf;
 425        }
 426
 427        error = -EINVAL;
 428        if (!posix_make_lock(filp, &file_lock, &flock))
 429                goto out_putf;
 430        
 431        error = -EBADF;
 432        switch (flock.l_type) {
 433        case F_RDLCK:
 434                if (!(filp->f_mode & FMODE_READ))
 435                        goto out_putf;
 436                break;
 437        case F_WRLCK:
 438                if (!(filp->f_mode & FMODE_WRITE))
 439                        goto out_putf;
 440                break;
 441        case F_UNLCK:
 442                break;
 443        case F_SHLCK:
 444        case F_EXLCK:
 445#ifdef __sparc__
 446/* warn a bit for now, but don't overdo it */
 447{
 448        static int count = 0;
 449        if (!count) {
 450                count=1;
 451                printk(KERN_WARNING
 452                       "fcntl_setlk() called by process %d (%s) with broken flock() emulation\n",
 453                       current->pid, current->comm);
 454        }
 455}
 456                if (!(filp->f_mode & 3))
 457                        goto out_putf;
 458                break;
 459#endif
 460        default:
 461                error = -EINVAL;
 462                goto out_putf;
 463        }
 464
 465        if (filp->f_op->lock != NULL) {
 466                error = filp->f_op->lock(filp, cmd, &file_lock);
 467                if (error < 0)
 468                        goto out_putf;
 469        }
 470        error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW);
 471
 472out_putf:
 473        fput(filp);
 474out:
 475        return error;
 476}
 477
 478/*
 479 * This function is called when the file is being removed
 480 * from the task's fd array.
 481 */
 482void locks_remove_posix(struct file *filp, fl_owner_t owner)
 483{
 484        struct inode * inode = filp->f_dentry->d_inode;
 485        struct file_lock file_lock, *fl;
 486        struct file_lock **before;
 487
 488        /*
 489         * For POSIX locks we free all locks on this file for the given task.
 490         */
 491repeat:
 492        before = &inode->i_flock;
 493        while ((fl = *before) != NULL) {
 494                if ((fl->fl_flags & FL_POSIX) && fl->fl_owner == owner) {
 495                        int (*lock)(struct file *, int, struct file_lock *);
 496                        lock = NULL;
 497                        if(filp->f_op)
 498                                lock = filp->f_op->lock;
 499                        if (lock) {
 500                                file_lock = *fl;
 501                                file_lock.fl_type = F_UNLCK;
 502                        }
 503                        locks_delete_lock(before, 0);
 504                        if (lock) {
 505                                lock(filp, F_SETLK, &file_lock);
 506                                /* List may have changed: */
 507                                goto repeat;
 508                        }
 509                        continue;
 510                }
 511                before = &fl->fl_next;
 512        }
 513}
 514
 515/*
 516 * This function is called on the last close of an open file.
 517 */
 518void locks_remove_flock(struct file *filp)
 519{
 520        struct inode * inode = filp->f_dentry->d_inode; 
 521        struct file_lock file_lock, *fl;
 522        struct file_lock **before;
 523
 524repeat:
 525        before = &inode->i_flock;
 526        while ((fl = *before) != NULL) {
 527                if ((fl->fl_flags & FL_FLOCK) && fl->fl_file == filp) {
 528                        int (*lock)(struct file *, int, struct file_lock *);
 529                        lock = NULL;
 530                        if (filp->f_op)
 531                                lock = filp->f_op->lock;
 532                        if (lock) {
 533                                file_lock = *fl;
 534                                file_lock.fl_type = F_UNLCK;
 535                        }
 536                        locks_delete_lock(before, 0);
 537                        if (lock) {
 538                                lock(filp, F_SETLK, &file_lock);
 539                                /* List may have changed: */
 540                                goto repeat;
 541                        }
 542                        continue;
 543                }
 544                before = &fl->fl_next;
 545        }
 546}
 547
 548struct file_lock *
 549posix_test_lock(struct file *filp, struct file_lock *fl)
 550{
 551        struct file_lock *cfl;
 552
 553        for (cfl = filp->f_dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
 554                if (!(cfl->fl_flags & FL_POSIX))
 555                        continue;
 556                if (posix_locks_conflict(cfl, fl))
 557                        break;
 558        }
 559
 560        return (cfl);
 561}
 562
 563int locks_verify_locked(struct inode *inode)
 564{
 565        /* Candidates for mandatory locking have the setgid bit set
 566         * but no group execute bit -  an otherwise meaningless combination.
 567         */
 568        if (IS_MANDLOCK(inode) &&
 569            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 570                return (locks_mandatory_locked(inode));
 571        return (0);
 572}
 573
 574int locks_verify_area(int read_write, struct inode *inode, struct file *filp,
 575                      loff_t offset, size_t count)
 576{
 577        /* Candidates for mandatory locking have the setgid bit set
 578         * but no group execute bit -  an otherwise meaningless combination.
 579         */
 580        if (IS_MANDLOCK(inode) &&
 581            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 582                return (locks_mandatory_area(read_write, inode, filp, offset,
 583                                             count));
 584        return (0);
 585}
 586
 587int locks_mandatory_locked(struct inode *inode)
 588{
 589        fl_owner_t owner = current->files;
 590        struct file_lock *fl;
 591
 592        /* Search the lock list for this inode for any POSIX locks.
 593         */
 594        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 595                if (!(fl->fl_flags & FL_POSIX))
 596                        continue;
 597                if (fl->fl_owner != owner)
 598                        return (-EAGAIN);
 599        }
 600        return (0);
 601}
 602
 603int locks_mandatory_area(int read_write, struct inode *inode,
 604                         struct file *filp, loff_t offset,
 605                         size_t count)
 606{
 607        struct file_lock *fl;
 608        struct file_lock tfl;
 609
 610        memset(&tfl, 0, sizeof(tfl));
 611
 612        tfl.fl_file = filp;
 613        tfl.fl_flags = FL_POSIX | FL_ACCESS;
 614        tfl.fl_owner = current->files;
 615        tfl.fl_pid = current->pid;
 616        tfl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
 617        tfl.fl_start = offset;
 618        tfl.fl_end = offset + count - 1;
 619
 620repeat:
 621        /* Search the lock list for this inode for locks that conflict with
 622         * the proposed read/write.
 623         */
 624        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 625                if (!(fl->fl_flags & FL_POSIX))
 626                        continue;
 627                /* Block for writes against a "read" lock,
 628                 * and both reads and writes against a "write" lock.
 629                 */
 630                if (posix_locks_conflict(fl, &tfl)) {
 631                        if (filp && (filp->f_flags & O_NONBLOCK))
 632                                return (-EAGAIN);
 633                        if (signal_pending(current))
 634                                return (-ERESTARTSYS);
 635                        if (posix_locks_deadlock(&tfl, fl))
 636                                return (-EDEADLK);
 637
 638                        locks_insert_block(fl, &tfl);
 639                        interruptible_sleep_on(&tfl.fl_wait);
 640                        locks_delete_block(fl, &tfl);
 641
 642                        if (signal_pending(current))
 643                                return (-ERESTARTSYS);
 644                        /* If we've been sleeping someone might have
 645                         * changed the permissions behind our back.
 646                         */
 647                        if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID)
 648                                break;
 649                        goto repeat;
 650                }
 651        }
 652        return (0);
 653}
 654
 655/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
 656 * style lock.
 657 */
 658static int posix_make_lock(struct file *filp, struct file_lock *fl,
 659                           struct flock *l)
 660{
 661        off_t start;
 662
 663        memset(fl, 0, sizeof(*fl));
 664        
 665        fl->fl_flags = FL_POSIX;
 666
 667        switch (l->l_type) {
 668        case F_RDLCK:
 669        case F_WRLCK:
 670        case F_UNLCK:
 671                fl->fl_type = l->l_type;
 672                break;
 673        default:
 674                return (0);
 675        }
 676
 677        switch (l->l_whence) {
 678        case 0: /*SEEK_SET*/
 679                start = 0;
 680                break;
 681        case 1: /*SEEK_CUR*/
 682                start = filp->f_pos;
 683                break;
 684        case 2: /*SEEK_END*/
 685                start = filp->f_dentry->d_inode->i_size;
 686                break;
 687        default:
 688                return (0);
 689        }
 690
 691        if (((start += l->l_start) < 0) || (l->l_len < 0))
 692                return (0);
 693        fl->fl_end = start + l->l_len - 1;
 694        if (l->l_len > 0 && fl->fl_end < 0)
 695                return (0);
 696        fl->fl_start = start;   /* we record the absolute position */
 697        if (l->l_len == 0)
 698                fl->fl_end = OFFSET_MAX;
 699        
 700        fl->fl_file = filp;
 701        fl->fl_owner = current->files;
 702        fl->fl_pid = current->pid;
 703
 704        return (1);
 705}
 706
 707/* Verify a call to flock() and fill in a file_lock structure with
 708 * an appropriate FLOCK lock.
 709 */
 710static int flock_make_lock(struct file *filp, struct file_lock *fl,
 711                           unsigned int cmd)
 712{
 713        memset(fl, 0, sizeof(*fl));
 714
 715        if (!filp->f_dentry)    /* just in case */
 716                return (0);
 717
 718        switch (cmd & ~LOCK_NB) {
 719        case LOCK_SH:
 720                fl->fl_type = F_RDLCK;
 721                break;
 722        case LOCK_EX:
 723                fl->fl_type = F_WRLCK;
 724                break;
 725        case LOCK_UN:
 726                fl->fl_type = F_UNLCK;
 727                break;
 728        default:
 729                return (0);
 730        }
 731
 732        fl->fl_flags = FL_FLOCK;
 733        fl->fl_start = 0;
 734        fl->fl_end = OFFSET_MAX;
 735        fl->fl_file = filp;
 736        fl->fl_owner = NULL;
 737        fl->fl_pid = current->pid;
 738        
 739        return (1);
 740}
 741
 742/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 743 * checking before calling the locks_conflict().
 744 */
 745static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 746{
 747        /* POSIX locks owned by the same process do not conflict with
 748         * each other.
 749         */
 750        if (!(sys_fl->fl_flags & FL_POSIX) ||
 751            locks_same_owner(caller_fl, sys_fl))
 752                return (0);
 753
 754        return (locks_conflict(caller_fl, sys_fl));
 755}
 756
 757/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
 758 * checking before calling the locks_conflict().
 759 */
 760static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 761{
 762        /* FLOCK locks referring to the same filp do not conflict with
 763         * each other.
 764         */
 765        if (!(sys_fl->fl_flags & FL_FLOCK) ||
 766            (caller_fl->fl_file == sys_fl->fl_file))
 767                return (0);
 768
 769        return (locks_conflict(caller_fl, sys_fl));
 770}
 771
 772/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
 773 * checks for overlapping locks and shared/exclusive status.
 774 */
 775static int locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 776{
 777        if (!locks_overlap(caller_fl, sys_fl))
 778                return (0);
 779
 780        switch (caller_fl->fl_type) {
 781        case F_RDLCK:
 782                return (sys_fl->fl_type == F_WRLCK);
 783                
 784        case F_WRLCK:
 785                return (1);
 786
 787        default:
 788                printk("locks_conflict(): impossible lock type - %d\n",
 789                       caller_fl->fl_type);
 790                break;
 791        }
 792        return (0);     /* This should never happen */
 793}
 794
 795/* This function tests for deadlock condition before putting a process to
 796 * sleep. The detection scheme is no longer recursive. Recursive was neat,
 797 * but dangerous - we risked stack corruption if the lock data was bad, or
 798 * if the recursion was too deep for any other reason.
 799 *
 800 * We rely on the fact that a task can only be on one lock's wait queue
 801 * at a time. When we find blocked_task on a wait queue we can re-search
 802 * with blocked_task equal to that queue's owner, until either blocked_task
 803 * isn't found, or blocked_task is found on a queue owned by my_task.
 804 *
 805 * Note: the above assumption may not be true when handling lock requests
 806 * from a broken NFS client. But broken NFS clients have a lot more to
 807 * worry about than proper deadlock detection anyway... --okir
 808 */
 809static int posix_locks_deadlock(struct file_lock *caller_fl,
 810                                struct file_lock *block_fl)
 811{
 812        struct file_lock *fl;
 813        struct file_lock *bfl;
 814        void             *caller_owner, *blocked_owner;
 815        unsigned int     caller_pid, blocked_pid;
 816
 817        caller_owner = caller_fl->fl_owner;
 818        caller_pid = caller_fl->fl_pid;
 819        blocked_owner = block_fl->fl_owner;
 820        blocked_pid = block_fl->fl_pid;
 821
 822next_task:
 823        if (caller_owner == blocked_owner && caller_pid == blocked_pid)
 824                return (1);
 825        for (fl = file_lock_table; fl != NULL; fl = fl->fl_nextlink) {
 826                if (fl->fl_owner == NULL || fl->fl_nextblock == NULL)
 827                        continue;
 828                for (bfl = fl->fl_nextblock; bfl != fl; bfl = bfl->fl_nextblock) {
 829                        if (bfl->fl_owner == blocked_owner &&
 830                            bfl->fl_pid == blocked_pid) {
 831                                if (fl->fl_owner == caller_owner &&
 832                                    fl->fl_pid == caller_pid) {
 833                                        return (1);
 834                                }
 835                                blocked_owner = fl->fl_owner;
 836                                blocked_pid = fl->fl_pid;
 837                                goto next_task;
 838                        }
 839                }
 840        }
 841        return (0);
 842}
 843
 844/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks at
 845 * the head of the list, but that's secret knowledge known only to the next
 846 * two functions.
 847 */
 848static int flock_lock_file(struct file *filp, struct file_lock *caller,
 849                           unsigned int wait)
 850{
 851        struct file_lock *fl;
 852        struct file_lock *new_fl = NULL;
 853        struct file_lock **before;
 854        struct inode * inode = filp->f_dentry->d_inode;
 855        int error, change;
 856        int unlock = (caller->fl_type == F_UNLCK);
 857
 858        /*
 859         * If we need a new lock, get it in advance to avoid races.
 860         */
 861        if (!unlock) {
 862                error = -ENOLCK;
 863                new_fl = locks_alloc_lock(caller);
 864                if (!new_fl)
 865                        goto out;
 866        }
 867
 868        error = 0;
 869search:
 870        change = 0;
 871        before = &inode->i_flock;
 872        while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) {
 873                if (caller->fl_file == fl->fl_file) {
 874                        if (caller->fl_type == fl->fl_type)
 875                                goto out;
 876                        change = 1;
 877                        break;
 878                }
 879                before = &fl->fl_next;
 880        }
 881        /* change means that we are changing the type of an existing lock, or
 882         * or else unlocking it.
 883         */
 884        if (change) {
 885                /* N.B. What if the wait argument is false? */
 886                locks_delete_lock(before, !unlock);
 887                /*
 888                 * If we waited, another lock may have been added ...
 889                 */
 890                if (!unlock)
 891                        goto search;
 892        }
 893        if (unlock)
 894                goto out;
 895
 896repeat:
 897        /* Check signals each time we start */
 898        error = -ERESTARTSYS;
 899        if (signal_pending(current))
 900                goto out;
 901        for (fl = inode->i_flock; (fl != NULL) && (fl->fl_flags & FL_FLOCK);
 902             fl = fl->fl_next) {
 903                if (!flock_locks_conflict(new_fl, fl))
 904                        continue;
 905                error = -EAGAIN;
 906                if (!wait)
 907                        goto out;
 908                locks_insert_block(fl, new_fl);
 909                interruptible_sleep_on(&new_fl->fl_wait);
 910                locks_delete_block(fl, new_fl);
 911                goto repeat;
 912        }
 913        locks_insert_lock(&inode->i_flock, new_fl);
 914        new_fl = NULL;
 915        error = 0;
 916
 917out:
 918        if (new_fl)
 919                locks_free_lock(new_fl);
 920        return error;
 921}
 922
 923/* Add a POSIX style lock to a file.
 924 * We merge adjacent locks whenever possible. POSIX locks are sorted by owner
 925 * task, then by starting address
 926 *
 927 * Kai Petzke writes:
 928 * To make freeing a lock much faster, we keep a pointer to the lock before the
 929 * actual one. But the real gain of the new coding was, that lock_it() and
 930 * unlock_it() became one function.
 931 *
 932 * To all purists: Yes, I use a few goto's. Just pass on to the next function.
 933 */
 934
 935int posix_lock_file(struct file *filp, struct file_lock *caller,
 936                           unsigned int wait)
 937{
 938        struct file_lock *fl;
 939        struct file_lock *new_fl, *new_fl2;
 940        struct file_lock *left = NULL;
 941        struct file_lock *right = NULL;
 942        struct file_lock **before;
 943        struct inode * inode = filp->f_dentry->d_inode;
 944        int error, added = 0;
 945
 946        /*
 947         * We may need two file_lock structures for this operation,
 948         * so we get them in advance to avoid races.
 949         */
 950        new_fl  = locks_empty_lock();
 951        new_fl2 = locks_empty_lock();
 952        error = -ENOLCK; /* "no luck" */
 953        if (!(new_fl && new_fl2))
 954                goto out;
 955
 956        if (caller->fl_type != F_UNLCK) {
 957  repeat:
 958                for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 959                        if (!(fl->fl_flags & FL_POSIX))
 960                                continue;
 961                        if (!posix_locks_conflict(caller, fl))
 962                                continue;
 963                        error = -EAGAIN;
 964                        if (!wait)
 965                                goto out;
 966                        error = -EDEADLK;
 967                        if (posix_locks_deadlock(caller, fl))
 968                                goto out;
 969                        error = -ERESTARTSYS;
 970                        if (signal_pending(current))
 971                                goto out;
 972                        locks_insert_block(fl, caller);
 973                        interruptible_sleep_on(&caller->fl_wait);
 974                        locks_delete_block(fl, caller);
 975                        goto repeat;
 976                }
 977        }
 978
 979        /*
 980         * We've allocated the new locks in advance, so there are no
 981         * errors possible (and no blocking operations) from here on.
 982         * 
 983         * Find the first old lock with the same owner as the new lock.
 984         */
 985        
 986        before = &inode->i_flock;
 987
 988        /* First skip locks owned by other processes.
 989         */
 990        while ((fl = *before) && (!(fl->fl_flags & FL_POSIX) ||
 991                                  !locks_same_owner(caller, fl))) {
 992                before = &fl->fl_next;
 993        }
 994
 995        /* Process locks with this owner.
 996         */
 997        while ((fl = *before) && locks_same_owner(caller, fl)) {
 998                /* Detect adjacent or overlapping regions (if same lock type)
 999                 */
1000                if (caller->fl_type == fl->fl_type) {
1001                        if (fl->fl_end < caller->fl_start - 1)
1002                                goto next_lock;
1003                        /* If the next lock in the list has entirely bigger
1004                         * addresses than the new one, insert the lock here.
1005                         */
1006                        if (fl->fl_start > caller->fl_end + 1)
1007                                break;
1008
1009                        /* If we come here, the new and old lock are of the
1010                         * same type and adjacent or overlapping. Make one
1011                         * lock yielding from the lower start address of both
1012                         * locks to the higher end address.
1013                         */
1014                        if (fl->fl_start > caller->fl_start)
1015                                fl->fl_start = caller->fl_start;
1016                        else
1017                                caller->fl_start = fl->fl_start;
1018                        if (fl->fl_end < caller->fl_end)
1019                                fl->fl_end = caller->fl_end;
1020                        else
1021                                caller->fl_end = fl->fl_end;
1022                        if (added) {
1023                                locks_delete_lock(before, 0);
1024                                continue;
1025                        }
1026                        caller = fl;
1027                        added = 1;
1028                }
1029                else {
1030                        /* Processing for different lock types is a bit
1031                         * more complex.
1032                         */
1033                        if (fl->fl_end < caller->fl_start)
1034                                goto next_lock;
1035                        if (fl->fl_start > caller->fl_end)
1036                                break;
1037                        if (caller->fl_type == F_UNLCK)
1038                                added = 1;
1039                        if (fl->fl_start < caller->fl_start)
1040                                left = fl;
1041                        /* If the next lock in the list has a higher end
1042                         * address than the new one, insert the new one here.
1043                         */
1044                        if (fl->fl_end > caller->fl_end) {
1045                                right = fl;
1046                                break;
1047                        }
1048                        if (fl->fl_start >= caller->fl_start) {
1049                                /* The new lock completely replaces an old
1050                                 * one (This may happen several times).
1051                                 */
1052                                if (added) {
1053                                        locks_delete_lock(before, 0);
1054                                        continue;
1055                                }
1056                                /* Replace the old lock with the new one.
1057                                 * Wake up anybody waiting for the old one,
1058                                 * as the change in lock type might satisfy
1059                                 * their needs.
1060                                 */
1061                                locks_wake_up_blocks(fl, 0);
1062                                fl->fl_start = caller->fl_start;
1063                                fl->fl_end = caller->fl_end;
1064                                fl->fl_type = caller->fl_type;
1065                                fl->fl_u = caller->fl_u;
1066                                caller = fl;
1067                                added = 1;
1068                        }
1069                }
1070                /* Go on to next lock.
1071                 */
1072        next_lock:
1073                before = &fl->fl_next;
1074        }
1075
1076        error = 0;
1077        if (!added) {
1078                if (caller->fl_type == F_UNLCK)
1079                        goto out;
1080                locks_init_lock(new_fl, caller);
1081                locks_insert_lock(before, new_fl);
1082                new_fl = NULL;
1083        }
1084        if (right) {
1085                if (left == right) {
1086                        /* The new lock breaks the old one in two pieces,
1087                         * so we have to use the second new lock (in this
1088                         * case, even F_UNLCK may fail!).
1089                         */
1090                        left = locks_init_lock(new_fl2, right);
1091                        locks_insert_lock(before, left);
1092                        new_fl2 = NULL;
1093                }
1094                right->fl_start = caller->fl_end + 1;
1095                locks_wake_up_blocks(right, 0);
1096        }
1097        if (left) {
1098                left->fl_end = caller->fl_start - 1;
1099                locks_wake_up_blocks(left, 0);
1100        }
1101out:
1102        /*
1103         * Free any unused locks.  (They haven't
1104         * ever been used, so we use kfree().)
1105         */
1106        if (new_fl)
1107                kfree(new_fl);
1108        if (new_fl2)
1109                kfree(new_fl2);
1110        return error;
1111}
1112
1113/*
1114 * Allocate an empty lock structure. We can use GFP_KERNEL now that
1115 * all allocations are done in advance.
1116 */
1117static struct file_lock *locks_empty_lock(void)
1118{
1119        /* Okay, let's make a new file_lock structure... */
1120        return ((struct file_lock *) kmalloc(sizeof(struct file_lock),
1121                                                GFP_KERNEL));
1122}
1123
1124/*
1125 * Initialize a new lock from an existing file_lock structure.
1126 */
1127static struct file_lock *locks_init_lock(struct file_lock *new,
1128                                         struct file_lock *fl)
1129{
1130        if (new) {
1131                memset(new, 0, sizeof(*new));
1132                new->fl_owner = fl->fl_owner;
1133                new->fl_pid = fl->fl_pid;
1134                new->fl_file = fl->fl_file;
1135                new->fl_flags = fl->fl_flags;
1136                new->fl_type = fl->fl_type;
1137                new->fl_start = fl->fl_start;
1138                new->fl_end = fl->fl_end;
1139                new->fl_notify = fl->fl_notify;
1140                new->fl_insert = fl->fl_insert;
1141                new->fl_remove = fl->fl_remove;
1142                new->fl_u = fl->fl_u;
1143        }
1144        return new;
1145}
1146
1147/* Insert file lock fl into an inode's lock list at the position indicated
1148 * by pos. At the same time add the lock to the global file lock list.
1149 */
1150static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
1151{
1152        fl->fl_nextlink = file_lock_table;
1153        fl->fl_prevlink = NULL;
1154        if (file_lock_table != NULL)
1155                file_lock_table->fl_prevlink = fl;
1156        file_lock_table = fl;
1157        fl->fl_next = *pos;     /* insert into file's list */
1158        *pos = fl;
1159
1160        if (fl->fl_insert)
1161                fl->fl_insert(fl);
1162
1163        return;
1164}
1165
1166/* Delete a lock and free it.
1167 * First remove our lock from the active lock lists. Then call
1168 * locks_wake_up_blocks() to wake up processes that are blocked
1169 * waiting for this lock. Finally free the lock structure.
1170 */
1171static void locks_delete_lock(struct file_lock **thisfl_p, unsigned int wait)
1172{
1173        struct file_lock *thisfl;
1174        struct file_lock *prevfl;
1175        struct file_lock *nextfl;
1176        
1177        thisfl = *thisfl_p;
1178        *thisfl_p = thisfl->fl_next;
1179
1180        prevfl = thisfl->fl_prevlink;
1181        nextfl = thisfl->fl_nextlink;
1182
1183        if (nextfl != NULL)
1184                nextfl->fl_prevlink = prevfl;
1185
1186        if (prevfl != NULL)
1187                prevfl->fl_nextlink = nextfl;
1188        else
1189                file_lock_table = nextfl;
1190
1191        if (thisfl->fl_remove)
1192                thisfl->fl_remove(thisfl);
1193        
1194        locks_wake_up_blocks(thisfl, wait);
1195        locks_free_lock(thisfl);
1196
1197        return;
1198}
1199
1200
1201static char *lock_get_status(struct file_lock *fl, int id, char *pfx)
1202{
1203        static char temp[155];
1204        char *p = temp;
1205        struct inode *inode;
1206
1207        inode = fl->fl_file->f_dentry->d_inode;
1208
1209        p += sprintf(p, "%d:%s ", id, pfx);
1210        if (fl->fl_flags & FL_POSIX) {
1211                p += sprintf(p, "%6s %s ",
1212                             (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
1213                             (IS_MANDLOCK(inode) &&
1214                              (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ?
1215                             "MANDATORY" : "ADVISORY ");
1216        }
1217        else {
1218                p += sprintf(p, "FLOCK  ADVISORY  ");
1219        }
1220        p += sprintf(p, "%s ", (fl->fl_type == F_RDLCK) ? "READ " : "WRITE");
1221        p += sprintf(p, "%d %s:%ld %ld %ld ",
1222                     fl->fl_pid,
1223                     kdevname(inode->i_dev), inode->i_ino, fl->fl_start,
1224                     fl->fl_end);
1225        sprintf(p, "%08lx %08lx %08lx %08lx %08lx\n",
1226                (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink,
1227                (long)fl->fl_next, (long)fl->fl_nextblock);
1228        return (temp);
1229}
1230
/* Copy the part of one status line that falls inside the requested
 * window [offset, offset + length) of the overall output.
 *
 * p:      text of the line.
 * q:      cursor into the destination buffer; advanced by the number of
 *         bytes copied.
 * pos:    position in the overall output just past the end of this line.
 * len:    length of the line in bytes.
 * offset: first output position the reader wants.
 * length: number of bytes the reader wants.
 *
 * Returns 1 if the caller should go on with the next line, or 0 once the
 * end of the window has been reached.
 *
 * A line that starts before the window has its leading bytes skipped and
 * a line that runs past the window is truncated. Both can apply to the
 * same line, in which case exactly "length" bytes are copied. Never more
 * than "length" bytes are written, and nothing is copied when the
 * computed count is not positive.
 */
static inline int copy_lock_status(char *p, char **q, off_t pos, int len,
				   off_t offset, off_t length)
{
	off_t avail = pos - offset;	/* bytes up to pos lying at/after offset */
	off_t skip = 0;			/* leading bytes that precede offset */
	off_t count;
	int more = 1;

	/* The whole line lies before the window: nothing to copy yet. */
	if (avail <= 0)
		return (1);

	if (avail < len)
		skip = len - avail;
	count = len - skip;

	/* The line reaches or crosses the end of the window: trim the
	 * excess and tell the caller to stop.
	 */
	if (avail >= length) {
		count -= avail - length;
		more = 0;
	}

	if (count > 0) {
		memcpy(*q, p + skip, (size_t) count);
		*q += count;
	}

	return (more);
}
1255
1256int get_locks_status(char *buffer, char **start, off_t offset, off_t length)
1257{
1258        struct file_lock *fl;
1259        struct file_lock *bfl;
1260        char *p;
1261        char *q = buffer;
1262        off_t i, len, pos = 0;
1263
1264        for (fl = file_lock_table, i = 1; fl != NULL; fl = fl->fl_nextlink, i++) {
1265                p = lock_get_status(fl, i, "");
1266                len = strlen(p);
1267                pos += len;
1268                if (!copy_lock_status(p, &q, pos, len, offset, length))
1269                        goto done;
1270                if ((bfl = fl->fl_nextblock) == NULL)
1271                        continue;
1272                do {
1273                        p = lock_get_status(bfl, i, " ->");
1274                        len = strlen(p);
1275                        pos += len;
1276                        if (!copy_lock_status(p, &q, pos, len, offset, length))
1277                                goto done;
1278                } while ((bfl = bfl->fl_nextblock) != fl);
1279        }
1280done:
1281        if (q != buffer)
1282                *start = buffer;
1283        return (q - buffer);
1284}
1285
1286
1287
1288
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.