linux/ipc/sem.c
   1/*
   2 * linux/ipc/sem.c
   3 * Copyright (C) 1992 Krishna Balasubramanian
   4 * Copyright (C) 1995 Eric Schenk, Bruno Haible
   5 *
   6 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
   7 *
   8 * SMP-threaded, sysctl's added
   9 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
  10 * Enforced range limit on SEM_UNDO
  11 * (c) 2001 Red Hat Inc
  12 * Lockless wakeup
  13 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
  14 * Further wakeup optimizations, documentation
  15 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
  16 *
  17 * support for audit of ipc object properties and permission changes
  18 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  19 *
  20 * namespaces support
  21 * OpenVZ, SWsoft Inc.
  22 * Pavel Emelianov <xemul@openvz.org>
  23 *
  24 * Implementation notes: (May 2010)
  25 * This file implements System V semaphores.
  26 *
  27 * User space visible behavior:
  28 * - FIFO ordering for semop() operations (just FIFO, not starvation
  29 *   protection)
  30 * - multiple semaphore operations that alter the same semaphore in
  31 *   one semop() are handled.
  32 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
  33 *   SETALL calls.
  34 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
  35 * - undo adjustments at process exit are limited to 0..SEMVMX.
   36 * - namespaces are supported.
   37 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
  38 *   to /proc/sys/kernel/sem.
  39 * - statistics about the usage are reported in /proc/sysvipc/sem.
  40 *
  41 * Internals:
  42 * - scalability:
  43 *   - all global variables are read-mostly.
  44 *   - semop() calls and semctl(RMID) are synchronized by RCU.
  45 *   - most operations do write operations (actually: spin_lock calls) to
  46 *     the per-semaphore array structure.
  47 *   Thus: Perfect SMP scaling between independent semaphore arrays.
  48 *         If multiple semaphores in one array are used, then cache line
   49 *         thrashing on the semaphore array spinlock will limit the scaling.
  50 * - semncnt and semzcnt are calculated on demand in count_semcnt()
  51 * - the task that performs a successful semop() scans the list of all
  52 *   sleeping tasks and completes any pending operations that can be fulfilled.
  53 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
  54 *   (see update_queue())
  55 * - To improve the scalability, the actual wake-up calls are performed after
  56 *   dropping all locks. (see wake_up_sem_queue_prepare(),
  57 *   wake_up_sem_queue_do())
  58 * - All work is done by the waker, the woken up task does not have to do
  59 *   anything - not even acquiring a lock or dropping a refcount.
  60 * - A woken up task may not even touch the semaphore array anymore, it may
  61 *   have been destroyed already by a semctl(RMID).
   62 * - The synchronization between wake-ups due to a timeout/signal and a
  63 *   wake-up due to a completed semaphore operation is achieved by using an
  64 *   intermediate state (IN_WAKEUP).
  65 * - UNDO values are stored in an array (one per process and per
  66 *   semaphore array, lazily allocated). For backwards compatibility, multiple
  67 *   modes for the UNDO variables are supported (per process, per thread)
  68 *   (see copy_semundo, CLONE_SYSVSEM)
  69 * - There are two lists of the pending operations: a per-array list
   70 *   and a per-semaphore list (stored in the array). This allows FIFO
  71 *   ordering without always scanning all pending operations.
  72 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
  73 */
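
/*
 * Illustrative user-space sketch of the behavior listed above (not part of
 * this file; error handling is omitted and the key, permission bits and
 * initial value are arbitrary example choices):
 *
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	int example(void)
 *	{
 *		struct sembuf op = { .sem_num = 0, .sem_op = -1,
 *				     .sem_flg = SEM_UNDO };
 *		int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *
 *		semctl(id, 0, SETVAL, 1);	// updates sem_ctime
 *		semop(id, &op, 1);		// may sleep, FIFO order, undone at exit
 *		op.sem_op = 1;
 *		semop(id, &op, 1);		// single increments never sleep
 *		semctl(id, 0, IPC_RMID);	// sleepers would fail with EIDRM
 *		return 0;
 *	}
 */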
  74
  75#include <linux/slab.h>
  76#include <linux/spinlock.h>
  77#include <linux/init.h>
  78#include <linux/proc_fs.h>
  79#include <linux/time.h>
  80#include <linux/security.h>
  81#include <linux/syscalls.h>
  82#include <linux/audit.h>
  83#include <linux/capability.h>
  84#include <linux/seq_file.h>
  85#include <linux/rwsem.h>
  86#include <linux/nsproxy.h>
  87#include <linux/ipc_namespace.h>
  88
  89#include <linux/uaccess.h>
  90#include "util.h"
  91
  92/* One semaphore structure for each semaphore in the system. */
  93struct sem {
  94        int     semval;         /* current value */
  95        int     sempid;         /* pid of last operation */
  96        spinlock_t      lock;   /* spinlock for fine-grained semtimedop */
  97        struct list_head pending_alter; /* pending single-sop operations */
  98                                        /* that alter the semaphore */
  99        struct list_head pending_const; /* pending single-sop operations */
  100                                        /* that do not alter the semaphore */
 101        time_t  sem_otime;      /* candidate for sem_otime */
 102} ____cacheline_aligned_in_smp;
 103
 104/* One queue for each sleeping process in the system. */
 105struct sem_queue {
 106        struct list_head        list;    /* queue of pending operations */
 107        struct task_struct      *sleeper; /* this process */
 108        struct sem_undo         *undo;   /* undo structure */
 109        int                     pid;     /* process id of requesting process */
 110        int                     status;  /* completion status of operation */
 111        struct sembuf           *sops;   /* array of pending operations */
 112        struct sembuf           *blocking; /* the operation that blocked */
 113        int                     nsops;   /* number of operations */
 114        int                     alter;   /* does *sops alter the array? */
 115};
 116
 117/* Each task has a list of undo requests. They are executed automatically
 118 * when the process exits.
 119 */
 120struct sem_undo {
 121        struct list_head        list_proc;      /* per-process list: *
 122                                                 * all undos from one process
 123                                                 * rcu protected */
 124        struct rcu_head         rcu;            /* rcu struct for sem_undo */
 125        struct sem_undo_list    *ulp;           /* back ptr to sem_undo_list */
 126        struct list_head        list_id;        /* per semaphore array list:
 127                                                 * all undos for one array */
 128        int                     semid;          /* semaphore set identifier */
 129        short                   *semadj;        /* array of adjustments */
 130                                                /* one per semaphore */
 131};
 132
 133/* sem_undo_list controls shared access to the list of sem_undo structures
  134 * that may be shared among all tasks in a CLONE_SYSVSEM task group.
 135 */
 136struct sem_undo_list {
 137        atomic_t                refcnt;
 138        spinlock_t              lock;
 139        struct list_head        list_proc;
 140};
 141
 142
 143#define sem_ids(ns)     ((ns)->ids[IPC_SEM_IDS])
 144
 145#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid)
 146
 147static int newary(struct ipc_namespace *, struct ipc_params *);
 148static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
 149#ifdef CONFIG_PROC_FS
 150static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 151#endif
 152
 153#define SEMMSL_FAST     256 /* 512 bytes on stack */
 154#define SEMOPM_FAST     64  /* ~ 372 bytes on stack */
 155
 156/*
 157 * Locking:
 158 *      sem_undo.id_next,
 159 *      sem_array.complex_count,
  160 *      sem_array.pending{_alter,_const},
 161 *      sem_array.sem_undo: global sem_lock() for read/write
 162 *      sem_undo.proc_next: only "current" is allowed to read/write that field.
 163 *
 164 *      sem_array.sem_base[i].pending_{const,alter}:
 165 *              global or semaphore sem_lock() for read/write
 166 */
 167
 168#define sc_semmsl       sem_ctls[0]
 169#define sc_semmns       sem_ctls[1]
 170#define sc_semopm       sem_ctls[2]
 171#define sc_semmni       sem_ctls[3]
 172
 173void sem_init_ns(struct ipc_namespace *ns)
 174{
 175        ns->sc_semmsl = SEMMSL;
 176        ns->sc_semmns = SEMMNS;
 177        ns->sc_semopm = SEMOPM;
 178        ns->sc_semmni = SEMMNI;
 179        ns->used_sems = 0;
 180        ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
 181}
 182
 183#ifdef CONFIG_IPC_NS
 184void sem_exit_ns(struct ipc_namespace *ns)
 185{
 186        free_ipcs(ns, &sem_ids(ns), freeary);
 187        idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
 188}
 189#endif
 190
 191void __init sem_init(void)
 192{
 193        sem_init_ns(&init_ipc_ns);
 194        ipc_init_proc_interface("sysvipc/sem",
 195                                "       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
 196                                IPC_SEM_IDS, sysvipc_sem_proc_show);
 197}
 198
 199/**
 200 * unmerge_queues - unmerge queues, if possible.
 201 * @sma: semaphore array
 202 *
 203 * The function unmerges the wait queues if complex_count is 0.
 204 * It must be called prior to dropping the global semaphore array lock.
 205 */
 206static void unmerge_queues(struct sem_array *sma)
 207{
 208        struct sem_queue *q, *tq;
 209
 210        /* complex operations still around? */
 211        if (sma->complex_count)
 212                return;
 213        /*
 214         * We will switch back to simple mode.
 215         * Move all pending operation back into the per-semaphore
 216         * queues.
 217         */
 218        list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
 219                struct sem *curr;
 220                curr = &sma->sem_base[q->sops[0].sem_num];
 221
 222                list_add_tail(&q->list, &curr->pending_alter);
 223        }
 224        INIT_LIST_HEAD(&sma->pending_alter);
 225}
 226
 227/**
 228 * merge_queues - merge single semop queues into global queue
 229 * @sma: semaphore array
 230 *
 231 * This function merges all per-semaphore queues into the global queue.
 232 * It is necessary to achieve FIFO ordering for the pending single-sop
 233 * operations when a multi-semop operation must sleep.
 234 * Only the alter operations must be moved, the const operations can stay.
 235 */
 236static void merge_queues(struct sem_array *sma)
 237{
 238        int i;
 239        for (i = 0; i < sma->sem_nsems; i++) {
 240                struct sem *sem = sma->sem_base + i;
 241
 242                list_splice_init(&sem->pending_alter, &sma->pending_alter);
 243        }
 244}
 245
 246static void sem_rcu_free(struct rcu_head *head)
 247{
 248        struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
 249        struct sem_array *sma = ipc_rcu_to_struct(p);
 250
 251        security_sem_free(sma);
 252        ipc_rcu_free(head);
 253}
 254
 255/*
 256 * Wait until all currently ongoing simple ops have completed.
 257 * Caller must own sem_perm.lock.
 258 * New simple ops cannot start, because simple ops first check
  259 * that a) sem_perm.lock is free and
  260 * b) complex_count is 0.
 261 */
 262static void sem_wait_array(struct sem_array *sma)
 263{
 264        int i;
 265        struct sem *sem;
 266
 267        if (sma->complex_count)  {
 268                /* The thread that increased sma->complex_count waited on
 269                 * all sem->lock locks. Thus we don't need to wait again.
 270                 */
 271                return;
 272        }
 273
 274        for (i = 0; i < sma->sem_nsems; i++) {
 275                sem = sma->sem_base + i;
 276                spin_unlock_wait(&sem->lock);
 277        }
 278}
 279
 280/*
 281 * If the request contains only one semaphore operation, and there are
 282 * no complex transactions pending, lock only the semaphore involved.
 283 * Otherwise, lock the entire semaphore array, since we either have
 284 * multiple semaphores in our own semops, or we need to look at
 285 * semaphores from other pending complex operations.
 286 */
 287static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 288                              int nsops)
 289{
 290        struct sem *sem;
 291
 292        if (nsops != 1) {
 293                /* Complex operation - acquire a full lock */
 294                ipc_lock_object(&sma->sem_perm);
 295
 296                /* And wait until all simple ops that are processed
 297                 * right now have dropped their locks.
 298                 */
 299                sem_wait_array(sma);
 300                return -1;
 301        }
 302
 303        /*
 304         * Only one semaphore affected - try to optimize locking.
 305         * The rules are:
 306         * - optimized locking is possible if no complex operation
 307         *   is either enqueued or processed right now.
 308         * - The test for enqueued complex ops is simple:
 309         *      sma->complex_count != 0
 310         * - Testing for complex ops that are processed right now is
 311         *   a bit more difficult. Complex ops acquire the full lock
  312 *   and first wait until the running simple ops have completed.
 313         *   (see above)
 314         *   Thus: If we own a simple lock and the global lock is free
 315         *      and complex_count is now 0, then it will stay 0 and
 316         *      thus just locking sem->lock is sufficient.
 317         */
 318        sem = sma->sem_base + sops->sem_num;
 319
 320        if (sma->complex_count == 0) {
 321                /*
 322                 * It appears that no complex operation is around.
 323                 * Acquire the per-semaphore lock.
 324                 */
 325                spin_lock(&sem->lock);
 326
 327                /* Then check that the global lock is free */
 328                if (!spin_is_locked(&sma->sem_perm.lock)) {
 329                        /*
 330                         * The ipc object lock check must be visible on all
 331                         * cores before rechecking the complex count.  Otherwise
  332                         * we can race with another thread that does:
 333                         *      complex_count++;
 334                         *      spin_unlock(sem_perm.lock);
 335                         */
 336                        smp_rmb();
 337
 338                        /*
 339                         * Now repeat the test of complex_count:
 340                         * It can't change anymore until we drop sem->lock.
  341                         * Thus: if it is now 0, then it will stay 0.
 342                         */
 343                        if (sma->complex_count == 0) {
 344                                /* fast path successful! */
 345                                return sops->sem_num;
 346                        }
 347                }
 348                spin_unlock(&sem->lock);
 349        }
 350
 351        /* slow path: acquire the full lock */
 352        ipc_lock_object(&sma->sem_perm);
 353
 354        if (sma->complex_count == 0) {
 355                /* False alarm:
 356                 * There is no complex operation, thus we can switch
 357                 * back to the fast path.
 358                 */
 359                spin_lock(&sem->lock);
 360                ipc_unlock_object(&sma->sem_perm);
 361                return sops->sem_num;
 362        } else {
 363                /* Not a false alarm, thus complete the sequence for a
 364                 * full lock.
 365                 */
 366                sem_wait_array(sma);
 367                return -1;
 368        }
 369}
 370
 371static inline void sem_unlock(struct sem_array *sma, int locknum)
 372{
 373        if (locknum == -1) {
 374                unmerge_queues(sma);
 375                ipc_unlock_object(&sma->sem_perm);
 376        } else {
 377                struct sem *sem = sma->sem_base + locknum;
 378                spin_unlock(&sem->lock);
 379        }
 380}
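
/*
 * Sketch of the lock/unlock pairing used by the callers in this file
 * (locknum is -1 when the whole array was locked, otherwise it is the
 * index of the single semaphore whose per-semaphore lock was taken):
 *
 *	int locknum;
 *
 *	locknum = sem_lock(sma, sops, nsops);
 *	...				// operate on the array
 *	sem_unlock(sma, locknum);
 */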
 381
 382/*
 383 * sem_lock_(check_) routines are called in the paths where the rwsem
 384 * is not held.
 385 *
 386 * The caller holds the RCU read lock.
 387 */
 388static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
 389                        int id, struct sembuf *sops, int nsops, int *locknum)
 390{
 391        struct kern_ipc_perm *ipcp;
 392        struct sem_array *sma;
 393
 394        ipcp = ipc_obtain_object(&sem_ids(ns), id);
 395        if (IS_ERR(ipcp))
 396                return ERR_CAST(ipcp);
 397
 398        sma = container_of(ipcp, struct sem_array, sem_perm);
 399        *locknum = sem_lock(sma, sops, nsops);
 400
 401        /* ipc_rmid() may have already freed the ID while sem_lock
 402         * was spinning: verify that the structure is still valid
 403         */
 404        if (ipc_valid_object(ipcp))
 405                return container_of(ipcp, struct sem_array, sem_perm);
 406
 407        sem_unlock(sma, *locknum);
 408        return ERR_PTR(-EINVAL);
 409}
 410
 411static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
 412{
 413        struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
 414
 415        if (IS_ERR(ipcp))
 416                return ERR_CAST(ipcp);
 417
 418        return container_of(ipcp, struct sem_array, sem_perm);
 419}
 420
 421static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
 422                                                        int id)
 423{
 424        struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
 425
 426        if (IS_ERR(ipcp))
 427                return ERR_CAST(ipcp);
 428
 429        return container_of(ipcp, struct sem_array, sem_perm);
 430}
 431
 432static inline void sem_lock_and_putref(struct sem_array *sma)
 433{
 434        sem_lock(sma, NULL, -1);
 435        ipc_rcu_putref(sma, ipc_rcu_free);
 436}
 437
 438static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 439{
 440        ipc_rmid(&sem_ids(ns), &s->sem_perm);
 441}
 442
 443/*
 444 * Lockless wakeup algorithm:
 445 * Without the check/retry algorithm a lockless wakeup is possible:
 446 * - queue.status is initialized to -EINTR before blocking.
 447 * - wakeup is performed by
 448 *      * unlinking the queue entry from the pending list
 449 *      * setting queue.status to IN_WAKEUP
 450 *        This is the notification for the blocked thread that a
 451 *        result value is imminent.
 452 *      * call wake_up_process
 453 *      * set queue.status to the final value.
 454 * - the previously blocked thread checks queue.status:
 455 *      * if it's IN_WAKEUP, then it must wait until the value changes
 456 *      * if it's not -EINTR, then the operation was completed by
 457 *        update_queue. semtimedop can return queue.status without
 458 *        performing any operation on the sem array.
 459 *      * otherwise it must acquire the spinlock and check what's up.
 460 *
 461 * The two-stage algorithm is necessary to protect against the following
 462 * races:
 463 * - if queue.status is set after wake_up_process, then the woken up idle
 464 *   thread could race forward and try (and fail) to acquire sma->lock
 465 *   before update_queue had a chance to set queue.status
 466 * - if queue.status is written before wake_up_process and if the
 467 *   blocked process is woken up by a signal between writing
 468 *   queue.status and the wake_up_process, then the woken up
 469 *   process could return from semtimedop and die by calling
 470 *   sys_exit before wake_up_process is called. Then wake_up_process
 471 *   will oops, because the task structure is already invalid.
 472 *   (yes, this happened on s390 with sysv msg).
 473 *
 474 */
 475#define IN_WAKEUP       1
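
/*
 * Sleeper-side check implied by the protocol above, as a sketch (the helper
 * name is hypothetical and is used here only for illustration):
 *
 *	static int example_get_queue_result(struct sem_queue *q)
 *	{
 *		int error = q->status;
 *
 *		while (unlikely(error == IN_WAKEUP)) {
 *			cpu_relax();	// waker is about to store the final value
 *			error = q->status;
 *		}
 *		return error;
 *	}
 */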
 476
 477/**
 478 * newary - Create a new semaphore set
 479 * @ns: namespace
 480 * @params: ptr to the structure that contains key, semflg and nsems
 481 *
 482 * Called with sem_ids.rwsem held (as a writer)
 483 */
 484static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 485{
 486        int id;
 487        int retval;
 488        struct sem_array *sma;
 489        int size;
 490        key_t key = params->key;
 491        int nsems = params->u.nsems;
 492        int semflg = params->flg;
 493        int i;
 494
 495        if (!nsems)
 496                return -EINVAL;
 497        if (ns->used_sems + nsems > ns->sc_semmns)
 498                return -ENOSPC;
 499
 500        size = sizeof(*sma) + nsems * sizeof(struct sem);
 501        sma = ipc_rcu_alloc(size);
 502        if (!sma)
 503                return -ENOMEM;
 504
 505        memset(sma, 0, size);
 506
 507        sma->sem_perm.mode = (semflg & S_IRWXUGO);
 508        sma->sem_perm.key = key;
 509
 510        sma->sem_perm.security = NULL;
 511        retval = security_sem_alloc(sma);
 512        if (retval) {
 513                ipc_rcu_putref(sma, ipc_rcu_free);
 514                return retval;
 515        }
 516
 517        sma->sem_base = (struct sem *) &sma[1];
 518
 519        for (i = 0; i < nsems; i++) {
 520                INIT_LIST_HEAD(&sma->sem_base[i].pending_alter);
 521                INIT_LIST_HEAD(&sma->sem_base[i].pending_const);
 522                spin_lock_init(&sma->sem_base[i].lock);
 523        }
 524
 525        sma->complex_count = 0;
 526        INIT_LIST_HEAD(&sma->pending_alter);
 527        INIT_LIST_HEAD(&sma->pending_const);
 528        INIT_LIST_HEAD(&sma->list_id);
 529        sma->sem_nsems = nsems;
 530        sma->sem_ctime = get_seconds();
 531
 532        id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 533        if (id < 0) {
 534                ipc_rcu_putref(sma, sem_rcu_free);
 535                return id;
 536        }
 537        ns->used_sems += nsems;
 538
 539        sem_unlock(sma, -1);
 540        rcu_read_unlock();
 541
 542        return sma->sem_perm.id;
 543}
 544
 545
 546/*
 547 * Called with sem_ids.rwsem and ipcp locked.
 548 */
 549static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 550{
 551        struct sem_array *sma;
 552
 553        sma = container_of(ipcp, struct sem_array, sem_perm);
 554        return security_sem_associate(sma, semflg);
 555}
 556
 557/*
 558 * Called with sem_ids.rwsem and ipcp locked.
 559 */
 560static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
 561                                struct ipc_params *params)
 562{
 563        struct sem_array *sma;
 564
 565        sma = container_of(ipcp, struct sem_array, sem_perm);
 566        if (params->u.nsems > sma->sem_nsems)
 567                return -EINVAL;
 568
 569        return 0;
 570}
 571
 572SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
 573{
 574        struct ipc_namespace *ns;
 575        static const struct ipc_ops sem_ops = {
 576                .getnew = newary,
 577                .associate = sem_security,
 578                .more_checks = sem_more_checks,
 579        };
 580        struct ipc_params sem_params;
 581
 582        ns = current->nsproxy->ipc_ns;
 583
 584        if (nsems < 0 || nsems > ns->sc_semmsl)
 585                return -EINVAL;
 586
 587        sem_params.key = key;
 588        sem_params.flg = semflg;
 589        sem_params.u.nsems = nsems;
 590
 591        return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 592}
 593
 594/**
 595 * perform_atomic_semop - Perform (if possible) a semaphore operation
 596 * @sma: semaphore array
 597 * @q: struct sem_queue that describes the operation
 598 *
 599 * Returns 0 if the operation was possible.
  600 * Returns 1 if the operation is impossible; the caller must sleep.
 601 * Negative values are error codes.
 602 */
 603static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
 604{
 605        int result, sem_op, nsops, pid;
 606        struct sembuf *sop;
 607        struct sem *curr;
 608        struct sembuf *sops;
 609        struct sem_undo *un;
 610
 611        sops = q->sops;
 612        nsops = q->nsops;
 613        un = q->undo;
 614
 615        for (sop = sops; sop < sops + nsops; sop++) {
 616                curr = sma->sem_base + sop->sem_num;
 617                sem_op = sop->sem_op;
 618                result = curr->semval;
 619
 620                if (!sem_op && result)
 621                        goto would_block;
 622
 623                result += sem_op;
 624                if (result < 0)
 625                        goto would_block;
 626                if (result > SEMVMX)
 627                        goto out_of_range;
 628
 629                if (sop->sem_flg & SEM_UNDO) {
 630                        int undo = un->semadj[sop->sem_num] - sem_op;
 631                        /* Exceeding the undo range is an error. */
 632                        if (undo < (-SEMAEM - 1) || undo > SEMAEM)
 633                                goto out_of_range;
 634                        un->semadj[sop->sem_num] = undo;
 635                }
 636
 637                curr->semval = result;
 638        }
 639
 640        sop--;
 641        pid = q->pid;
 642        while (sop >= sops) {
 643                sma->sem_base[sop->sem_num].sempid = pid;
 644                sop--;
 645        }
 646
 647        return 0;
 648
 649out_of_range:
 650        result = -ERANGE;
 651        goto undo;
 652
 653would_block:
 654        q->blocking = sop;
 655
 656        if (sop->sem_flg & IPC_NOWAIT)
 657                result = -EAGAIN;
 658        else
 659                result = 1;
 660
 661undo:
 662        sop--;
 663        while (sop >= sops) {
 664                sem_op = sop->sem_op;
 665                sma->sem_base[sop->sem_num].semval -= sem_op;
 666                if (sop->sem_flg & SEM_UNDO)
 667                        un->semadj[sop->sem_num] += sem_op;
 668                sop--;
 669        }
 670
 671        return result;
 672}
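
/*
 * How callers in this file interpret the return value of
 * perform_atomic_semop() (compare wake_const_ops() and update_queue() below):
 *
 *	error = perform_atomic_semop(sma, q);
 *	if (error > 0)
 *		continue;			// still blocked, keep q queued
 *	unlink_queue(sma, q);			// 0 or negative: operation finished
 *	wake_up_sem_queue_prepare(pt, q, error);
 */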
 673
  674/** wake_up_sem_queue_prepare(pt, q, error): Prepare wake-up
  675 * @q: queue entry that must be signaled
  676 * @error: Error value for the signal
  677 *
  678 * Prepare the wake-up of the queue entry q: queue it on the list @pt.
 679 */
 680static void wake_up_sem_queue_prepare(struct list_head *pt,
 681                                struct sem_queue *q, int error)
 682{
 683        if (list_empty(pt)) {
 684                /*
 685                 * Hold preempt off so that we don't get preempted and have the
 686                 * wakee busy-wait until we're scheduled back on.
 687                 */
 688                preempt_disable();
 689        }
 690        q->status = IN_WAKEUP;
 691        q->pid = error;
 692
 693        list_add_tail(&q->list, pt);
 694}
 695
 696/**
 697 * wake_up_sem_queue_do - do the actual wake-up
 698 * @pt: list of tasks to be woken up
 699 *
 700 * Do the actual wake-up.
 701 * The function is called without any locks held, thus the semaphore array
 702 * could be destroyed already and the tasks can disappear as soon as the
 703 * status is set to the actual return code.
 704 */
 705static void wake_up_sem_queue_do(struct list_head *pt)
 706{
 707        struct sem_queue *q, *t;
 708        int did_something;
 709
 710        did_something = !list_empty(pt);
 711        list_for_each_entry_safe(q, t, pt, list) {
 712                wake_up_process(q->sleeper);
 713                /* q can disappear immediately after writing q->status. */
 714                smp_wmb();
 715                q->status = q->pid;
 716        }
 717        if (did_something)
 718                preempt_enable();
 719}
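
/*
 * Typical usage of the two wake-up helpers above, as a sketch of the pattern
 * used throughout this file (see e.g. semctl_setval() further down): wake-ups
 * are collected under the lock and performed only after every lock is dropped.
 *
 *	struct list_head tasks;
 *
 *	INIT_LIST_HEAD(&tasks);
 *	rcu_read_lock();
 *	sem_lock(sma, NULL, -1);
 *	...				// modify the semaphore values
 *	do_smart_update(sma, NULL, 0, 0, &tasks);
 *	sem_unlock(sma, -1);
 *	rcu_read_unlock();
 *	wake_up_sem_queue_do(&tasks);	// no locks held here
 */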
 720
 721static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
 722{
 723        list_del(&q->list);
 724        if (q->nsops > 1)
 725                sma->complex_count--;
 726}
 727
 728/** check_restart(sma, q)
 729 * @sma: semaphore array
 730 * @q: the operation that just completed
 731 *
 732 * update_queue is O(N^2) when it restarts scanning the whole queue of
 733 * waiting operations. Therefore this function checks if the restart is
 734 * really necessary. It is called after a previously waiting operation
 735 * modified the array.
 736 * Note that wait-for-zero operations are handled without restart.
 737 */
 738static int check_restart(struct sem_array *sma, struct sem_queue *q)
 739{
 740        /* pending complex alter operations are too difficult to analyse */
 741        if (!list_empty(&sma->pending_alter))
 742                return 1;
 743
 744        /* we were a sleeping complex operation. Too difficult */
 745        if (q->nsops > 1)
 746                return 1;
 747
 748        /* It is impossible that someone waits for the new value:
 749         * - complex operations always restart.
  750         * - wait-for-zero are handled separately.
 751         * - q is a previously sleeping simple operation that
 752         *   altered the array. It must be a decrement, because
 753         *   simple increments never sleep.
 754         * - If there are older (higher priority) decrements
 755         *   in the queue, then they have observed the original
 756         *   semval value and couldn't proceed. The operation
  757         *   decremented the value - thus they won't proceed either.
 758         */
 759        return 0;
 760}
 761
 762/**
 763 * wake_const_ops - wake up non-alter tasks
 764 * @sma: semaphore array.
 765 * @semnum: semaphore that was modified.
 766 * @pt: list head for the tasks that must be woken up.
 767 *
 768 * wake_const_ops must be called after a semaphore in a semaphore array
 769 * was set to 0. If complex const operations are pending, wake_const_ops must
 770 * be called with semnum = -1, as well as with the number of each modified
 771 * semaphore.
 772 * The tasks that must be woken up are added to @pt. The return code
 773 * is stored in q->pid.
 774 * The function returns 1 if at least one operation was completed successfully.
 775 */
 776static int wake_const_ops(struct sem_array *sma, int semnum,
 777                                struct list_head *pt)
 778{
 779        struct sem_queue *q;
 780        struct list_head *walk;
 781        struct list_head *pending_list;
 782        int semop_completed = 0;
 783
 784        if (semnum == -1)
 785                pending_list = &sma->pending_const;
 786        else
 787                pending_list = &sma->sem_base[semnum].pending_const;
 788
 789        walk = pending_list->next;
 790        while (walk != pending_list) {
 791                int error;
 792
 793                q = container_of(walk, struct sem_queue, list);
 794                walk = walk->next;
 795
 796                error = perform_atomic_semop(sma, q);
 797
 798                if (error <= 0) {
 799                        /* operation completed, remove from queue & wakeup */
 800
 801                        unlink_queue(sma, q);
 802
 803                        wake_up_sem_queue_prepare(pt, q, error);
 804                        if (error == 0)
 805                                semop_completed = 1;
 806                }
 807        }
 808        return semop_completed;
 809}
 810
 811/**
  812 * do_smart_wakeup_zero - wake up all wait-for-zero tasks
  813 * @sma: semaphore array
  814 * @sops: operations that were performed
  815 * @nsops: number of operations
  816 * @pt: list head of the tasks that must be woken up.
  817 *
  818 * Checks all required queues for wait-for-zero operations, based
 819 * on the actual changes that were performed on the semaphore array.
 820 * The function returns 1 if at least one operation was completed successfully.
 821 */
 822static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
 823                                        int nsops, struct list_head *pt)
 824{
 825        int i;
 826        int semop_completed = 0;
 827        int got_zero = 0;
 828
 829        /* first: the per-semaphore queues, if known */
 830        if (sops) {
 831                for (i = 0; i < nsops; i++) {
 832                        int num = sops[i].sem_num;
 833
 834                        if (sma->sem_base[num].semval == 0) {
 835                                got_zero = 1;
 836                                semop_completed |= wake_const_ops(sma, num, pt);
 837                        }
 838                }
 839        } else {
 840                /*
 841                 * No sops means modified semaphores not known.
 842                 * Assume all were changed.
 843                 */
 844                for (i = 0; i < sma->sem_nsems; i++) {
 845                        if (sma->sem_base[i].semval == 0) {
 846                                got_zero = 1;
 847                                semop_completed |= wake_const_ops(sma, i, pt);
 848                        }
 849                }
 850        }
 851        /*
 852         * If one of the modified semaphores got 0,
 853         * then check the global queue, too.
 854         */
 855        if (got_zero)
 856                semop_completed |= wake_const_ops(sma, -1, pt);
 857
 858        return semop_completed;
 859}
 860
 861
 862/**
 863 * update_queue - look for tasks that can be completed.
 864 * @sma: semaphore array.
 865 * @semnum: semaphore that was modified.
 866 * @pt: list head for the tasks that must be woken up.
 867 *
 868 * update_queue must be called after a semaphore in a semaphore array
 869 * was modified. If multiple semaphores were modified, update_queue must
 870 * be called with semnum = -1, as well as with the number of each modified
 871 * semaphore.
 872 * The tasks that must be woken up are added to @pt. The return code
 873 * is stored in q->pid.
 874 * The function internally checks if const operations can now succeed.
 875 *
  876 * The function returns 1 if at least one semop was completed successfully.
 877 */
 878static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
 879{
 880        struct sem_queue *q;
 881        struct list_head *walk;
 882        struct list_head *pending_list;
 883        int semop_completed = 0;
 884
 885        if (semnum == -1)
 886                pending_list = &sma->pending_alter;
 887        else
 888                pending_list = &sma->sem_base[semnum].pending_alter;
 889
 890again:
 891        walk = pending_list->next;
 892        while (walk != pending_list) {
 893                int error, restart;
 894
 895                q = container_of(walk, struct sem_queue, list);
 896                walk = walk->next;
 897
 898                /* If we are scanning the single sop, per-semaphore list of
 899                 * one semaphore and that semaphore is 0, then it is not
 900                 * necessary to scan further: simple increments
 901                 * that affect only one entry succeed immediately and cannot
  902                 *   be in the per-semaphore pending queue, and decrements
 903                 * cannot be successful if the value is already 0.
 904                 */
 905                if (semnum != -1 && sma->sem_base[semnum].semval == 0)
 906                        break;
 907
 908                error = perform_atomic_semop(sma, q);
 909
 910                /* Does q->sleeper still need to sleep? */
 911                if (error > 0)
 912                        continue;
 913
 914                unlink_queue(sma, q);
 915
 916                if (error) {
 917                        restart = 0;
 918                } else {
 919                        semop_completed = 1;
 920                        do_smart_wakeup_zero(sma, q->sops, q->nsops, pt);
 921                        restart = check_restart(sma, q);
 922                }
 923
 924                wake_up_sem_queue_prepare(pt, q, error);
 925                if (restart)
 926                        goto again;
 927        }
 928        return semop_completed;
 929}
 930
 931/**
 932 * set_semotime - set sem_otime
 933 * @sma: semaphore array
 934 * @sops: operations that modified the array, may be NULL
 935 *
  936 * sem_otime is replicated to avoid cache line thrashing.
 937 * This function sets one instance to the current time.
 938 */
 939static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 940{
 941        if (sops == NULL) {
 942                sma->sem_base[0].sem_otime = get_seconds();
 943        } else {
 944                sma->sem_base[sops[0].sem_num].sem_otime =
 945                                                        get_seconds();
 946        }
 947}
 948
 949/**
 950 * do_smart_update - optimized update_queue
 951 * @sma: semaphore array
 952 * @sops: operations that were performed
 953 * @nsops: number of operations
 954 * @otime: force setting otime
 955 * @pt: list head of the tasks that must be woken up.
 956 *
 957 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 958 * based on the actual changes that were performed on the semaphore array.
 959 * Note that the function does not do the actual wake-up: the caller is
 960 * responsible for calling wake_up_sem_queue_do(@pt).
 961 * It is safe to perform this call after dropping all locks.
 962 */
 963static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
 964                        int otime, struct list_head *pt)
 965{
 966        int i;
 967
 968        otime |= do_smart_wakeup_zero(sma, sops, nsops, pt);
 969
 970        if (!list_empty(&sma->pending_alter)) {
 971                /* semaphore array uses the global queue - just process it. */
 972                otime |= update_queue(sma, -1, pt);
 973        } else {
 974                if (!sops) {
 975                        /*
 976                         * No sops, thus the modified semaphores are not
 977                         * known. Check all.
 978                         */
 979                        for (i = 0; i < sma->sem_nsems; i++)
 980                                otime |= update_queue(sma, i, pt);
 981                } else {
 982                        /*
 983                         * Check the semaphores that were increased:
 984                         * - No complex ops, thus all sleeping ops are
  985                         *   decreases.
  986                         * - if we decreased the value, then any sleeping
  987                         *   semaphore ops won't be able to run: If the
 988                         *   previous value was too small, then the new
 989                         *   value will be too small, too.
 990                         */
 991                        for (i = 0; i < nsops; i++) {
 992                                if (sops[i].sem_op > 0) {
 993                                        otime |= update_queue(sma,
 994                                                        sops[i].sem_num, pt);
 995                                }
 996                        }
 997                }
 998        }
 999        if (otime)
1000                set_semotime(sma, sops);
1001}
1002
1003/*
1004 * check_qop: Test if a queued operation sleeps on the semaphore semnum
1005 */
1006static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
1007                        bool count_zero)
1008{
1009        struct sembuf *sop = q->blocking;
1010
1011        /*
1012         * Linux always (since 0.99.10) reported a task as sleeping on all
1013         * semaphores. This violates SUS, therefore it was changed to the
1014         * standard compliant behavior.
1015         * Give the administrators a chance to notice that an application
1016         * might misbehave because it relies on the Linux behavior.
1017         */
1018        pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
1019                        "The task %s (%d) triggered the difference, watch for misbehavior.\n",
1020                        current->comm, task_pid_nr(current));
1021
1022        if (sop->sem_num != semnum)
1023                return 0;
1024
1025        if (count_zero && sop->sem_op == 0)
1026                return 1;
1027        if (!count_zero && sop->sem_op < 0)
1028                return 1;
1029
1030        return 0;
1031}
1032
 1033/* The following counts are associated with each semaphore:
 1034 *   semncnt        number of tasks waiting on semval being nonzero
 1035 *   semzcnt        number of tasks waiting on semval being zero
 1036 *
 1037 * Per definition, a task waits only on the semaphore of the first semop
 1038 * that cannot proceed, even if additional operations would block, too.
1039 */
1040static int count_semcnt(struct sem_array *sma, ushort semnum,
1041                        bool count_zero)
1042{
1043        struct list_head *l;
1044        struct sem_queue *q;
1045        int semcnt;
1046
1047        semcnt = 0;
1048        /* First: check the simple operations. They are easy to evaluate */
1049        if (count_zero)
1050                l = &sma->sem_base[semnum].pending_const;
1051        else
1052                l = &sma->sem_base[semnum].pending_alter;
1053
1054        list_for_each_entry(q, l, list) {
 1055                /* all tasks on a per-semaphore list sleep on exactly
1056                 * that semaphore
1057                 */
1058                semcnt++;
1059        }
1060
1061        /* Then: check the complex operations. */
1062        list_for_each_entry(q, &sma->pending_alter, list) {
1063                semcnt += check_qop(sma, semnum, q, count_zero);
1064        }
1065        if (count_zero) {
1066                list_for_each_entry(q, &sma->pending_const, list) {
1067                        semcnt += check_qop(sma, semnum, q, count_zero);
1068                }
1069        }
1070        return semcnt;
1071}
1072
1073/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 1074 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
1075 * remains locked on exit.
1076 */
1077static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1078{
1079        struct sem_undo *un, *tu;
1080        struct sem_queue *q, *tq;
1081        struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
1082        struct list_head tasks;
1083        int i;
1084
1085        /* Free the existing undo structures for this semaphore set.  */
1086        ipc_assert_locked_object(&sma->sem_perm);
1087        list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
1088                list_del(&un->list_id);
1089                spin_lock(&un->ulp->lock);
1090                un->semid = -1;
1091                list_del_rcu(&un->list_proc);
1092                spin_unlock(&un->ulp->lock);
1093                kfree_rcu(un, rcu);
1094        }
1095
1096        /* Wake up all pending processes and let them fail with EIDRM. */
1097        INIT_LIST_HEAD(&tasks);
1098        list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
1099                unlink_queue(sma, q);
1100                wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
1101        }
1102
1103        list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1104                unlink_queue(sma, q);
1105                wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
1106        }
1107        for (i = 0; i < sma->sem_nsems; i++) {
1108                struct sem *sem = sma->sem_base + i;
1109                list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
1110                        unlink_queue(sma, q);
1111                        wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
1112                }
1113                list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
1114                        unlink_queue(sma, q);
1115                        wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
1116                }
1117        }
1118
1119        /* Remove the semaphore set from the IDR */
1120        sem_rmid(ns, sma);
1121        sem_unlock(sma, -1);
1122        rcu_read_unlock();
1123
1124        wake_up_sem_queue_do(&tasks);
1125        ns->used_sems -= sma->sem_nsems;
1126        ipc_rcu_putref(sma, sem_rcu_free);
1127}
1128
1129static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
1130{
1131        switch (version) {
1132        case IPC_64:
1133                return copy_to_user(buf, in, sizeof(*in));
1134        case IPC_OLD:
1135            {
1136                struct semid_ds out;
1137
1138                memset(&out, 0, sizeof(out));
1139
1140                ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
1141
1142                out.sem_otime   = in->sem_otime;
1143                out.sem_ctime   = in->sem_ctime;
1144                out.sem_nsems   = in->sem_nsems;
1145
1146                return copy_to_user(buf, &out, sizeof(out));
1147            }
1148        default:
1149                return -EINVAL;
1150        }
1151}
1152
1153static time_t get_semotime(struct sem_array *sma)
1154{
1155        int i;
1156        time_t res;
1157
1158        res = sma->sem_base[0].sem_otime;
1159        for (i = 1; i < sma->sem_nsems; i++) {
1160                time_t to = sma->sem_base[i].sem_otime;
1161
1162                if (to > res)
1163                        res = to;
1164        }
1165        return res;
1166}
1167
1168static int semctl_nolock(struct ipc_namespace *ns, int semid,
1169                         int cmd, int version, void __user *p)
1170{
1171        int err;
1172        struct sem_array *sma;
1173
1174        switch (cmd) {
1175        case IPC_INFO:
1176        case SEM_INFO:
1177        {
1178                struct seminfo seminfo;
1179                int max_id;
1180
1181                err = security_sem_semctl(NULL, cmd);
1182                if (err)
1183                        return err;
1184
1185                memset(&seminfo, 0, sizeof(seminfo));
1186                seminfo.semmni = ns->sc_semmni;
1187                seminfo.semmns = ns->sc_semmns;
1188                seminfo.semmsl = ns->sc_semmsl;
1189                seminfo.semopm = ns->sc_semopm;
1190                seminfo.semvmx = SEMVMX;
1191                seminfo.semmnu = SEMMNU;
1192                seminfo.semmap = SEMMAP;
1193                seminfo.semume = SEMUME;
1194                down_read(&sem_ids(ns).rwsem);
1195                if (cmd == SEM_INFO) {
1196                        seminfo.semusz = sem_ids(ns).in_use;
1197                        seminfo.semaem = ns->used_sems;
1198                } else {
1199                        seminfo.semusz = SEMUSZ;
1200                        seminfo.semaem = SEMAEM;
1201                }
1202                max_id = ipc_get_maxid(&sem_ids(ns));
1203                up_read(&sem_ids(ns).rwsem);
1204                if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
1205                        return -EFAULT;
1206                return (max_id < 0) ? 0 : max_id;
1207        }
1208        case IPC_STAT:
1209        case SEM_STAT:
1210        {
1211                struct semid64_ds tbuf;
1212                int id = 0;
1213
1214                memset(&tbuf, 0, sizeof(tbuf));
1215
1216                rcu_read_lock();
1217                if (cmd == SEM_STAT) {
1218                        sma = sem_obtain_object(ns, semid);
1219                        if (IS_ERR(sma)) {
1220                                err = PTR_ERR(sma);
1221                                goto out_unlock;
1222                        }
1223                        id = sma->sem_perm.id;
1224                } else {
1225                        sma = sem_obtain_object_check(ns, semid);
1226                        if (IS_ERR(sma)) {
1227                                err = PTR_ERR(sma);
1228                                goto out_unlock;
1229                        }
1230                }
1231
1232                err = -EACCES;
1233                if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
1234                        goto out_unlock;
1235
1236                err = security_sem_semctl(sma, cmd);
1237                if (err)
1238                        goto out_unlock;
1239
1240                kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
1241                tbuf.sem_otime = get_semotime(sma);
1242                tbuf.sem_ctime = sma->sem_ctime;
1243                tbuf.sem_nsems = sma->sem_nsems;
1244                rcu_read_unlock();
1245                if (copy_semid_to_user(p, &tbuf, version))
1246                        return -EFAULT;
1247                return id;
1248        }
1249        default:
1250                return -EINVAL;
1251        }
1252out_unlock:
1253        rcu_read_unlock();
1254        return err;
1255}
1256
1257static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
1258                unsigned long arg)
1259{
1260        struct sem_undo *un;
1261        struct sem_array *sma;
1262        struct sem *curr;
1263        int err;
1264        struct list_head tasks;
1265        int val;
1266#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
1267        /* big-endian 64bit */
1268        val = arg >> 32;
1269#else
1270        /* 32bit or little-endian 64bit */
1271        val = arg;
1272#endif
1273
1274        if (val > SEMVMX || val < 0)
1275                return -ERANGE;
1276
1277        INIT_LIST_HEAD(&tasks);
1278
1279        rcu_read_lock();
1280        sma = sem_obtain_object_check(ns, semid);
1281        if (IS_ERR(sma)) {
1282                rcu_read_unlock();
1283                return PTR_ERR(sma);
1284        }
1285
1286        if (semnum < 0 || semnum >= sma->sem_nsems) {
1287                rcu_read_unlock();
1288                return -EINVAL;
1289        }
1290
1291
1292        if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
1293                rcu_read_unlock();
1294                return -EACCES;
1295        }
1296
1297        err = security_sem_semctl(sma, SETVAL);
1298        if (err) {
1299                rcu_read_unlock();
1300                return -EACCES;
1301        }
1302
1303        sem_lock(sma, NULL, -1);
1304
1305        if (!ipc_valid_object(&sma->sem_perm)) {
1306                sem_unlock(sma, -1);
1307                rcu_read_unlock();
1308                return -EIDRM;
1309        }
1310
1311        curr = &sma->sem_base[semnum];
1312
1313        ipc_assert_locked_object(&sma->sem_perm);
1314        list_for_each_entry(un, &sma->list_id, list_id)
1315                un->semadj[semnum] = 0;
1316
1317        curr->semval = val;
1318        curr->sempid = task_tgid_vnr(current);
1319        sma->sem_ctime = get_seconds();
1320        /* maybe some queued-up processes were waiting for this */
1321        do_smart_update(sma, NULL, 0, 0, &tasks);
1322        sem_unlock(sma, -1);
1323        rcu_read_unlock();
1324        wake_up_sem_queue_do(&tasks);
1325        return 0;
1326}
1327
1328static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1329                int cmd, void __user *p)
1330{
1331        struct sem_array *sma;
1332        struct sem *curr;
1333        int err, nsems;
1334        ushort fast_sem_io[SEMMSL_FAST];
1335        ushort *sem_io = fast_sem_io;
1336        struct list_head tasks;
1337
1338        INIT_LIST_HEAD(&tasks);
1339
1340        rcu_read_lock();
1341        sma = sem_obtain_object_check(ns, semid);
1342        if (IS_ERR(sma)) {
1343                rcu_read_unlock();
1344                return PTR_ERR(sma);
1345        }
1346
1347        nsems = sma->sem_nsems;
1348
1349        err = -EACCES;
1350        if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
1351                goto out_rcu_wakeup;
1352
1353        err = security_sem_semctl(sma, cmd);
1354        if (err)
1355                goto out_rcu_wakeup;
1356
1357        err = -EACCES;
1358        switch (cmd) {
1359        case GETALL:
1360        {
1361                ushort __user *array = p;
1362                int i;
1363
1364                sem_lock(sma, NULL, -1);
1365                if (!ipc_valid_object(&sma->sem_perm)) {
1366                        err = -EIDRM;
1367                        goto out_unlock;
1368                }
1369                if (nsems > SEMMSL_FAST) {
1370                        if (!ipc_rcu_getref(sma)) {
1371                                err = -EIDRM;
1372                                goto out_unlock;
1373                        }
1374                        sem_unlock(sma, -1);
1375                        rcu_read_unlock();
1376                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
1377                        if (sem_io == NULL) {
1378                                ipc_rcu_putref(sma, ipc_rcu_free);
1379                                return -ENOMEM;
1380                        }
1381
1382                        rcu_read_lock();
1383                        sem_lock_and_putref(sma);
1384                        if (!ipc_valid_object(&sma->sem_perm)) {
1385                                err = -EIDRM;
1386                                goto out_unlock;
1387                        }
1388                }
1389                for (i = 0; i < sma->sem_nsems; i++)
1390                        sem_io[i] = sma->sem_base[i].semval;
1391                sem_unlock(sma, -1);
1392                rcu_read_unlock();
1393                err = 0;
1394                if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
1395                        err = -EFAULT;
1396                goto out_free;
1397        }
1398        case SETALL:
1399        {
1400                int i;
1401                struct sem_undo *un;
1402
1403                if (!ipc_rcu_getref(sma)) {
1404                        err = -EIDRM;
1405                        goto out_rcu_wakeup;
1406                }
1407                rcu_read_unlock();
1408
1409                if (nsems > SEMMSL_FAST) {
1410                        sem_io = ipc_alloc(sizeof(ushort)*nsems);
1411                        if (sem_io == NULL) {
1412                                ipc_rcu_putref(sma, ipc_rcu_free);
1413                                return -ENOMEM;
1414                        }
1415                }
1416
1417                if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1418                        ipc_rcu_putref(sma, ipc_rcu_free);
1419                        err = -EFAULT;
1420                        goto out_free;
1421                }
1422
1423                for (i = 0; i < nsems; i++) {
1424                        if (sem_io[i] > SEMVMX) {
1425                                ipc_rcu_putref(sma, ipc_rcu_free);
1426                                err = -ERANGE;
1427                                goto out_free;
1428                        }
1429                }
1430                rcu_read_lock();
1431                sem_lock_and_putref(sma);
1432                if (!ipc_valid_object(&sma->sem_perm)) {
1433                        err = -EIDRM;
1434                        goto out_unlock;
1435                }
1436
1437                for (i = 0; i < nsems; i++)
1438                        sma->sem_base[i].semval = sem_io[i];
1439
1440                ipc_assert_locked_object(&sma->sem_perm);
1441                list_for_each_entry(un, &sma->list_id, list_id) {
1442                        for (i = 0; i < nsems; i++)
1443                                un->semadj[i] = 0;
1444                }
1445                sma->sem_ctime = get_seconds();
1446                /* maybe some queued-up processes were waiting for this */
1447                do_smart_update(sma, NULL, 0, 0, &tasks);
1448                err = 0;
1449                goto out_unlock;
1450        }
 1451        /* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
1452        }
1453        err = -EINVAL;
1454        if (semnum < 0 || semnum >= nsems)
1455                goto out_rcu_wakeup;
1456
1457        sem_lock(sma, NULL, -1);
1458        if (!ipc_valid_object(&sma->sem_perm)) {
1459                err = -EIDRM;
1460                goto out_unlock;
1461        }
1462        curr = &sma->sem_base[semnum];
1463
1464        switch (cmd) {
1465        case GETVAL:
1466                err = curr->semval;
1467                goto out_unlock;
1468        case GETPID:
1469                err = curr->sempid;
1470                goto out_unlock;
1471        case GETNCNT:
1472                err = count_semcnt(sma, semnum, 0);
1473                goto out_unlock;
1474        case GETZCNT:
1475                err = count_semcnt(sma, semnum, 1);
1476                goto out_unlock;
1477        }
1478
1479out_unlock:
1480        sem_unlock(sma, -1);
1481out_rcu_wakeup:
1482        rcu_read_unlock();
1483        wake_up_sem_queue_do(&tasks);
1484out_free:
1485        if (sem_io != fast_sem_io)
1486                ipc_free(sem_io, sizeof(ushort)*nsems);
1487        return err;
1488}
1489
1490static inline unsigned long
1491copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
1492{
1493        switch (version) {
1494        case IPC_64:
1495                if (copy_from_user(out, buf, sizeof(*out)))
1496                        return -EFAULT;
1497                return 0;
1498        case IPC_OLD:
1499            {
1500                struct semid_ds tbuf_old;
1501
1502                if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
1503                        return -EFAULT;
1504
1505                out->sem_perm.uid       = tbuf_old.sem_perm.uid;
1506                out->sem_perm.gid       = tbuf_old.sem_perm.gid;
1507                out->sem_perm.mode      = tbuf_old.sem_perm.mode;
1508
1509                return 0;
1510            }
1511        default:
1512                return -EINVAL;
1513        }
1514}
1515
1516/*
1517 * This function handles some semctl commands which require the rwsem
1518 * to be held in write mode.
1519 * NOTE: the caller must hold no locks; the rwsem is taken inside this function.
1520 */
1521static int semctl_down(struct ipc_namespace *ns, int semid,
1522                       int cmd, int version, void __user *p)
1523{
1524        struct sem_array *sma;
1525        int err;
1526        struct semid64_ds semid64;
1527        struct kern_ipc_perm *ipcp;
1528
1529        if (cmd == IPC_SET) {
1530                if (copy_semid_from_user(&semid64, p, version))
1531                        return -EFAULT;
1532        }
1533
1534        down_write(&sem_ids(ns).rwsem);
1535        rcu_read_lock();
1536
1537        ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
1538                                      &semid64.sem_perm, 0);
1539        if (IS_ERR(ipcp)) {
1540                err = PTR_ERR(ipcp);
1541                goto out_unlock1;
1542        }
1543
1544        sma = container_of(ipcp, struct sem_array, sem_perm);
1545
1546        err = security_sem_semctl(sma, cmd);
1547        if (err)
1548                goto out_unlock1;
1549
1550        switch (cmd) {
1551        case IPC_RMID:
1552                sem_lock(sma, NULL, -1);
1553                /* freeary unlocks the ipc object and rcu */
1554                freeary(ns, ipcp);
1555                goto out_up;
1556        case IPC_SET:
1557                sem_lock(sma, NULL, -1);
1558                err = ipc_update_perm(&semid64.sem_perm, ipcp);
1559                if (err)
1560                        goto out_unlock0;
1561                sma->sem_ctime = get_seconds();
1562                break;
1563        default:
1564                err = -EINVAL;
1565                goto out_unlock1;
1566        }
1567
1568out_unlock0:
1569        sem_unlock(sma, -1);
1570out_unlock1:
1571        rcu_read_unlock();
1572out_up:
1573        up_write(&sem_ids(ns).rwsem);
1574        return err;
1575}
1576
1577SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1578{
1579        int version;
1580        struct ipc_namespace *ns;
1581        void __user *p = (void __user *)arg;
1582
1583        if (semid < 0)
1584                return -EINVAL;
1585
1586        version = ipc_parse_version(&cmd);
1587        ns = current->nsproxy->ipc_ns;
1588
1589        switch (cmd) {
1590        case IPC_INFO:
1591        case SEM_INFO:
1592        case IPC_STAT:
1593        case SEM_STAT:
1594                return semctl_nolock(ns, semid, cmd, version, p);
1595        case GETALL:
1596        case GETVAL:
1597        case GETPID:
1598        case GETNCNT:
1599        case GETZCNT:
1600        case SETALL:
1601                return semctl_main(ns, semid, semnum, cmd, p);
1602        case SETVAL:
1603                return semctl_setval(ns, semid, semnum, arg);
1604        case IPC_RMID:
1605        case IPC_SET:
1606                return semctl_down(ns, semid, cmd, version, p);
1607        default:
1608                return -EINVAL;
1609        }
1610}
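/*
 * Note on the dispatch above (added): for SETVAL the new value is passed in
 * 'arg' itself (the val member of the user-space union semun), so
 * semctl_setval() receives the raw unsigned long rather than a pointer; the
 * other data-carrying commands go through 'p'.  Illustrative user-space call,
 * with placeholder names:
 *
 *	union semun arg = { .val = 3 };
 *	semctl(id, 0, SETVAL, arg);
 */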
1611
1612/* If the task doesn't already have an undo_list, then allocate one
1613 * here.  We guarantee there is only one thread using this undo list,
1614 * and current is THE ONE
1615 *
1616 * If this allocation and assignment succeeds, but later
1617 * portions of this code fail, there is no need to free the sem_undo_list.
1618 * Just let it stay associated with the task, and it'll be freed later
1619 * at exit time.
1620 *
1621 * This can block, so callers must hold no locks.
1622 */
1623static inline int get_undo_list(struct sem_undo_list **undo_listp)
1624{
1625        struct sem_undo_list *undo_list;
1626
1627        undo_list = current->sysvsem.undo_list;
1628        if (!undo_list) {
1629                undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1630                if (undo_list == NULL)
1631                        return -ENOMEM;
1632                spin_lock_init(&undo_list->lock);
1633                atomic_set(&undo_list->refcnt, 1);
1634                INIT_LIST_HEAD(&undo_list->list_proc);
1635
1636                current->sysvsem.undo_list = undo_list;
1637        }
1638        *undo_listp = undo_list;
1639        return 0;
1640}
1641
1642static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1643{
1644        struct sem_undo *un;
1645
1646        list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
1647                if (un->semid == semid)
1648                        return un;
1649        }
1650        return NULL;
1651}
1652
1653static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1654{
1655        struct sem_undo *un;
1656
1657        assert_spin_locked(&ulp->lock);
1658
1659        un = __lookup_undo(ulp, semid);
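        /*
         * Descriptive note (added): on a hit, the entry is moved to the
         * front of the per-task list so that repeated operations on the
         * same array find it on the first iteration of __lookup_undo()
         * (a simple move-to-front cache).
         */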
1660        if (un) {
1661                list_del_rcu(&un->list_proc);
1662                list_add_rcu(&un->list_proc, &ulp->list_proc);
1663        }
1664        return un;
1665}
1666
1667/**
1668 * find_alloc_undo - lookup (and if not present create) undo array
1669 * @ns: namespace
1670 * @semid: semaphore array id
1671 *
1672 * The function looks up (and if not present creates) the undo structure.
1673 * The size of the undo structure depends on the size of the semaphore
1674 * array, thus the alloc path is not that straightforward.
1675 * Lifetime-rules: sem_undo is rcu-protected; on success, the function
1676 * performs a rcu_read_lock().
1677 */
1678static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1679{
1680        struct sem_array *sma;
1681        struct sem_undo_list *ulp;
1682        struct sem_undo *un, *new;
1683        int nsems, error;
1684
1685        error = get_undo_list(&ulp);
1686        if (error)
1687                return ERR_PTR(error);
1688
1689        rcu_read_lock();
1690        spin_lock(&ulp->lock);
1691        un = lookup_undo(ulp, semid);
1692        spin_unlock(&ulp->lock);
1693        if (likely(un != NULL))
1694                goto out;
1695
1696        /* no undo structure around - allocate one. */
1697        /* step 1: figure out the size of the semaphore array */
1698        sma = sem_obtain_object_check(ns, semid);
1699        if (IS_ERR(sma)) {
1700                rcu_read_unlock();
1701                return ERR_CAST(sma);
1702        }
1703
1704        nsems = sma->sem_nsems;
1705        if (!ipc_rcu_getref(sma)) {
1706                rcu_read_unlock();
1707                un = ERR_PTR(-EIDRM);
1708                goto out;
1709        }
1710        rcu_read_unlock();
1711
1712        /* step 2: allocate new undo structure */
1713        new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1714        if (!new) {
1715                ipc_rcu_putref(sma, ipc_rcu_free);
1716                return ERR_PTR(-ENOMEM);
1717        }
1718
1719        /* step 3: Acquire the lock on semaphore array */
1720        rcu_read_lock();
1721        sem_lock_and_putref(sma);
1722        if (!ipc_valid_object(&sma->sem_perm)) {
1723                sem_unlock(sma, -1);
1724                rcu_read_unlock();
1725                kfree(new);
1726                un = ERR_PTR(-EIDRM);
1727                goto out;
1728        }
1729        spin_lock(&ulp->lock);
1730
1731        /*
1732         * step 4: check for races: did someone else allocate the undo struct?
1733         */
1734        un = lookup_undo(ulp, semid);
1735        if (un) {
1736                kfree(new);
1737                goto success;
1738        }
1739        /* step 5: initialize & link new undo structure */
1740        new->semadj = (short *) &new[1];
1741        new->ulp = ulp;
1742        new->semid = semid;
1743        assert_spin_locked(&ulp->lock);
1744        list_add_rcu(&new->list_proc, &ulp->list_proc);
1745        ipc_assert_locked_object(&sma->sem_perm);
1746        list_add(&new->list_id, &sma->list_id);
1747        un = new;
1748
1749success:
1750        spin_unlock(&ulp->lock);
1751        sem_unlock(sma, -1);
1752out:
1753        return un;
1754}
1755
1756
1757/**
1758 * get_queue_result - retrieve the result code from sem_queue
1759 * @q: Pointer to queue structure
1760 *
1761 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
1762 * q->status, then we must loop until the value is replaced with the final
1763 * value: This may happen if a task is woken up by an unrelated event (e.g.
1764 * signal) and in parallel the task is woken up by another task because it got
1765 * the requested semaphores.
1766 *
1767 * The function can be called with or without holding the semaphore spinlock.
1768 */
1769static int get_queue_result(struct sem_queue *q)
1770{
1771        int error;
1772
1773        error = q->status;
1774        while (unlikely(error == IN_WAKEUP)) {
1775                cpu_relax();
1776                error = q->status;
1777        }
1778
1779        return error;
1780}
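/*
 * Rough timeline of the IN_WAKEUP handshake (a sketch; the waker side is
 * wake_up_sem_queue_prepare()/wake_up_sem_queue_do() earlier in this file):
 *
 *	waker				sleeper
 *	-----				-------
 *	q->status = IN_WAKEUP
 *	wake_up_process()
 *					get_queue_result() sees IN_WAKEUP,
 *					spins with cpu_relax()
 *	q->status = <final result>
 *					spin ends, final result is returned
 */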
1781
1782SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
1783                unsigned, nsops, const struct timespec __user *, timeout)
1784{
1785        int error = -EINVAL;
1786        struct sem_array *sma;
1787        struct sembuf fast_sops[SEMOPM_FAST];
1788        struct sembuf *sops = fast_sops, *sop;
1789        struct sem_undo *un;
1790        int undos = 0, alter = 0, max, locknum;
1791        struct sem_queue queue;
1792        unsigned long jiffies_left = 0;
1793        struct ipc_namespace *ns;
1794        struct list_head tasks;
1795
1796        ns = current->nsproxy->ipc_ns;
1797
1798        if (nsops < 1 || semid < 0)
1799                return -EINVAL;
1800        if (nsops > ns->sc_semopm)
1801                return -E2BIG;
1802        if (nsops > SEMOPM_FAST) {
1803                sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
1804                if (sops == NULL)
1805                        return -ENOMEM;
1806        }
1807        if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
1808                error =  -EFAULT;
1809                goto out_free;
1810        }
1811        if (timeout) {
1812                struct timespec _timeout;
1813                if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) {
1814                        error = -EFAULT;
1815                        goto out_free;
1816                }
1817                if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 ||
1818                        _timeout.tv_nsec >= 1000000000L) {
1819                        error = -EINVAL;
1820                        goto out_free;
1821                }
1822                jiffies_left = timespec_to_jiffies(&_timeout);
1823        }
1824        max = 0;
1825        for (sop = sops; sop < sops + nsops; sop++) {
1826                if (sop->sem_num >= max)
1827                        max = sop->sem_num;
1828                if (sop->sem_flg & SEM_UNDO)
1829                        undos = 1;
1830                if (sop->sem_op != 0)
1831                        alter = 1;
1832        }
1833
1834        INIT_LIST_HEAD(&tasks);
1835
1836        if (undos) {
1837                /* On success, find_alloc_undo takes the rcu_read_lock */
1838                un = find_alloc_undo(ns, semid);
1839                if (IS_ERR(un)) {
1840                        error = PTR_ERR(un);
1841                        goto out_free;
1842                }
1843        } else {
1844                un = NULL;
1845                rcu_read_lock();
1846        }
1847
1848        sma = sem_obtain_object_check(ns, semid);
1849        if (IS_ERR(sma)) {
1850                rcu_read_unlock();
1851                error = PTR_ERR(sma);
1852                goto out_free;
1853        }
1854
1855        error = -EFBIG;
1856        if (max >= sma->sem_nsems)
1857                goto out_rcu_wakeup;
1858
1859        error = -EACCES;
1860        if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
1861                goto out_rcu_wakeup;
1862
1863        error = security_sem_semop(sma, sops, nsops, alter);
1864        if (error)
1865                goto out_rcu_wakeup;
1866
1867        error = -EIDRM;
1868        locknum = sem_lock(sma, sops, nsops);
1869        /*
1870         * We eventually might perform the following check in a lockless
1871         * fashion, considering ipc_valid_object() locking constraints.
1872         * If nsops == 1 and there is no contention for sem_perm.lock, then
1873         * only a per-semaphore lock is held and it's OK to proceed with the
1874         * check below. More details on the fine-grained locking scheme
1875         * entangled here, and why it is RMID race safe, are in the comments at sem_lock().
1876         */
1877        if (!ipc_valid_object(&sma->sem_perm))
1878                goto out_unlock_free;
1879        /*
1880         * semid identifiers are not unique - find_alloc_undo may have
1881         * allocated an undo structure, it was invalidated by an RMID,
1882         * and now a new array has received the same id. Check and fail.
1883         * This case can be detected checking un->semid. The existence of
1884         * "un" itself is guaranteed by rcu.
1885         */
1886        if (un && un->semid == -1)
1887                goto out_unlock_free;
1888
1889        queue.sops = sops;
1890        queue.nsops = nsops;
1891        queue.undo = un;
1892        queue.pid = task_tgid_vnr(current);
1893        queue.alter = alter;
1894
1895        error = perform_atomic_semop(sma, &queue);
1896        if (error == 0) {
1897                /* If the operation was successful, then do
1898                 * the required updates.
1899                 */
1900                if (alter)
1901                        do_smart_update(sma, sops, nsops, 1, &tasks);
1902                else
1903                        set_semotime(sma, sops);
1904        }
1905        if (error <= 0)
1906                goto out_unlock_free;
1907
1908        /* We need to sleep on this operation, so we put the current
1909         * task into the pending queue and go to sleep.
1910         */
1911
1912        if (nsops == 1) {
1913                struct sem *curr;
1914                curr = &sma->sem_base[sops->sem_num];
1915
1916                if (alter) {
1917                        if (sma->complex_count) {
1918                                list_add_tail(&queue.list,
1919                                                &sma->pending_alter);
1920                        } else {
1921
1922                                list_add_tail(&queue.list,
1923                                                &curr->pending_alter);
1924                        }
1925                } else {
1926                        list_add_tail(&queue.list, &curr->pending_const);
1927                }
1928        } else {
1929                if (!sma->complex_count)
1930                        merge_queues(sma);
1931
1932                if (alter)
1933                        list_add_tail(&queue.list, &sma->pending_alter);
1934                else
1935                        list_add_tail(&queue.list, &sma->pending_const);
1936
1937                sma->complex_count++;
1938        }
1939
1940        queue.status = -EINTR;
1941        queue.sleeper = current;
1942
1943sleep_again:
1944        current->state = TASK_INTERRUPTIBLE;
1945        sem_unlock(sma, locknum);
1946        rcu_read_unlock();
1947
1948        if (timeout)
1949                jiffies_left = schedule_timeout(jiffies_left);
1950        else
1951                schedule();
1952
1953        error = get_queue_result(&queue);
1954
1955        if (error != -EINTR) {
1956                /* fast path: update_queue already obtained all requested
1957                 * resources.
1958                 * Perform a smp_mb(): User space could assume that semop()
1959                 * is a memory barrier: Without the mb(), the cpu could
1960                 * speculatively read in user space stale data that was
1961                 * overwritten by the previous owner of the semaphore.
1962                 */
1963                smp_mb();
1964
1965                goto out_free;
1966        }
1967
1968        rcu_read_lock();
1969        sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
1970
1971        /*
1972         * Wait until it's guaranteed that no wake_up_sem_queue_do() is ongoing.
1973         */
1974        error = get_queue_result(&queue);
1975
1976        /*
1977         * Array removed? If yes, leave without sem_unlock().
1978         */
1979        if (IS_ERR(sma)) {
1980                rcu_read_unlock();
1981                goto out_free;
1982        }
1983
1984
1985        /*
1986         * If queue.status != -EINTR we are woken up by another process.
1987         * Leave without unlink_queue(), but with sem_unlock().
1988         */
1989        if (error != -EINTR)
1990                goto out_unlock_free;
1991
1992        /*
1993         * If an interrupt occurred we have to clean up the queue
1994         */
1995        if (timeout && jiffies_left == 0)
1996                error = -EAGAIN;
1997
1998        /*
1999         * If the wakeup was spurious, just retry
2000         */
2001        if (error == -EINTR && !signal_pending(current))
2002                goto sleep_again;
2003
2004        unlink_queue(sma, &queue);
2005
2006out_unlock_free:
2007        sem_unlock(sma, locknum);
2008out_rcu_wakeup:
2009        rcu_read_unlock();
2010        wake_up_sem_queue_do(&tasks);
2011out_free:
2012        if (sops != fast_sops)
2013                kfree(sops);
2014        return error;
2015}
2016
2017SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
2018                unsigned, nsops)
2019{
2020        return sys_semtimedop(semid, tsops, nsops, NULL);
2021}
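/*
 * Illustrative sketch (user space, not part of this file): the classic P/V
 * pair on semaphore 0 of an existing array 'id'.  SEM_UNDO records an
 * adjustment in the caller's undo list so exit_sem() below can revert the
 * decrement if the task dies inside the critical section.
 *
 *	struct sembuf down = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	struct sembuf up   = { .sem_num = 0, .sem_op = +1, .sem_flg = SEM_UNDO };
 *	semop(id, &down, 1);	may sleep in sys_semtimedop()
 *	... critical section ...
 *	semop(id, &up, 1);
 */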
2022
2023/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
2024 * parent and child tasks.
2025 */
2026
2027int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
2028{
2029        struct sem_undo_list *undo_list;
2030        int error;
2031
2032        if (clone_flags & CLONE_SYSVSEM) {
2033                error = get_undo_list(&undo_list);
2034                if (error)
2035                        return error;
2036                atomic_inc(&undo_list->refcnt);
2037                tsk->sysvsem.undo_list = undo_list;
2038        } else
2039                tsk->sysvsem.undo_list = NULL;
2040
2041        return 0;
2042}
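/*
 * Illustrative note (added): CLONE_SYSVSEM is what makes clone()-created
 * threads share a single undo list; glibc's NPTL passes it for pthreads.
 * The adjustments are then applied only once, by the last task of the group
 * to exit (see the refcnt handling in exit_sem() below).  A raw clone()
 * sketch, with placeholder names:
 *
 *	clone(worker_fn, stack_top, CLONE_VM | CLONE_SYSVSEM | SIGCHLD, arg);
 */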
2043
2044/*
2045 * add semadj values to semaphores, free undo structures.
2046 * undo structures are not freed when semaphore arrays are destroyed
2047 * so some of them may be out of date.
2048 * IMPLEMENTATION NOTE: There is some confusion over whether the
2049 * set of adjustments should be applied in an atomic
2050 * manner or not. That is, if we are attempting to decrement the semval,
2051 * should we queue up and wait until we can do so legally?
2052 * The original implementation attempted to do this (queue and wait).
2053 * The current implementation does not do so. The POSIX standard
2054 * and SVID should be consulted to determine what behavior is mandated.
2055 */
2056void exit_sem(struct task_struct *tsk)
2057{
2058        struct sem_undo_list *ulp;
2059
2060        ulp = tsk->sysvsem.undo_list;
2061        if (!ulp)
2062                return;
2063        tsk->sysvsem.undo_list = NULL;
2064
2065        if (!atomic_dec_and_test(&ulp->refcnt))
2066                return;
2067
2068        for (;;) {
2069                struct sem_array *sma;
2070                struct sem_undo *un;
2071                struct list_head tasks;
2072                int semid, i;
2073
2074                rcu_read_lock();
2075                un = list_entry_rcu(ulp->list_proc.next,
2076                                    struct sem_undo, list_proc);
2077                if (&un->list_proc == &ulp->list_proc)
2078                        semid = -1;
2079                else
2080                        semid = un->semid;
2081
2082                if (semid == -1) {
2083                        rcu_read_unlock();
2084                        break;
2085                }
2086
2087                sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
2088                /* exit_sem raced with IPC_RMID, nothing to do */
2089                if (IS_ERR(sma)) {
2090                        rcu_read_unlock();
2091                        continue;
2092                }
2093
2094                sem_lock(sma, NULL, -1);
2095                /* exit_sem raced with IPC_RMID, nothing to do */
2096                if (!ipc_valid_object(&sma->sem_perm)) {
2097                        sem_unlock(sma, -1);
2098                        rcu_read_unlock();
2099                        continue;
2100                }
2101                un = __lookup_undo(ulp, semid);
2102                if (un == NULL) {
2103                        /* exit_sem raced with IPC_RMID+semget() that created
2104                         * exactly the same semid. Nothing to do.
2105                         */
2106                        sem_unlock(sma, -1);
2107                        rcu_read_unlock();
2108                        continue;
2109                }
2110
2111                /* remove un from the linked lists */
2112                ipc_assert_locked_object(&sma->sem_perm);
2113                list_del(&un->list_id);
2114
2115                spin_lock(&ulp->lock);
2116                list_del_rcu(&un->list_proc);
2117                spin_unlock(&ulp->lock);
2118
2119                /* perform adjustments registered in un */
2120                for (i = 0; i < sma->sem_nsems; i++) {
2121                        struct sem *semaphore = &sma->sem_base[i];
2122                        if (un->semadj[i]) {
2123                                semaphore->semval += un->semadj[i];
2124                                /*
2125                                 * Range checks of the new semaphore value,
2126                                 * not defined by SUS:
2127                                 * - Some unices ignore the undo entirely
2128                                 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
2129                                 * - some cap the value (e.g. FreeBSD caps
2130                                 *   at 0, but doesn't enforce SEMVMX)
2131                                 *
2132                                 * Linux caps the semaphore value, both at 0
2133                                 * and at SEMVMX.
2134                                 *
2135                                 *      Manfred <manfred@colorfullife.com>
2136                                 */
2137                                if (semaphore->semval < 0)
2138                                        semaphore->semval = 0;
2139                                if (semaphore->semval > SEMVMX)
2140                                        semaphore->semval = SEMVMX;
2141                                semaphore->sempid = task_tgid_vnr(current);
2142                        }
2143                }
2144                /* maybe some queued-up processes were waiting for this */
2145                INIT_LIST_HEAD(&tasks);
2146                do_smart_update(sma, NULL, 0, 1, &tasks);
2147                sem_unlock(sma, -1);
2148                rcu_read_unlock();
2149                wake_up_sem_queue_do(&tasks);
2150
2151                kfree_rcu(un, rcu);
2152        }
2153        kfree(ulp);
2154}
2155
2156#ifdef CONFIG_PROC_FS
2157static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2158{
2159        struct user_namespace *user_ns = seq_user_ns(s);
2160        struct sem_array *sma = it;
2161        time_t sem_otime;
2162
2163        /*
2164         * The proc interface isn't aware of sem_lock(); it calls
2165         * ipc_lock_object() directly (in sysvipc_find_ipc).
2166         * In order to stay compatible with sem_lock(), we must wait until
2167         * all simple semop() calls have left their critical regions.
2168         */
2169        sem_wait_array(sma);
2170
2171        sem_otime = get_semotime(sma);
2172
2173        return seq_printf(s,
2174                          "%10d %10d  %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
2175                          sma->sem_perm.key,
2176                          sma->sem_perm.id,
2177                          sma->sem_perm.mode,
2178                          sma->sem_nsems,
2179                          from_kuid_munged(user_ns, sma->sem_perm.uid),
2180                          from_kgid_munged(user_ns, sma->sem_perm.gid),
2181                          from_kuid_munged(user_ns, sma->sem_perm.cuid),
2182                          from_kgid_munged(user_ns, sma->sem_perm.cgid),
2183                          sem_otime,
2184                          sma->sem_ctime);
2185}
2186#endif
2187