linux/kernel/rcuclassic.c
/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *          Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *              Documentation/RCU
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
        STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif


/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .cur = -300,
        .completed = -300,
        .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
        .cpumask = CPU_MASK_NONE,
};

DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };

static int blimit = 10;         /* Maximum callbacks per rcu_do_batch(). */
static int qhimark = 10000;     /* If this many pending, ignore blimit. */
static int qlowmark = 100;      /* Once only this many pending, use blimit. */

#ifdef CONFIG_SMP
static void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
{
        int cpu;
        cpumask_t cpumask;
        set_need_resched();
        if (unlikely(!rcp->signaled)) {
                rcp->signaled = 1;
                /*
                 * Don't send IPI to itself. With irqs disabled,
                 * rdp->cpu is the current cpu.
                 */
                cpumask = rcp->cpumask;
                cpu_clear(rdp->cpu, cpumask);
                for_each_cpu_mask(cpu, cpumask)
                        smp_send_reschedule(cpu);
        }
}
#else
static inline void force_quiescent_state(struct rcu_data *rdp,
                        struct rcu_ctrlblk *rcp)
{
        set_need_resched();
}
#endif

/**
 * call_rcu - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void call_rcu(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
{
        unsigned long flags;
        struct rcu_data *rdp;

        head->func = func;
        head->next = NULL;
        local_irq_save(flags);
        rdp = &__get_cpu_var(rcu_data);
        *rdp->nxttail = head;
        rdp->nxttail = &head->next;
        if (unlikely(++rdp->qlen > qhimark)) {
                rdp->blimit = INT_MAX;
                force_quiescent_state(rdp, &rcu_ctrlblk);
        }
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
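
/*
 * Illustrative usage sketch (an assumption for documentation purposes,
 * not code used by this file): a typical caller embeds an rcu_head in
 * its own structure and frees the structure from the callback once the
 * grace period has elapsed.  struct foo, foo_reclaim() and
 * foo_release() below are hypothetical names.
 *
 *      struct foo {
 *              int data;
 *              struct rcu_head rcu;
 *      };
 *
 *      static void foo_reclaim(struct rcu_head *head)
 *      {
 *              struct foo *fp = container_of(head, struct foo, rcu);
 *              kfree(fp);
 *      }
 *
 *      static void foo_release(struct foo *fp)
 *      {
 *              call_rcu(&fp->rcu, foo_reclaim);
 *      }
 *
 * Readers dereference the shared pointer only between rcu_read_lock()
 * and rcu_read_unlock().
 */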

/**
 * call_rcu_bh - Queue an RCU callback for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual update function to be invoked after the grace period
 *
 * The update function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler. This means that read-side critical sections in process
 * context must not be interrupted by softirqs. This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by rcu_read_lock() and
 * rcu_read_unlock() if in interrupt context, or by rcu_read_lock_bh()
 * and rcu_read_unlock_bh() if in process context. These may be nested.
 */
void call_rcu_bh(struct rcu_head *head,
                                void (*func)(struct rcu_head *rcu))
{
        unsigned long flags;
        struct rcu_data *rdp;

        head->func = func;
        head->next = NULL;
        local_irq_save(flags);
        rdp = &__get_cpu_var(rcu_bh_data);
        *rdp->nxttail = head;
        rdp->nxttail = &head->next;

        if (unlikely(++rdp->qlen > qhimark)) {
                rdp->blimit = INT_MAX;
                force_quiescent_state(rdp, &rcu_bh_ctrlblk);
        }

        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
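
/*
 * Illustrative sketch (not used by this file): call_rcu_bh() pairs with
 * read-side critical sections that exclude softirq handlers.  A
 * process-context reader would look roughly like the following, where
 * gbl_ptr, struct foo and do_something_with() are hypothetical names:
 *
 *      struct foo *p;
 *
 *      rcu_read_lock_bh();
 *      p = rcu_dereference(gbl_ptr);
 *      if (p)
 *              do_something_with(p->data);
 *      rcu_read_unlock_bh();
 *
 * The updater publishes a new version with rcu_assign_pointer() and
 * hands the old structure to call_rcu_bh() for deferred freeing.
 */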

/*
 * Return the number of RCU batches processed thus far.  Useful
 * for debug and statistics.
 */
long rcu_batches_completed(void)
{
        return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Return the number of RCU batches processed thus far for the
 * bottom-half flavor.  Useful for debug and statistics.
 */
long rcu_batches_completed_bh(void)
{
        return rcu_bh_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);

/* Raises the softirq for processing rcu callbacks. */
static inline void raise_rcu_softirq(void)
{
        raise_softirq(RCU_SOFTIRQ);
        /*
         * The smp_mb() here is required to ensure that this cpu's
         * __rcu_process_callbacks() reads the most recently updated
         * value of rcp->cur.
         */
        smp_mb();
}

/*
 * Invoke the completed RCU callbacks. They are expected to be in
 * a per-cpu list.
 */
static void rcu_do_batch(struct rcu_data *rdp)
{
        struct rcu_head *next, *list;
        int count = 0;

        list = rdp->donelist;
        while (list) {
                next = list->next;
                prefetch(next);
                list->func(list);
                list = next;
                if (++count >= rdp->blimit)
                        break;
        }
        rdp->donelist = list;

        local_irq_disable();
        rdp->qlen -= count;
        local_irq_enable();
        if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
                rdp->blimit = blimit;

        if (!rdp->donelist)
                rdp->donetail = &rdp->donelist;
        else
                raise_rcu_softirq();
}

/*
 * Grace period handling:
 * The grace period handling consists of two steps:
 * - A new grace period is started.
 *   This is done by rcu_start_batch. The start is not broadcast to
 *   all cpus, they must pick this up by comparing rcp->cur with
 *   rdp->quiescbatch. All cpus are recorded in the
 *   rcu_ctrlblk.cpumask bitmap.
 * - All cpus must go through a quiescent state.
 *   Since the start of the grace period is not broadcast, at least two
 *   calls to rcu_check_quiescent_state are required:
 *   The first call just notices that a new grace period is running. The
 *   following calls check if there was a quiescent state since the beginning
 *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
 *   the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state then calls rcu_start_batch() (via cpu_quiet())
 *   to start the next grace period (if necessary).
 */
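/*
 * A note on batch numbering: rcp->cur, rcp->completed and rdp->batch
 * are free-running counters compared with rcu_batch_before(), which is
 * provided by the rcupdate headers.  A minimal sketch of the
 * wraparound-safe comparison this file relies on (the exact definition
 * lives in the header, not here):
 *
 *      static inline int rcu_batch_before(long a, long b)
 *      {
 *              return (a - b) < 0;
 *      }
 *
 * Initializing .cur and .completed to -300 above makes the counters
 * cross zero soon after boot, presumably so that any comparison bug
 * shows up early.
 */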
/*
 * Register a new batch of callbacks, and start it up if there is currently no
 * active batch and the batch to be registered has not already occurred.
 * Caller must hold rcu_ctrlblk.lock.
 */
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
        if (rcp->next_pending &&
                        rcp->completed == rcp->cur) {
                rcp->next_pending = 0;
                /*
                 * next_pending == 0 must be visible in
                 * __rcu_process_callbacks() before it can see new value of cur.
                 */
                smp_wmb();
                rcp->cur++;

                /*
                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
                 * barrier.  Otherwise it can cause tickless idle CPUs to be
                 * included in rcp->cpumask, which will extend grace periods
                 * unnecessarily.
                 */
                smp_mb();
                cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);

                rcp->signaled = 0;
        }
}

/*
 * A cpu went through a quiescent state since the beginning of the grace period.
 * Clear it from the cpu mask and complete the grace period if it was the last
 * cpu. Start another grace period if someone has further entries pending.
 */
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
        cpu_clear(cpu, rcp->cpumask);
        if (cpus_empty(rcp->cpumask)) {
                /* batch completed ! */
                rcp->completed = rcp->cur;
                rcu_start_batch(rcp);
        }
}

/*
 * Check if the cpu has gone through a quiescent state (say context
 * switch). If so and if it hasn't already done so in this RCU
 * quiescent cycle, then indicate that it has done so.
 */
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
{
        if (rdp->quiescbatch != rcp->cur) {
                /* start new grace period: */
                rdp->qs_pending = 1;
                rdp->passed_quiesc = 0;
                rdp->quiescbatch = rcp->cur;
                return;
        }

        /* Grace period already completed for this cpu?
         * qs_pending is checked instead of the actual bitmap to avoid
         * cacheline thrashing.
         */
        if (!rdp->qs_pending)
                return;

        /*
         * Was there a quiescent state since the beginning of the grace
         * period? If no, then exit and wait for the next call.
         */
        if (!rdp->passed_quiesc)
                return;
        rdp->qs_pending = 0;

        spin_lock(&rcp->lock);
        /*
         * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
         * during cpu startup. Ignore the quiescent state.
         */
        if (likely(rdp->quiescbatch == rcp->cur))
                cpu_quiet(rdp->cpu, rcp);

        spin_unlock(&rcp->lock);
}


#ifdef CONFIG_HOTPLUG_CPU

/* Warning! Helper for rcu_offline_cpu. Do not use elsewhere without reviewing
 * the locking requirements: the list it's pulling from has to belong to a cpu
 * which is dead and hence not processing interrupts.
 */
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
                                struct rcu_head **tail)
{
        local_irq_disable();
        *this_rdp->nxttail = list;
        if (list)
                this_rdp->nxttail = tail;
        local_irq_enable();
}

static void __rcu_offline_cpu(struct rcu_data *this_rdp,
                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
        /* If the cpu going offline owns the grace period,
         * we can block indefinitely waiting for it, so flush
         * it here.
         */
        spin_lock_bh(&rcp->lock);
        if (rcp->cur != rcp->completed)
                cpu_quiet(rdp->cpu, rcp);
        spin_unlock_bh(&rcp->lock);
        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
        rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
        rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
}

static void rcu_offline_cpu(int cpu)
{
        struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
        struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);

        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
                                        &per_cpu(rcu_data, cpu));
        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
                                        &per_cpu(rcu_bh_data, cpu));
        put_cpu_var(rcu_data);
        put_cpu_var(rcu_bh_data);
}

#else

static void rcu_offline_cpu(int cpu)
{
}

#endif

/*
 * This does the RCU processing work from softirq context.
 */
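/*
 * Overview of the per-cpu callback flow handled below, as implemented
 * in this file:
 *
 *   call_rcu()/call_rcu_bh() append new callbacks to nxtlist;
 *   nxtlist  moves to curlist  once a batch number (rdp->batch) is assigned;
 *   curlist  moves to donelist once that batch's grace period completes;
 *   donelist is drained by rcu_do_batch(), at most blimit entries at a time.
 */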
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                        struct rcu_data *rdp)
{
        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
                *rdp->donetail = rdp->curlist;
                rdp->donetail = rdp->curtail;
                rdp->curlist = NULL;
                rdp->curtail = &rdp->curlist;
        }

        if (rdp->nxtlist && !rdp->curlist) {
                local_irq_disable();
                rdp->curlist = rdp->nxtlist;
                rdp->curtail = rdp->nxttail;
                rdp->nxtlist = NULL;
                rdp->nxttail = &rdp->nxtlist;
                local_irq_enable();

                /*
                 * start the next batch of callbacks
                 */

                /* determine batch number */
                rdp->batch = rcp->cur + 1;
                /* see the comment and corresponding wmb() in
                 * rcu_start_batch()
                 */
                smp_rmb();

                if (!rcp->next_pending) {
                        /* and start it/schedule start if it's a new batch */
                        spin_lock(&rcp->lock);
                        rcp->next_pending = 1;
                        rcu_start_batch(rcp);
                        spin_unlock(&rcp->lock);
                }
        }

        rcu_check_quiescent_state(rcp, rdp);
        if (rdp->donelist)
                rcu_do_batch(rdp);
}

static void rcu_process_callbacks(struct softirq_action *unused)
{
        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
}

static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
        /* This cpu has pending rcu entries and the grace period
         * for them has completed.
         */
        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
                return 1;

        /* This cpu has no pending entries, but there are new entries */
        if (!rdp->curlist && rdp->nxtlist)
                return 1;

        /* This cpu has finished callbacks to invoke */
        if (rdp->donelist)
                return 1;

        /* The rcu core waits for a quiescent state from the cpu */
        if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
                return 1;

        /* nothing to do */
        return 0;
}

/*
 * Check to see if there is any immediate RCU-related work to be done
 * by the current CPU, returning 1 if so.  This function is part of the
 * RCU implementation; it is -not- an exported member of the RCU API.
 */
int rcu_pending(int cpu)
{
        return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
                __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
}

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 */
int rcu_needs_cpu(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
        struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);

        return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
}

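/*
 * Called from the timer tick.  If the interrupt arrived from user mode,
 * or from the idle loop with neither softirq nor a nested hardirq
 * active, this cpu has passed through a quiescent state for both rcu
 * and rcu_bh.  Otherwise, an interrupt taken outside softirq context
 * still counts as a quiescent state for rcu_bh.
 */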
void rcu_check_callbacks(int cpu, int user)
{
        if (user ||
            (idle_cpu(cpu) && !in_softirq() &&
                                hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
                rcu_qsctr_inc(cpu);
                rcu_bh_qsctr_inc(cpu);
        } else if (!in_softirq())
                rcu_bh_qsctr_inc(cpu);
        raise_rcu_softirq();
}

static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
                                                struct rcu_data *rdp)
{
        memset(rdp, 0, sizeof(*rdp));
        rdp->curtail = &rdp->curlist;
        rdp->nxttail = &rdp->nxtlist;
        rdp->donetail = &rdp->donelist;
        rdp->quiescbatch = rcp->completed;
        rdp->qs_pending = 0;
        rdp->cpu = cpu;
        rdp->blimit = blimit;
}

static void __cpuinit rcu_online_cpu(int cpu)
{
        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
        struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);

        rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
        rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
}

static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                rcu_online_cpu(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                rcu_offline_cpu(cpu);
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata rcu_nb = {
        .notifier_call  = rcu_cpu_notify,
};

/*
 * Initializes the rcu mechanism.  Assumed to be called early, that is
 * before the local timer (SMP) or the jiffy timer (uniprocessor) is
 * set up.  Note that rcu_qsctr and friends are implicitly
 * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
 */
void __init __rcu_init(void)
{
        rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
        /* Register notifier for non-boot CPUs */
        register_cpu_notifier(&rcu_nb);
}

module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
