linux/kernel/rcupreempt.c
<<
>>
Prefs
   1/*
   2 * Read-Copy Update mechanism for mutual exclusion, realtime implementation
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 *
  18 * Copyright IBM Corporation, 2006
  19 *
  20 * Authors: Paul E. McKenney <paulmck@us.ibm.com>
  21 *              With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
  22 *              for pushing me away from locks and towards counters, and
  23 *              to Suparna Bhattacharya for pushing me completely away
  24 *              from atomic instructions on the read side.
  25 *
  26 *  - Added handling of Dynamic Ticks
  27 *      Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
  28 *                     - Steven Rostedt <srostedt@redhat.com>
  29 *
  30 * Papers:  http://www.rdrop.com/users/paulmck/RCU
  31 *
  32 * Design Document: http://lwn.net/Articles/253651/
  33 *
  34 * For detailed explanation of Read-Copy Update mechanism see -
  35 *              Documentation/RCU/ *.txt
  36 *
  37 */
  38#include <linux/types.h>
  39#include <linux/kernel.h>
  40#include <linux/init.h>
  41#include <linux/spinlock.h>
  42#include <linux/smp.h>
  43#include <linux/rcupdate.h>
  44#include <linux/interrupt.h>
  45#include <linux/sched.h>
  46#include <asm/atomic.h>
  47#include <linux/bitops.h>
  48#include <linux/module.h>
  49#include <linux/completion.h>
  50#include <linux/moduleparam.h>
  51#include <linux/percpu.h>
  52#include <linux/notifier.h>
  53#include <linux/rcupdate.h>
  54#include <linux/cpu.h>
  55#include <linux/random.h>
  56#include <linux/delay.h>
  57#include <linux/byteorder/swabb.h>
  58#include <linux/cpumask.h>
  59#include <linux/rcupreempt_trace.h>
  60
  61/*
  62 * Macro that prevents the compiler from reordering accesses, but does
  63 * absolutely -nothing- to prevent CPUs from reordering.  This is used
  64 * only to mediate communication between mainline code and hardware
  65 * interrupt and NMI handlers.
  66 */
  67#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  68
  69/*
  70 * PREEMPT_RCU data structures.
  71 */
  72
  73/*
  74 * GP_STAGES specifies the number of times the state machine has
  75 * to go through the all the rcu_try_flip_states (see below)
  76 * in a single Grace Period.
  77 *
  78 * GP in GP_STAGES stands for Grace Period ;)
  79 */
  80#define GP_STAGES    2
  81struct rcu_data {
  82        spinlock_t      lock;           /* Protect rcu_data fields. */
  83        long            completed;      /* Number of last completed batch. */
  84        int             waitlistcount;
  85        struct tasklet_struct rcu_tasklet;
  86        struct rcu_head *nextlist;
  87        struct rcu_head **nexttail;
  88        struct rcu_head *waitlist[GP_STAGES];
  89        struct rcu_head **waittail[GP_STAGES];
  90        struct rcu_head *donelist;
  91        struct rcu_head **donetail;
  92        long rcu_flipctr[2];
  93#ifdef CONFIG_RCU_TRACE
  94        struct rcupreempt_trace trace;
  95#endif /* #ifdef CONFIG_RCU_TRACE */
  96};
  97
  98/*
  99 * States for rcu_try_flip() and friends.
 100 */
 101
 102enum rcu_try_flip_states {
 103
 104        /*
 105         * Stay here if nothing is happening. Flip the counter if somthing
 106         * starts happening. Denoted by "I"
 107         */
 108        rcu_try_flip_idle_state,
 109
 110        /*
 111         * Wait here for all CPUs to notice that the counter has flipped. This
 112         * prevents the old set of counters from ever being incremented once
 113         * we leave this state, which in turn is necessary because we cannot
 114         * test any individual counter for zero -- we can only check the sum.
 115         * Denoted by "A".
 116         */
 117        rcu_try_flip_waitack_state,
 118
 119        /*
 120         * Wait here for the sum of the old per-CPU counters to reach zero.
 121         * Denoted by "Z".
 122         */
 123        rcu_try_flip_waitzero_state,
 124
 125        /*
 126         * Wait here for each of the other CPUs to execute a memory barrier.
 127         * This is necessary to ensure that these other CPUs really have
 128         * completed executing their RCU read-side critical sections, despite
 129         * their CPUs wildly reordering memory. Denoted by "M".
 130         */
 131        rcu_try_flip_waitmb_state,
 132};
 133
 134struct rcu_ctrlblk {
 135        spinlock_t      fliplock;       /* Protect state-machine transitions. */
 136        long            completed;      /* Number of last completed batch. */
 137        enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
 138                                                        the rcu state machine */
 139};
 140
 141static DEFINE_PER_CPU(struct rcu_data, rcu_data);
 142static struct rcu_ctrlblk rcu_ctrlblk = {
 143        .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
 144        .completed = 0,
 145        .rcu_try_flip_state = rcu_try_flip_idle_state,
 146};
 147
 148
 149#ifdef CONFIG_RCU_TRACE
 150static char *rcu_try_flip_state_names[] =
 151        { "idle", "waitack", "waitzero", "waitmb" };
 152#endif /* #ifdef CONFIG_RCU_TRACE */
 153
 154static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
 155
 156/*
 157 * Enum and per-CPU flag to determine when each CPU has seen
 158 * the most recent counter flip.
 159 */
 160
 161enum rcu_flip_flag_values {
 162        rcu_flip_seen,          /* Steady/initial state, last flip seen. */
 163                                /* Only GP detector can update. */
 164        rcu_flipped             /* Flip just completed, need confirmation. */
 165                                /* Only corresponding CPU can update. */
 166};
 167static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
 168                                                                = rcu_flip_seen;
 169
 170/*
 171 * Enum and per-CPU flag to determine when each CPU has executed the
 172 * needed memory barrier to fence in memory references from its last RCU
 173 * read-side critical section in the just-completed grace period.
 174 */
 175
 176enum rcu_mb_flag_values {
 177        rcu_mb_done,            /* Steady/initial state, no mb()s required. */
 178                                /* Only GP detector can update. */
 179        rcu_mb_needed           /* Flip just completed, need an mb(). */
 180                                /* Only corresponding CPU can update. */
 181};
 182static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
 183                                                                = rcu_mb_done;
 184
 185/*
 186 * RCU_DATA_ME: find the current CPU's rcu_data structure.
 187 * RCU_DATA_CPU: find the specified CPU's rcu_data structure.
 188 */
 189#define RCU_DATA_ME()           (&__get_cpu_var(rcu_data))
 190#define RCU_DATA_CPU(cpu)       (&per_cpu(rcu_data, cpu))
 191
 192/*
 193 * Helper macro for tracing when the appropriate rcu_data is not
 194 * cached in a local variable, but where the CPU number is so cached.
 195 */
 196#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
 197
 198/*
 199 * Helper macro for tracing when the appropriate rcu_data is not
 200 * cached in a local variable.
 201 */
 202#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
 203
 204/*
 205 * Helper macro for tracing when the appropriate rcu_data is pointed
 206 * to by a local variable.
 207 */
 208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
 209
 210/*
 211 * Return the number of RCU batches processed thus far.  Useful
 212 * for debug and statistics.
 213 */
 214long rcu_batches_completed(void)
 215{
 216        return rcu_ctrlblk.completed;
 217}
 218EXPORT_SYMBOL_GPL(rcu_batches_completed);
 219
 220void __rcu_read_lock(void)
 221{
 222        int idx;
 223        struct task_struct *t = current;
 224        int nesting;
 225
 226        nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
 227        if (nesting != 0) {
 228
 229                /* An earlier rcu_read_lock() covers us, just count it. */
 230
 231                t->rcu_read_lock_nesting = nesting + 1;
 232
 233        } else {
 234                unsigned long flags;
 235
 236                /*
 237                 * We disable interrupts for the following reasons:
 238                 * - If we get scheduling clock interrupt here, and we
 239                 *   end up acking the counter flip, it's like a promise
 240                 *   that we will never increment the old counter again.
 241                 *   Thus we will break that promise if that
 242                 *   scheduling clock interrupt happens between the time
 243                 *   we pick the .completed field and the time that we
 244                 *   increment our counter.
 245                 *
 246                 * - We don't want to be preempted out here.
 247                 *
 248                 * NMIs can still occur, of course, and might themselves
 249                 * contain rcu_read_lock().
 250                 */
 251
 252                local_irq_save(flags);
 253
 254                /*
 255                 * Outermost nesting of rcu_read_lock(), so increment
 256                 * the current counter for the current CPU.  Use volatile
 257                 * casts to prevent the compiler from reordering.
 258                 */
 259
 260                idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
 261                ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
 262
 263                /*
 264                 * Now that the per-CPU counter has been incremented, we
 265                 * are protected from races with rcu_read_lock() invoked
 266                 * from NMI handlers on this CPU.  We can therefore safely
 267                 * increment the nesting counter, relieving further NMIs
 268                 * of the need to increment the per-CPU counter.
 269                 */
 270
 271                ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
 272
 273                /*
 274                 * Now that we have preventing any NMIs from storing
 275                 * to the ->rcu_flipctr_idx, we can safely use it to
 276                 * remember which counter to decrement in the matching
 277                 * rcu_read_unlock().
 278                 */
 279
 280                ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
 281                local_irq_restore(flags);
 282        }
 283}
 284EXPORT_SYMBOL_GPL(__rcu_read_lock);
 285
 286void __rcu_read_unlock(void)
 287{
 288        int idx;
 289        struct task_struct *t = current;
 290        int nesting;
 291
 292        nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
 293        if (nesting > 1) {
 294
 295                /*
 296                 * We are still protected by the enclosing rcu_read_lock(),
 297                 * so simply decrement the counter.
 298                 */
 299
 300                t->rcu_read_lock_nesting = nesting - 1;
 301
 302        } else {
 303                unsigned long flags;
 304
 305                /*
 306                 * Disable local interrupts to prevent the grace-period
 307                 * detection state machine from seeing us half-done.
 308                 * NMIs can still occur, of course, and might themselves
 309                 * contain rcu_read_lock() and rcu_read_unlock().
 310                 */
 311
 312                local_irq_save(flags);
 313
 314                /*
 315                 * Outermost nesting of rcu_read_unlock(), so we must
 316                 * decrement the current counter for the current CPU.
 317                 * This must be done carefully, because NMIs can
 318                 * occur at any point in this code, and any rcu_read_lock()
 319                 * and rcu_read_unlock() pairs in the NMI handlers
 320                 * must interact non-destructively with this code.
 321                 * Lots of volatile casts, and -very- careful ordering.
 322                 *
 323                 * Changes to this code, including this one, must be
 324                 * inspected, validated, and tested extremely carefully!!!
 325                 */
 326
 327                /*
 328                 * First, pick up the index.
 329                 */
 330
 331                idx = ACCESS_ONCE(t->rcu_flipctr_idx);
 332
 333                /*
 334                 * Now that we have fetched the counter index, it is
 335                 * safe to decrement the per-task RCU nesting counter.
 336                 * After this, any interrupts or NMIs will increment and
 337                 * decrement the per-CPU counters.
 338                 */
 339                ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
 340
 341                /*
 342                 * It is now safe to decrement this task's nesting count.
 343                 * NMIs that occur after this statement will route their
 344                 * rcu_read_lock() calls through this "else" clause, and
 345                 * will thus start incrementing the per-CPU counter on
 346                 * their own.  They will also clobber ->rcu_flipctr_idx,
 347                 * but that is OK, since we have already fetched it.
 348                 */
 349
 350                ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
 351                local_irq_restore(flags);
 352        }
 353}
 354EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 355
 356/*
 357 * If a global counter flip has occurred since the last time that we
 358 * advanced callbacks, advance them.  Hardware interrupts must be
 359 * disabled when calling this function.
 360 */
 361static void __rcu_advance_callbacks(struct rcu_data *rdp)
 362{
 363        int cpu;
 364        int i;
 365        int wlc = 0;
 366
 367        if (rdp->completed != rcu_ctrlblk.completed) {
 368                if (rdp->waitlist[GP_STAGES - 1] != NULL) {
 369                        *rdp->donetail = rdp->waitlist[GP_STAGES - 1];
 370                        rdp->donetail = rdp->waittail[GP_STAGES - 1];
 371                        RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
 372                }
 373                for (i = GP_STAGES - 2; i >= 0; i--) {
 374                        if (rdp->waitlist[i] != NULL) {
 375                                rdp->waitlist[i + 1] = rdp->waitlist[i];
 376                                rdp->waittail[i + 1] = rdp->waittail[i];
 377                                wlc++;
 378                        } else {
 379                                rdp->waitlist[i + 1] = NULL;
 380                                rdp->waittail[i + 1] =
 381                                        &rdp->waitlist[i + 1];
 382                        }
 383                }
 384                if (rdp->nextlist != NULL) {
 385                        rdp->waitlist[0] = rdp->nextlist;
 386                        rdp->waittail[0] = rdp->nexttail;
 387                        wlc++;
 388                        rdp->nextlist = NULL;
 389                        rdp->nexttail = &rdp->nextlist;
 390                        RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
 391                } else {
 392                        rdp->waitlist[0] = NULL;
 393                        rdp->waittail[0] = &rdp->waitlist[0];
 394                }
 395                rdp->waitlistcount = wlc;
 396                rdp->completed = rcu_ctrlblk.completed;
 397        }
 398
 399        /*
 400         * Check to see if this CPU needs to report that it has seen
 401         * the most recent counter flip, thereby declaring that all
 402         * subsequent rcu_read_lock() invocations will respect this flip.
 403         */
 404
 405        cpu = raw_smp_processor_id();
 406        if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
 407                smp_mb();  /* Subsequent counter accesses must see new value */
 408                per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
 409                smp_mb();  /* Subsequent RCU read-side critical sections */
 410                           /*  seen -after- acknowledgement. */
 411        }
 412}
 413
 414#ifdef CONFIG_NO_HZ
 415
 416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
 417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
 418static DEFINE_PER_CPU(int, rcu_update_flag);
 419
 420/**
 421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
 422 *
 423 * If the CPU was idle with dynamic ticks active, this updates the
 424 * dynticks_progress_counter to let the RCU handling know that the
 425 * CPU is active.
 426 */
 427void rcu_irq_enter(void)
 428{
 429        int cpu = smp_processor_id();
 430
 431        if (per_cpu(rcu_update_flag, cpu))
 432                per_cpu(rcu_update_flag, cpu)++;
 433
 434        /*
 435         * Only update if we are coming from a stopped ticks mode
 436         * (dynticks_progress_counter is even).
 437         */
 438        if (!in_interrupt() &&
 439            (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
 440                /*
 441                 * The following might seem like we could have a race
 442                 * with NMI/SMIs. But this really isn't a problem.
 443                 * Here we do a read/modify/write, and the race happens
 444                 * when an NMI/SMI comes in after the read and before
 445                 * the write. But NMI/SMIs will increment this counter
 446                 * twice before returning, so the zero bit will not
 447                 * be corrupted by the NMI/SMI which is the most important
 448                 * part.
 449                 *
 450                 * The only thing is that we would bring back the counter
 451                 * to a postion that it was in during the NMI/SMI.
 452                 * But the zero bit would be set, so the rest of the
 453                 * counter would again be ignored.
 454                 *
 455                 * On return from the IRQ, the counter may have the zero
 456                 * bit be 0 and the counter the same as the return from
 457                 * the NMI/SMI. If the state machine was so unlucky to
 458                 * see that, it still doesn't matter, since all
 459                 * RCU read-side critical sections on this CPU would
 460                 * have already completed.
 461                 */
 462                per_cpu(dynticks_progress_counter, cpu)++;
 463                /*
 464                 * The following memory barrier ensures that any
 465                 * rcu_read_lock() primitives in the irq handler
 466                 * are seen by other CPUs to follow the above
 467                 * increment to dynticks_progress_counter. This is
 468                 * required in order for other CPUs to correctly
 469                 * determine when it is safe to advance the RCU
 470                 * grace-period state machine.
 471                 */
 472                smp_mb(); /* see above block comment. */
 473                /*
 474                 * Since we can't determine the dynamic tick mode from
 475                 * the dynticks_progress_counter after this routine,
 476                 * we use a second flag to acknowledge that we came
 477                 * from an idle state with ticks stopped.
 478                 */
 479                per_cpu(rcu_update_flag, cpu)++;
 480                /*
 481                 * If we take an NMI/SMI now, they will also increment
 482                 * the rcu_update_flag, and will not update the
 483                 * dynticks_progress_counter on exit. That is for
 484                 * this IRQ to do.
 485                 */
 486        }
 487}
 488
 489/**
 490 * rcu_irq_exit - Called from exiting Hard irq context.
 491 *
 492 * If the CPU was idle with dynamic ticks active, update the
 493 * dynticks_progress_counter to put let the RCU handling be
 494 * aware that the CPU is going back to idle with no ticks.
 495 */
 496void rcu_irq_exit(void)
 497{
 498        int cpu = smp_processor_id();
 499
 500        /*
 501         * rcu_update_flag is set if we interrupted the CPU
 502         * when it was idle with ticks stopped.
 503         * Once this occurs, we keep track of interrupt nesting
 504         * because a NMI/SMI could also come in, and we still
 505         * only want the IRQ that started the increment of the
 506         * dynticks_progress_counter to be the one that modifies
 507         * it on exit.
 508         */
 509        if (per_cpu(rcu_update_flag, cpu)) {
 510                if (--per_cpu(rcu_update_flag, cpu))
 511                        return;
 512
 513                /* This must match the interrupt nesting */
 514                WARN_ON(in_interrupt());
 515
 516                /*
 517                 * If an NMI/SMI happens now we are still
 518                 * protected by the dynticks_progress_counter being odd.
 519                 */
 520
 521                /*
 522                 * The following memory barrier ensures that any
 523                 * rcu_read_unlock() primitives in the irq handler
 524                 * are seen by other CPUs to preceed the following
 525                 * increment to dynticks_progress_counter. This
 526                 * is required in order for other CPUs to determine
 527                 * when it is safe to advance the RCU grace-period
 528                 * state machine.
 529                 */
 530                smp_mb(); /* see above block comment. */
 531                per_cpu(dynticks_progress_counter, cpu)++;
 532                WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
 533        }
 534}
 535
 536static void dyntick_save_progress_counter(int cpu)
 537{
 538        per_cpu(rcu_dyntick_snapshot, cpu) =
 539                per_cpu(dynticks_progress_counter, cpu);
 540}
 541
 542static inline int
 543rcu_try_flip_waitack_needed(int cpu)
 544{
 545        long curr;
 546        long snap;
 547
 548        curr = per_cpu(dynticks_progress_counter, cpu);
 549        snap = per_cpu(rcu_dyntick_snapshot, cpu);
 550        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 551
 552        /*
 553         * If the CPU remained in dynticks mode for the entire time
 554         * and didn't take any interrupts, NMIs, SMIs, or whatever,
 555         * then it cannot be in the middle of an rcu_read_lock(), so
 556         * the next rcu_read_lock() it executes must use the new value
 557         * of the counter.  So we can safely pretend that this CPU
 558         * already acknowledged the counter.
 559         */
 560
 561        if ((curr == snap) && ((curr & 0x1) == 0))
 562                return 0;
 563
 564        /*
 565         * If the CPU passed through or entered a dynticks idle phase with
 566         * no active irq handlers, then, as above, we can safely pretend
 567         * that this CPU already acknowledged the counter.
 568         */
 569
 570        if ((curr - snap) > 2 || (curr & 0x1) == 0)
 571                return 0;
 572
 573        /* We need this CPU to explicitly acknowledge the counter flip. */
 574
 575        return 1;
 576}
 577
 578static inline int
 579rcu_try_flip_waitmb_needed(int cpu)
 580{
 581        long curr;
 582        long snap;
 583
 584        curr = per_cpu(dynticks_progress_counter, cpu);
 585        snap = per_cpu(rcu_dyntick_snapshot, cpu);
 586        smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 587
 588        /*
 589         * If the CPU remained in dynticks mode for the entire time
 590         * and didn't take any interrupts, NMIs, SMIs, or whatever,
 591         * then it cannot have executed an RCU read-side critical section
 592         * during that time, so there is no need for it to execute a
 593         * memory barrier.
 594         */
 595
 596        if ((curr == snap) && ((curr & 0x1) == 0))
 597                return 0;
 598
 599        /*
 600         * If the CPU either entered or exited an outermost interrupt,
 601         * SMI, NMI, or whatever handler, then we know that it executed
 602         * a memory barrier when doing so.  So we don't need another one.
 603         */
 604        if (curr != snap)
 605                return 0;
 606
 607        /* We need the CPU to execute a memory barrier. */
 608
 609        return 1;
 610}
 611
 612#else /* !CONFIG_NO_HZ */
 613
 614# define dyntick_save_progress_counter(cpu)     do { } while (0)
 615# define rcu_try_flip_waitack_needed(cpu)       (1)
 616# define rcu_try_flip_waitmb_needed(cpu)        (1)
 617
 618#endif /* CONFIG_NO_HZ */
 619
 620/*
 621 * Get here when RCU is idle.  Decide whether we need to
 622 * move out of idle state, and return non-zero if so.
 623 * "Straightforward" approach for the moment, might later
 624 * use callback-list lengths, grace-period duration, or
 625 * some such to determine when to exit idle state.
 626 * Might also need a pre-idle test that does not acquire
 627 * the lock, but let's get the simple case working first...
 628 */
 629
 630static int
 631rcu_try_flip_idle(void)
 632{
 633        int cpu;
 634
 635        RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
 636        if (!rcu_pending(smp_processor_id())) {
 637                RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
 638                return 0;
 639        }
 640
 641        /*
 642         * Do the flip.
 643         */
 644
 645        RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
 646        rcu_ctrlblk.completed++;  /* stands in for rcu_try_flip_g2 */
 647
 648        /*
 649         * Need a memory barrier so that other CPUs see the new
 650         * counter value before they see the subsequent change of all
 651         * the rcu_flip_flag instances to rcu_flipped.
 652         */
 653
 654        smp_mb();       /* see above block comment. */
 655
 656        /* Now ask each CPU for acknowledgement of the flip. */
 657
 658        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
 659                per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
 660                dyntick_save_progress_counter(cpu);
 661        }
 662
 663        return 1;
 664}
 665
 666/*
 667 * Wait for CPUs to acknowledge the flip.
 668 */
 669
 670static int
 671rcu_try_flip_waitack(void)
 672{
 673        int cpu;
 674
 675        RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
 676        for_each_cpu_mask(cpu, rcu_cpu_online_map)
 677                if (rcu_try_flip_waitack_needed(cpu) &&
 678                    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
 679                        RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
 680                        return 0;
 681                }
 682
 683        /*
 684         * Make sure our checks above don't bleed into subsequent
 685         * waiting for the sum of the counters to reach zero.
 686         */
 687
 688        smp_mb();       /* see above block comment. */
 689        RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
 690        return 1;
 691}
 692
 693/*
 694 * Wait for collective ``last'' counter to reach zero,
 695 * then tell all CPUs to do an end-of-grace-period memory barrier.
 696 */
 697
 698static int
 699rcu_try_flip_waitzero(void)
 700{
 701        int cpu;
 702        int lastidx = !(rcu_ctrlblk.completed & 0x1);
 703        int sum = 0;
 704
 705        /* Check to see if the sum of the "last" counters is zero. */
 706
 707        RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
 708        for_each_cpu_mask(cpu, rcu_cpu_online_map)
 709                sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
 710        if (sum != 0) {
 711                RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
 712                return 0;
 713        }
 714
 715        /*
 716         * This ensures that the other CPUs see the call for
 717         * memory barriers -after- the sum to zero has been
 718         * detected here
 719         */
 720        smp_mb();  /*  ^^^^^^^^^^^^ */
 721
 722        /* Call for a memory barrier from each CPU. */
 723        for_each_cpu_mask(cpu, rcu_cpu_online_map) {
 724                per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
 725                dyntick_save_progress_counter(cpu);
 726        }
 727
 728        RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
 729        return 1;
 730}
 731
 732/*
 733 * Wait for all CPUs to do their end-of-grace-period memory barrier.
 734 * Return 0 once all CPUs have done so.
 735 */
 736
 737static int
 738rcu_try_flip_waitmb(void)
 739{
 740        int cpu;
 741
 742        RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
 743        for_each_cpu_mask(cpu, rcu_cpu_online_map)
 744                if (rcu_try_flip_waitmb_needed(cpu) &&
 745                    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
 746                        RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
 747                        return 0;
 748                }
 749
 750        smp_mb(); /* Ensure that the above checks precede any following flip. */
 751        RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
 752        return 1;
 753}
 754
 755/*
 756 * Attempt a single flip of the counters.  Remember, a single flip does
 757 * -not- constitute a grace period.  Instead, the interval between
 758 * at least GP_STAGES consecutive flips is a grace period.
 759 *
 760 * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
 761 * on a large SMP, they might want to use a hierarchical organization of
 762 * the per-CPU-counter pairs.
 763 */
 764static void rcu_try_flip(void)
 765{
 766        unsigned long flags;
 767
 768        RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
 769        if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
 770                RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
 771                return;
 772        }
 773
 774        /*
 775         * Take the next transition(s) through the RCU grace-period
 776         * flip-counter state machine.
 777         */
 778
 779        switch (rcu_ctrlblk.rcu_try_flip_state) {
 780        case rcu_try_flip_idle_state:
 781                if (rcu_try_flip_idle())
 782                        rcu_ctrlblk.rcu_try_flip_state =
 783                                rcu_try_flip_waitack_state;
 784                break;
 785        case rcu_try_flip_waitack_state:
 786                if (rcu_try_flip_waitack())
 787                        rcu_ctrlblk.rcu_try_flip_state =
 788                                rcu_try_flip_waitzero_state;
 789                break;
 790        case rcu_try_flip_waitzero_state:
 791                if (rcu_try_flip_waitzero())
 792                        rcu_ctrlblk.rcu_try_flip_state =
 793                                rcu_try_flip_waitmb_state;
 794                break;
 795        case rcu_try_flip_waitmb_state:
 796                if (rcu_try_flip_waitmb())
 797                        rcu_ctrlblk.rcu_try_flip_state =
 798                                rcu_try_flip_idle_state;
 799        }
 800        spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 801}
 802
 803/*
 804 * Check to see if this CPU needs to do a memory barrier in order to
 805 * ensure that any prior RCU read-side critical sections have committed
 806 * their counter manipulations and critical-section memory references
 807 * before declaring the grace period to be completed.
 808 */
 809static void rcu_check_mb(int cpu)
 810{
 811        if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
 812                smp_mb();  /* Ensure RCU read-side accesses are visible. */
 813                per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
 814        }
 815}
 816
 817void rcu_check_callbacks(int cpu, int user)
 818{
 819        unsigned long flags;
 820        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
 821
 822        rcu_check_mb(cpu);
 823        if (rcu_ctrlblk.completed == rdp->completed)
 824                rcu_try_flip();
 825        spin_lock_irqsave(&rdp->lock, flags);
 826        RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
 827        __rcu_advance_callbacks(rdp);
 828        if (rdp->donelist == NULL) {
 829                spin_unlock_irqrestore(&rdp->lock, flags);
 830        } else {
 831                spin_unlock_irqrestore(&rdp->lock, flags);
 832                raise_softirq(RCU_SOFTIRQ);
 833        }
 834}
 835
 836/*
 837 * Needed by dynticks, to make sure all RCU processing has finished
 838 * when we go idle:
 839 */
 840void rcu_advance_callbacks(int cpu, int user)
 841{
 842        unsigned long flags;
 843        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
 844
 845        if (rcu_ctrlblk.completed == rdp->completed) {
 846                rcu_try_flip();
 847                if (rcu_ctrlblk.completed == rdp->completed)
 848                        return;
 849        }
 850        spin_lock_irqsave(&rdp->lock, flags);
 851        RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
 852        __rcu_advance_callbacks(rdp);
 853        spin_unlock_irqrestore(&rdp->lock, flags);
 854}
 855
 856#ifdef CONFIG_HOTPLUG_CPU
 857#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
 858                *dsttail = srclist; \
 859                if (srclist != NULL) { \
 860                        dsttail = srctail; \
 861                        srclist = NULL; \
 862                        srctail = &srclist;\
 863                } \
 864        } while (0)
 865
 866void rcu_offline_cpu(int cpu)
 867{
 868        int i;
 869        struct rcu_head *list = NULL;
 870        unsigned long flags;
 871        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
 872        struct rcu_head **tail = &list;
 873
 874        /*
 875         * Remove all callbacks from the newly dead CPU, retaining order.
 876         * Otherwise rcu_barrier() will fail
 877         */
 878
 879        spin_lock_irqsave(&rdp->lock, flags);
 880        rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
 881        for (i = GP_STAGES - 1; i >= 0; i--)
 882                rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
 883                                                list, tail);
 884        rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
 885        spin_unlock_irqrestore(&rdp->lock, flags);
 886        rdp->waitlistcount = 0;
 887
 888        /* Disengage the newly dead CPU from the grace-period computation. */
 889
 890        spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
 891        rcu_check_mb(cpu);
 892        if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
 893                smp_mb();  /* Subsequent counter accesses must see new value */
 894                per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
 895                smp_mb();  /* Subsequent RCU read-side critical sections */
 896                           /*  seen -after- acknowledgement. */
 897        }
 898
 899        RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
 900        RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
 901
 902        RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
 903        RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
 904
 905        cpu_clear(cpu, rcu_cpu_online_map);
 906
 907        spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 908
 909        /*
 910         * Place the removed callbacks on the current CPU's queue.
 911         * Make them all start a new grace period: simple approach,
 912         * in theory could starve a given set of callbacks, but
 913         * you would need to be doing some serious CPU hotplugging
 914         * to make this happen.  If this becomes a problem, adding
 915         * a synchronize_rcu() to the hotplug path would be a simple
 916         * fix.
 917         */
 918
 919        local_irq_save(flags);
 920        rdp = RCU_DATA_ME();
 921        spin_lock(&rdp->lock);
 922        *rdp->nexttail = list;
 923        if (list)
 924                rdp->nexttail = tail;
 925        spin_unlock_irqrestore(&rdp->lock, flags);
 926}
 927
 928#else /* #ifdef CONFIG_HOTPLUG_CPU */
 929
 930void rcu_offline_cpu(int cpu)
 931{
 932}
 933
 934#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
 935
 936void __cpuinit rcu_online_cpu(int cpu)
 937{
 938        unsigned long flags;
 939
 940        spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
 941        cpu_set(cpu, rcu_cpu_online_map);
 942        spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 943}
 944
 945static void rcu_process_callbacks(struct softirq_action *unused)
 946{
 947        unsigned long flags;
 948        struct rcu_head *next, *list;
 949        struct rcu_data *rdp;
 950
 951        local_irq_save(flags);
 952        rdp = RCU_DATA_ME();
 953        spin_lock(&rdp->lock);
 954        list = rdp->donelist;
 955        if (list == NULL) {
 956                spin_unlock_irqrestore(&rdp->lock, flags);
 957                return;
 958        }
 959        rdp->donelist = NULL;
 960        rdp->donetail = &rdp->donelist;
 961        RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
 962        spin_unlock_irqrestore(&rdp->lock, flags);
 963        while (list) {
 964                next = list->next;
 965                list->func(list);
 966                list = next;
 967                RCU_TRACE_ME(rcupreempt_trace_invoke);
 968        }
 969}
 970
 971void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 972{
 973        unsigned long flags;
 974        struct rcu_data *rdp;
 975
 976        head->func = func;
 977        head->next = NULL;
 978        local_irq_save(flags);
 979        rdp = RCU_DATA_ME();
 980        spin_lock(&rdp->lock);
 981        __rcu_advance_callbacks(rdp);
 982        *rdp->nexttail = head;
 983        rdp->nexttail = &head->next;
 984        RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
 985        spin_unlock(&rdp->lock);
 986        local_irq_restore(flags);
 987}
 988EXPORT_SYMBOL_GPL(call_rcu);
 989
 990/*
 991 * Wait until all currently running preempt_disable() code segments
 992 * (including hardware-irq-disable segments) complete.  Note that
 993 * in -rt this does -not- necessarily result in all currently executing
 994 * interrupt -handlers- having completed.
 995 */
 996void __synchronize_sched(void)
 997{
 998        cpumask_t oldmask;
 999        int cpu;
1000
1001        if (sched_getaffinity(0, &oldmask) < 0)
1002                oldmask = cpu_possible_map;
1003        for_each_online_cpu(cpu) {
1004                sched_setaffinity(0, &cpumask_of_cpu(cpu));
1005                schedule();
1006        }
1007        sched_setaffinity(0, &oldmask);
1008}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010
1011/*
1012 * Check to see if any future RCU-related work will need to be done
1013 * by the current CPU, even if none need be done immediately, returning
1014 * 1 if so.  Assumes that notifiers would take care of handling any
1015 * outstanding requests from the RCU core.
1016 *
1017 * This function is part of the RCU implementation; it is -not-
1018 * an exported member of the RCU API.
1019 */
1020int rcu_needs_cpu(int cpu)
1021{
1022        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
1023
1024        return (rdp->donelist != NULL ||
1025                !!rdp->waitlistcount ||
1026                rdp->nextlist != NULL);
1027}
1028
1029int rcu_pending(int cpu)
1030{
1031        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
1032
1033        /* The CPU has at least one callback queued somewhere. */
1034
1035        if (rdp->donelist != NULL ||
1036            !!rdp->waitlistcount ||
1037            rdp->nextlist != NULL)
1038                return 1;
1039
1040        /* The RCU core needs an acknowledgement from this CPU. */
1041
1042        if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
1043            (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
1044                return 1;
1045
1046        /* This CPU has fallen behind the global grace-period number. */
1047
1048        if (rdp->completed != rcu_ctrlblk.completed)
1049                return 1;
1050
1051        /* Nothing needed from this CPU. */
1052
1053        return 0;
1054}
1055
1056static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1057                                unsigned long action, void *hcpu)
1058{
1059        long cpu = (long)hcpu;
1060
1061        switch (action) {
1062        case CPU_UP_PREPARE:
1063        case CPU_UP_PREPARE_FROZEN:
1064                rcu_online_cpu(cpu);
1065                break;
1066        case CPU_UP_CANCELED:
1067        case CPU_UP_CANCELED_FROZEN:
1068        case CPU_DEAD:
1069        case CPU_DEAD_FROZEN:
1070                rcu_offline_cpu(cpu);
1071                break;
1072        default:
1073                break;
1074        }
1075        return NOTIFY_OK;
1076}
1077
1078static struct notifier_block __cpuinitdata rcu_nb = {
1079        .notifier_call = rcu_cpu_notify,
1080};
1081
1082void __init __rcu_init(void)
1083{
1084        int cpu;
1085        int i;
1086        struct rcu_data *rdp;
1087
1088        printk(KERN_NOTICE "Preemptible RCU implementation.\n");
1089        for_each_possible_cpu(cpu) {
1090                rdp = RCU_DATA_CPU(cpu);
1091                spin_lock_init(&rdp->lock);
1092                rdp->completed = 0;
1093                rdp->waitlistcount = 0;
1094                rdp->nextlist = NULL;
1095                rdp->nexttail = &rdp->nextlist;
1096                for (i = 0; i < GP_STAGES; i++) {
1097                        rdp->waitlist[i] = NULL;
1098                        rdp->waittail[i] = &rdp->waitlist[i];
1099                }
1100                rdp->donelist = NULL;
1101                rdp->donetail = &rdp->donelist;
1102                rdp->rcu_flipctr[0] = 0;
1103                rdp->rcu_flipctr[1] = 0;
1104        }
1105        register_cpu_notifier(&rcu_nb);
1106
1107        /*
1108         * We don't need protection against CPU-Hotplug here
1109         * since
1110         * a) If a CPU comes online while we are iterating over the
1111         *    cpu_online_map below, we would only end up making a
1112         *    duplicate call to rcu_online_cpu() which sets the corresponding
1113         *    CPU's mask in the rcu_cpu_online_map.
1114         *
1115         * b) A CPU cannot go offline at this point in time since the user
1116         *    does not have access to the sysfs interface, nor do we
1117         *    suspend the system.
1118         */
1119        for_each_online_cpu(cpu)
1120                rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);
1121
1122        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
1123}
1124
/*
 * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
 *
 * Kept as a plain forwarder to synchronize_rcu().
 */
void synchronize_kernel(void)
{
	synchronize_rcu();
}
1132
1133#ifdef CONFIG_RCU_TRACE
1134long *rcupreempt_flipctr(int cpu)
1135{
1136        return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
1137}
1138EXPORT_SYMBOL_GPL(rcupreempt_flipctr);
1139
1140int rcupreempt_flip_flag(int cpu)
1141{
1142        return per_cpu(rcu_flip_flag, cpu);
1143}
1144EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);
1145
1146int rcupreempt_mb_flag(int cpu)
1147{
1148        return per_cpu(rcu_mb_flag, cpu);
1149}
1150EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);
1151
1152char *rcupreempt_try_flip_state_name(void)
1153{
1154        return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
1155}
1156EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
1157
1158struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
1159{
1160        struct rcu_data *rdp = RCU_DATA_CPU(cpu);
1161
1162        return &rdp->trace;
1163}
1164EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
1165
 1166#endif /* #ifdef CONFIG_RCU_TRACE */
1167
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.