linux/arch/ia64/sn/kernel/xpc_main.c
<<
>>
Prefs
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (c) 2004-2007 Silicon Graphics, Inc.  All Rights Reserved.
   7 */
   8
   9
  10/*
  11 * Cross Partition Communication (XPC) support - standard version.
  12 *
  13 *      XPC provides a message passing capability that crosses partition
  14 *      boundaries. This module is made up of two parts:
  15 *
  16 *          partition   This part detects the presence/absence of other
  17 *                      partitions. It provides a heartbeat and monitors
  18 *                      the heartbeats of other partitions.
  19 *
  20 *          channel     This part manages the channels and sends/receives
  21 *                      messages across them to/from other partitions.
  22 *
  23 *      There are a couple of additional functions residing in XP, which
  24 *      provide an interface to XPC for its users.
  25 *
  26 *
  27 *      Caveats:
  28 *
  29 *        . We currently have no way to determine which nasid an IPI came
  30 *          from. Thus, xpc_IPI_send() does a remote AMO write followed by
  31 *          an IPI. The AMO indicates where data is to be pulled from, so
  32 *          after the IPI arrives, the remote partition checks the AMO word.
  33 *          The IPI can actually arrive before the AMO however, so other code
  34 *          must periodically check for this case. Also, remote AMO operations
  35 *          do not reliably time out. Thus we do a remote PIO read solely to
  36 *          know whether the remote partition is down and whether we should
  37 *          stop sending IPIs to it. This remote PIO read operation is set up
  38 *          in a special nofault region so SAL knows to ignore (and cleanup)
  39 *          any errors due to the remote AMO write, PIO read, and/or PIO
  40 *          write operations.
  41 *
  42 *          If/when new hardware solves this IPI problem, we should abandon
  43 *          the current approach.
  44 *
  45 */
  46
  47
  48#include <linux/kernel.h>
  49#include <linux/module.h>
  50#include <linux/init.h>
  51#include <linux/sched.h>
  52#include <linux/syscalls.h>
  53#include <linux/cache.h>
  54#include <linux/interrupt.h>
  55#include <linux/delay.h>
  56#include <linux/reboot.h>
  57#include <linux/completion.h>
  58#include <linux/kdebug.h>
  59#include <asm/sn/intr.h>
  60#include <asm/sn/sn_sal.h>
  61#include <asm/uaccess.h>
  62#include <asm/sn/xpc.h>
  63
  64
  65/* define two XPC debug device structures to be used with dev_dbg() et al */
  66
  67struct device_driver xpc_dbg_name = {
  68        .name = "xpc"
  69};
  70
  71struct device xpc_part_dbg_subname = {
  72        .bus_id = {0},          /* set to "part" at xpc_init() time */
  73        .driver = &xpc_dbg_name
  74};
  75
  76struct device xpc_chan_dbg_subname = {
  77        .bus_id = {0},          /* set to "chan" at xpc_init() time */
  78        .driver = &xpc_dbg_name
  79};
  80
  81struct device *xpc_part = &xpc_part_dbg_subname;
  82struct device *xpc_chan = &xpc_chan_dbg_subname;
  83
  84
  85static int xpc_kdebug_ignore;
  86
  87
  88/* systune related variables for /proc/sys directories */
  89
  90static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
  91static int xpc_hb_min_interval = 1;
  92static int xpc_hb_max_interval = 10;
  93
  94static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
  95static int xpc_hb_check_min_interval = 10;
  96static int xpc_hb_check_max_interval = 120;
  97
  98int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
  99static int xpc_disengage_request_min_timelimit = 0;
 100static int xpc_disengage_request_max_timelimit = 120;
 101
 102static ctl_table xpc_sys_xpc_hb_dir[] = {
 103        {
 104                .ctl_name       = CTL_UNNUMBERED,
 105                .procname       = "hb_interval",
 106                .data           = &xpc_hb_interval,
 107                .maxlen         = sizeof(int),
 108                .mode           = 0644,
 109                .proc_handler   = &proc_dointvec_minmax,
 110                .strategy       = &sysctl_intvec,
 111                .extra1         = &xpc_hb_min_interval,
 112                .extra2         = &xpc_hb_max_interval
 113        },
 114        {
 115                .ctl_name       = CTL_UNNUMBERED,
 116                .procname       = "hb_check_interval",
 117                .data           = &xpc_hb_check_interval,
 118                .maxlen         = sizeof(int),
 119                .mode           = 0644,
 120                .proc_handler   = &proc_dointvec_minmax,
 121                .strategy       = &sysctl_intvec,
 122                .extra1         = &xpc_hb_check_min_interval,
 123                .extra2         = &xpc_hb_check_max_interval
 124        },
 125        {}
 126};
 127static ctl_table xpc_sys_xpc_dir[] = {
 128        {
 129                .ctl_name       = CTL_UNNUMBERED,
 130                .procname       = "hb",
 131                .mode           = 0555,
 132                .child          = xpc_sys_xpc_hb_dir
 133        },
 134        {
 135                .ctl_name       = CTL_UNNUMBERED,
 136                .procname       = "disengage_request_timelimit",
 137                .data           = &xpc_disengage_request_timelimit,
 138                .maxlen         = sizeof(int),
 139                .mode           = 0644,
 140                .proc_handler   = &proc_dointvec_minmax,
 141                .strategy       = &sysctl_intvec,
 142                .extra1         = &xpc_disengage_request_min_timelimit,
 143                .extra2         = &xpc_disengage_request_max_timelimit
 144        },
 145        {}
 146};
 147static ctl_table xpc_sys_dir[] = {
 148        {
 149                .ctl_name       = CTL_UNNUMBERED,
 150                .procname       = "xpc",
 151                .mode           = 0555,
 152                .child          = xpc_sys_xpc_dir
 153        },
 154        {}
 155};
 156static struct ctl_table_header *xpc_sysctl;
 157
 158/* non-zero if any remote partition disengage request was timed out */
 159int xpc_disengage_request_timedout;
 160
 161/* #of IRQs received */
 162static atomic_t xpc_act_IRQ_rcvd;
 163
 164/* IRQ handler notifies this wait queue on receipt of an IRQ */
 165static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 166
 167static unsigned long xpc_hb_check_timeout;
 168
 169/* notification that the xpc_hb_checker thread has exited */
 170static DECLARE_COMPLETION(xpc_hb_checker_exited);
 171
 172/* notification that the xpc_discovery thread has exited */
 173static DECLARE_COMPLETION(xpc_discovery_exited);
 174
 175
 176static struct timer_list xpc_hb_timer;
 177
 178
 179static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 180
 181
 182static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
 183static struct notifier_block xpc_reboot_notifier = {
 184        .notifier_call = xpc_system_reboot,
 185};
 186
 187static int xpc_system_die(struct notifier_block *, unsigned long, void *);
 188static struct notifier_block xpc_die_notifier = {
 189        .notifier_call = xpc_system_die,
 190};
 191
 192
 193/*
 194 * Timer function to enforce the timelimit on the partition disengage request.
 195 */
 196static void
 197xpc_timeout_partition_disengage_request(unsigned long data)
 198{
 199        struct xpc_partition *part = (struct xpc_partition *) data;
 200
 201
 202        DBUG_ON(jiffies < part->disengage_request_timeout);
 203
 204        (void) xpc_partition_disengaged(part);
 205
 206        DBUG_ON(part->disengage_request_timeout != 0);
 207        DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
 208}
 209
 210
 211/*
 212 * Notify the heartbeat check thread that an IRQ has been received.
 213 */
 214static irqreturn_t
 215xpc_act_IRQ_handler(int irq, void *dev_id)
 216{
 217        atomic_inc(&xpc_act_IRQ_rcvd);
 218        wake_up_interruptible(&xpc_act_IRQ_wq);
 219        return IRQ_HANDLED;
 220}
 221
 222
 223/*
 224 * Timer to produce the heartbeat.  The timer structures function is
 225 * already set when this is initially called.  A tunable is used to
 226 * specify when the next timeout should occur.
 227 */
 228static void
 229xpc_hb_beater(unsigned long dummy)
 230{
 231        xpc_vars->heartbeat++;
 232
 233        if (jiffies >= xpc_hb_check_timeout) {
 234                wake_up_interruptible(&xpc_act_IRQ_wq);
 235        }
 236
 237        xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
 238        add_timer(&xpc_hb_timer);
 239}
 240
 241
 242/*
 243 * This thread is responsible for nearly all of the partition
 244 * activation/deactivation.
 245 */
 246static int
 247xpc_hb_checker(void *ignore)
 248{
 249        int last_IRQ_count = 0;
 250        int new_IRQ_count;
 251        int force_IRQ=0;
 252
 253
 254        /* this thread was marked active by xpc_hb_init() */
 255
 256        daemonize(XPC_HB_CHECK_THREAD_NAME);
 257
 258        set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
 259
 260        /* set our heartbeating to other partitions into motion */
 261        xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
 262        xpc_hb_beater(0);
 263
 264        while (!(volatile int) xpc_exiting) {
 265
 266                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 267                        "been received\n",
 268                        (int) (xpc_hb_check_timeout - jiffies),
 269                        atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
 270
 271
 272                /* checking of remote heartbeats is skewed by IRQ handling */
 273                if (jiffies >= xpc_hb_check_timeout) {
 274                        dev_dbg(xpc_part, "checking remote heartbeats\n");
 275                        xpc_check_remote_hb();
 276
 277                        /*
 278                         * We need to periodically recheck to ensure no
 279                         * IPI/AMO pairs have been missed.  That check
 280                         * must always reset xpc_hb_check_timeout.
 281                         */
 282                        force_IRQ = 1;
 283                }
 284
 285
 286                /* check for outstanding IRQs */
 287                new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
 288                if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
 289                        force_IRQ = 0;
 290
 291                        dev_dbg(xpc_part, "found an IRQ to process; will be "
 292                                "resetting xpc_hb_check_timeout\n");
 293
 294                        last_IRQ_count += xpc_identify_act_IRQ_sender();
 295                        if (last_IRQ_count < new_IRQ_count) {
 296                                /* retry once to help avoid missing AMO */
 297                                (void) xpc_identify_act_IRQ_sender();
 298                        }
 299                        last_IRQ_count = new_IRQ_count;
 300
 301                        xpc_hb_check_timeout = jiffies +
 302                                           (xpc_hb_check_interval * HZ);
 303                }
 304
 305                /* wait for IRQ or timeout */
 306                (void) wait_event_interruptible(xpc_act_IRQ_wq,
 307                            (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
 308                                        jiffies >= xpc_hb_check_timeout ||
 309                                                (volatile int) xpc_exiting));
 310        }
 311
 312        dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 313
 314
 315        /* mark this thread as having exited */
 316        complete(&xpc_hb_checker_exited);
 317        return 0;
 318}
 319
 320
 321/*
 322 * This thread will attempt to discover other partitions to activate
 323 * based on info provided by SAL. This new thread is short lived and
 324 * will exit once discovery is complete.
 325 */
 326static int
 327xpc_initiate_discovery(void *ignore)
 328{
 329        daemonize(XPC_DISCOVERY_THREAD_NAME);
 330
 331        xpc_discovery();
 332
 333        dev_dbg(xpc_part, "discovery thread is exiting\n");
 334
 335        /* mark this thread as having exited */
 336        complete(&xpc_discovery_exited);
 337        return 0;
 338}
 339
 340
 341/*
 342 * Establish first contact with the remote partititon. This involves pulling
 343 * the XPC per partition variables from the remote partition and waiting for
 344 * the remote partition to pull ours.
 345 */
 346static enum xpc_retval
 347xpc_make_first_contact(struct xpc_partition *part)
 348{
 349        enum xpc_retval ret;
 350
 351
 352        while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
 353                if (ret != xpcRetry) {
 354                        XPC_DEACTIVATE_PARTITION(part, ret);
 355                        return ret;
 356                }
 357
 358                dev_dbg(xpc_chan, "waiting to make first contact with "
 359                        "partition %d\n", XPC_PARTID(part));
 360
 361                /* wait a 1/4 of a second or so */
 362                (void) msleep_interruptible(250);
 363
 364                if (part->act_state == XPC_P_DEACTIVATING) {
 365                        return part->reason;
 366                }
 367        }
 368
 369        return xpc_mark_partition_active(part);
 370}
 371
 372
 373/*
 374 * The first kthread assigned to a newly activated partition is the one
 375 * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
 376 * that kthread until the partition is brought down, at which time that kthread
 377 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 378 * that XPC has dismantled all communication infrastructure for the associated
 379 * partition.) This kthread becomes the channel manager for that partition.
 380 *
 381 * Each active partition has a channel manager, who, besides connecting and
 382 * disconnecting channels, will ensure that each of the partition's connected
 383 * channels has the required number of assigned kthreads to get the work done.
 384 */
 385static void
 386xpc_channel_mgr(struct xpc_partition *part)
 387{
 388        while (part->act_state != XPC_P_DEACTIVATING ||
 389                        atomic_read(&part->nchannels_active) > 0 ||
 390                                        !xpc_partition_disengaged(part)) {
 391
 392                xpc_process_channel_activity(part);
 393
 394
 395                /*
 396                 * Wait until we've been requested to activate kthreads or
 397                 * all of the channel's message queues have been torn down or
 398                 * a signal is pending.
 399                 *
 400                 * The channel_mgr_requests is set to 1 after being awakened,
 401                 * This is done to prevent the channel mgr from making one pass
 402                 * through the loop for each request, since he will
 403                 * be servicing all the requests in one pass. The reason it's
 404                 * set to 1 instead of 0 is so that other kthreads will know
 405                 * that the channel mgr is running and won't bother trying to
 406                 * wake him up.
 407                 */
 408                atomic_dec(&part->channel_mgr_requests);
 409                (void) wait_event_interruptible(part->channel_mgr_wq,
 410                                (atomic_read(&part->channel_mgr_requests) > 0 ||
 411                                (volatile u64) part->local_IPI_amo != 0 ||
 412                                ((volatile u8) part->act_state ==
 413                                                        XPC_P_DEACTIVATING &&
 414                                atomic_read(&part->nchannels_active) == 0 &&
 415                                xpc_partition_disengaged(part))));
 416                atomic_set(&part->channel_mgr_requests, 1);
 417
 418                // >>> Does it need to wakeup periodically as well? In case we
 419                // >>> miscalculated the #of kthreads to wakeup or create?
 420        }
 421}
 422
 423
 424/*
 425 * When XPC HB determines that a partition has come up, it will create a new
 426 * kthread and that kthread will call this function to attempt to set up the
 427 * basic infrastructure used for Cross Partition Communication with the newly
 428 * upped partition.
 429 *
 430 * The kthread that was created by XPC HB and which setup the XPC
 431 * infrastructure will remain assigned to the partition until the partition
 432 * goes down. At which time the kthread will teardown the XPC infrastructure
 433 * and then exit.
 434 *
 435 * XPC HB will put the remote partition's XPC per partition specific variables
 436 * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
 437 * calling xpc_partition_up().
 438 */
 439static void
 440xpc_partition_up(struct xpc_partition *part)
 441{
 442        DBUG_ON(part->channels != NULL);
 443
 444        dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
 445
 446        if (xpc_setup_infrastructure(part) != xpcSuccess) {
 447                return;
 448        }
 449
 450        /*
 451         * The kthread that XPC HB called us with will become the
 452         * channel manager for this partition. It will not return
 453         * back to XPC HB until the partition's XPC infrastructure
 454         * has been dismantled.
 455         */
 456
 457        (void) xpc_part_ref(part);      /* this will always succeed */
 458
 459        if (xpc_make_first_contact(part) == xpcSuccess) {
 460                xpc_channel_mgr(part);
 461        }
 462
 463        xpc_part_deref(part);
 464
 465        xpc_teardown_infrastructure(part);
 466}
 467
 468
 469static int
 470xpc_activating(void *__partid)
 471{
 472        partid_t partid = (u64) __partid;
 473        struct xpc_partition *part = &xpc_partitions[partid];
 474        unsigned long irq_flags;
 475        struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
 476        int ret;
 477
 478
 479        DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
 480
 481        spin_lock_irqsave(&part->act_lock, irq_flags);
 482
 483        if (part->act_state == XPC_P_DEACTIVATING) {
 484                part->act_state = XPC_P_INACTIVE;
 485                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 486                part->remote_rp_pa = 0;
 487                return 0;
 488        }
 489
 490        /* indicate the thread is activating */
 491        DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
 492        part->act_state = XPC_P_ACTIVATING;
 493
 494        XPC_SET_REASON(part, 0, 0);
 495        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 496
 497        dev_dbg(xpc_part, "bringing partition %d up\n", partid);
 498
 499        daemonize("xpc%02d", partid);
 500
 501        /*
 502         * This thread needs to run at a realtime priority to prevent a
 503         * significant performance degradation.
 504         */
 505        ret = sched_setscheduler(current, SCHED_FIFO, &param);
 506        if (ret != 0) {
 507                dev_warn(xpc_part, "unable to set pid %d to a realtime "
 508                        "priority, ret=%d\n", current->pid, ret);
 509        }
 510
 511        /* allow this thread and its children to run on any CPU */
 512        set_cpus_allowed(current, CPU_MASK_ALL);
 513
 514        /*
 515         * Register the remote partition's AMOs with SAL so it can handle
 516         * and cleanup errors within that address range should the remote
 517         * partition go down. We don't unregister this range because it is
 518         * difficult to tell when outstanding writes to the remote partition
 519         * are finished and thus when it is safe to unregister. This should
 520         * not result in wasted space in the SAL xp_addr_region table because
 521         * we should get the same page for remote_amos_page_pa after module
 522         * reloads and system reboots.
 523         */
 524        if (sn_register_xp_addr_region(part->remote_amos_page_pa,
 525                                                        PAGE_SIZE, 1) < 0) {
 526                dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
 527                        "xp_addr region\n", partid);
 528
 529                spin_lock_irqsave(&part->act_lock, irq_flags);
 530                part->act_state = XPC_P_INACTIVE;
 531                XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
 532                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 533                part->remote_rp_pa = 0;
 534                return 0;
 535        }
 536
 537        xpc_allow_hb(partid, xpc_vars);
 538        xpc_IPI_send_activated(part);
 539
 540
 541        /*
 542         * xpc_partition_up() holds this thread and marks this partition as
 543         * XPC_P_ACTIVE by calling xpc_hb_mark_active().
 544         */
 545        (void) xpc_partition_up(part);
 546
 547        xpc_disallow_hb(partid, xpc_vars);
 548        xpc_mark_partition_inactive(part);
 549
 550        if (part->reason == xpcReactivating) {
 551                /* interrupting ourselves results in activating partition */
 552                xpc_IPI_send_reactivate(part);
 553        }
 554
 555        return 0;
 556}
 557
 558
 559void
 560xpc_activate_partition(struct xpc_partition *part)
 561{
 562        partid_t partid = XPC_PARTID(part);
 563        unsigned long irq_flags;
 564        pid_t pid;
 565
 566
 567        spin_lock_irqsave(&part->act_lock, irq_flags);
 568
 569        DBUG_ON(part->act_state != XPC_P_INACTIVE);
 570
 571        part->act_state = XPC_P_ACTIVATION_REQ;
 572        XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
 573
 574        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 575
 576        pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
 577
 578        if (unlikely(pid <= 0)) {
 579                spin_lock_irqsave(&part->act_lock, irq_flags);
 580                part->act_state = XPC_P_INACTIVE;
 581                XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
 582                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 583        }
 584}
 585
 586
 587/*
 588 * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
 589 * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
 590 * than one partition, we use an AMO_t structure per partition to indicate
 591 * whether a partition has sent an IPI or not.  >>> If it has, then wake up the
 592 * associated kthread to handle it.
 593 *
 594 * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
 595 * running on other partitions.
 596 *
 597 * Noteworthy Arguments:
 598 *
 599 *      irq - Interrupt ReQuest number. NOT USED.
 600 *
 601 *      dev_id - partid of IPI's potential sender.
 602 */
 603irqreturn_t
 604xpc_notify_IRQ_handler(int irq, void *dev_id)
 605{
 606        partid_t partid = (partid_t) (u64) dev_id;
 607        struct xpc_partition *part = &xpc_partitions[partid];
 608
 609
 610        DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
 611
 612        if (xpc_part_ref(part)) {
 613                xpc_check_for_channel_activity(part);
 614
 615                xpc_part_deref(part);
 616        }
 617        return IRQ_HANDLED;
 618}
 619
 620
 621/*
 622 * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
 623 * because the write to their associated IPI amo completed after the IRQ/IPI
 624 * was received.
 625 */
 626void
 627xpc_dropped_IPI_check(struct xpc_partition *part)
 628{
 629        if (xpc_part_ref(part)) {
 630                xpc_check_for_channel_activity(part);
 631
 632                part->dropped_IPI_timer.expires = jiffies +
 633                                                        XPC_P_DROPPED_IPI_WAIT;
 634                add_timer(&part->dropped_IPI_timer);
 635                xpc_part_deref(part);
 636        }
 637}
 638
 639
 640void
 641xpc_activate_kthreads(struct xpc_channel *ch, int needed)
 642{
 643        int idle = atomic_read(&ch->kthreads_idle);
 644        int assigned = atomic_read(&ch->kthreads_assigned);
 645        int wakeup;
 646
 647
 648        DBUG_ON(needed <= 0);
 649
 650        if (idle > 0) {
 651                wakeup = (needed > idle) ? idle : needed;
 652                needed -= wakeup;
 653
 654                dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
 655                        "channel=%d\n", wakeup, ch->partid, ch->number);
 656
 657                /* only wakeup the requested number of kthreads */
 658                wake_up_nr(&ch->idle_wq, wakeup);
 659        }
 660
 661        if (needed <= 0) {
 662                return;
 663        }
 664
 665        if (needed + assigned > ch->kthreads_assigned_limit) {
 666                needed = ch->kthreads_assigned_limit - assigned;
 667                // >>>should never be less than 0
 668                if (needed <= 0) {
 669                        return;
 670                }
 671        }
 672
 673        dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
 674                needed, ch->partid, ch->number);
 675
 676        xpc_create_kthreads(ch, needed, 0);
 677}
 678
 679
 680/*
 681 * This function is where XPC's kthreads wait for messages to deliver.
 682 */
 683static void
 684xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 685{
 686        do {
 687                /* deliver messages to their intended recipients */
 688
 689                while ((volatile s64) ch->w_local_GP.get <
 690                                (volatile s64) ch->w_remote_GP.put &&
 691                                        !((volatile u32) ch->flags &
 692                                                XPC_C_DISCONNECTING)) {
 693                        xpc_deliver_msg(ch);
 694                }
 695
 696                if (atomic_inc_return(&ch->kthreads_idle) >
 697                                                ch->kthreads_idle_limit) {
 698                        /* too many idle kthreads on this channel */
 699                        atomic_dec(&ch->kthreads_idle);
 700                        break;
 701                }
 702
 703                dev_dbg(xpc_chan, "idle kthread calling "
 704                        "wait_event_interruptible_exclusive()\n");
 705
 706                (void) wait_event_interruptible_exclusive(ch->idle_wq,
 707                                ((volatile s64) ch->w_local_GP.get <
 708                                        (volatile s64) ch->w_remote_GP.put ||
 709                                ((volatile u32) ch->flags &
 710                                                XPC_C_DISCONNECTING)));
 711
 712                atomic_dec(&ch->kthreads_idle);
 713
 714        } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING));
 715}
 716
 717
 718static int
 719xpc_daemonize_kthread(void *args)
 720{
 721        partid_t partid = XPC_UNPACK_ARG1(args);
 722        u16 ch_number = XPC_UNPACK_ARG2(args);
 723        struct xpc_partition *part = &xpc_partitions[partid];
 724        struct xpc_channel *ch;
 725        int n_needed;
 726        unsigned long irq_flags;
 727
 728
 729        daemonize("xpc%02dc%d", partid, ch_number);
 730
 731        dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
 732                partid, ch_number);
 733
 734        ch = &part->channels[ch_number];
 735
 736        if (!(ch->flags & XPC_C_DISCONNECTING)) {
 737
 738                /* let registerer know that connection has been established */
 739
 740                spin_lock_irqsave(&ch->lock, irq_flags);
 741                if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
 742                        ch->flags |= XPC_C_CONNECTEDCALLOUT;
 743                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 744
 745                        xpc_connected_callout(ch);
 746
 747                        spin_lock_irqsave(&ch->lock, irq_flags);
 748                        ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
 749                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 750
 751                        /*
 752                         * It is possible that while the callout was being
 753                         * made that the remote partition sent some messages.
 754                         * If that is the case, we may need to activate
 755                         * additional kthreads to help deliver them. We only
 756                         * need one less than total #of messages to deliver.
 757                         */
 758                        n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
 759                        if (n_needed > 0 &&
 760                                        !(ch->flags & XPC_C_DISCONNECTING)) {
 761                                xpc_activate_kthreads(ch, n_needed);
 762                        }
 763                } else {
 764                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 765                }
 766
 767                xpc_kthread_waitmsgs(part, ch);
 768        }
 769
 770        /* let registerer know that connection is disconnecting */
 771
 772        spin_lock_irqsave(&ch->lock, irq_flags);
 773        if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
 774                        !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
 775                ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
 776                spin_unlock_irqrestore(&ch->lock, irq_flags);
 777
 778                xpc_disconnect_callout(ch, xpcDisconnecting);
 779
 780                spin_lock_irqsave(&ch->lock, irq_flags);
 781                ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
 782        }
 783        spin_unlock_irqrestore(&ch->lock, irq_flags);
 784
 785        if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
 786                if (atomic_dec_return(&part->nchannels_engaged) == 0) {
 787                        xpc_mark_partition_disengaged(part);
 788                        xpc_IPI_send_disengage(part);
 789                }
 790        }
 791
 792        xpc_msgqueue_deref(ch);
 793
 794        dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
 795                partid, ch_number);
 796
 797        xpc_part_deref(part);
 798        return 0;
 799}
 800
 801
 802/*
 803 * For each partition that XPC has established communications with, there is
 804 * a minimum of one kernel thread assigned to perform any operation that
 805 * may potentially sleep or block (basically the callouts to the asynchronous
 806 * functions registered via xpc_connect()).
 807 *
 808 * Additional kthreads are created and destroyed by XPC as the workload
 809 * demands.
 810 *
 811 * A kthread is assigned to one of the active channels that exists for a given
 812 * partition.
 813 */
 814void
 815xpc_create_kthreads(struct xpc_channel *ch, int needed,
 816                        int ignore_disconnecting)
 817{
 818        unsigned long irq_flags;
 819        pid_t pid;
 820        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
 821        struct xpc_partition *part = &xpc_partitions[ch->partid];
 822
 823
 824        while (needed-- > 0) {
 825
 826                /*
 827                 * The following is done on behalf of the newly created
 828                 * kthread. That kthread is responsible for doing the
 829                 * counterpart to the following before it exits.
 830                 */
 831                if (ignore_disconnecting) {
 832                        if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
 833                                /* kthreads assigned had gone to zero */
 834                                BUG_ON(!(ch->flags &
 835                                        XPC_C_DISCONNECTINGCALLOUT_MADE));
 836                                break;
 837                        }
 838
 839                } else if (ch->flags & XPC_C_DISCONNECTING) {
 840                        break;
 841
 842                } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
 843                        if (atomic_inc_return(&part->nchannels_engaged) == 1)
 844                                xpc_mark_partition_engaged(part);
 845                }
 846                (void) xpc_part_ref(part);
 847                xpc_msgqueue_ref(ch);
 848
 849                pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
 850                if (pid < 0) {
 851                        /* the fork failed */
 852
 853                        /*
 854                         * NOTE: if (ignore_disconnecting &&
 855                         * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
 856                         * then we'll deadlock if all other kthreads assigned
 857                         * to this channel are blocked in the channel's
 858                         * registerer, because the only thing that will unblock
 859                         * them is the xpcDisconnecting callout that this
 860                         * failed kernel_thread would have made.
 861                         */
 862
 863                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 864                            atomic_dec_return(&part->nchannels_engaged) == 0) {
 865                                xpc_mark_partition_disengaged(part);
 866                                xpc_IPI_send_disengage(part);
 867                        }
 868                        xpc_msgqueue_deref(ch);
 869                        xpc_part_deref(part);
 870
 871                        if (atomic_read(&ch->kthreads_assigned) <
 872                                                ch->kthreads_idle_limit) {
 873                                /*
 874                                 * Flag this as an error only if we have an
 875                                 * insufficient #of kthreads for the channel
 876                                 * to function.
 877                                 */
 878                                spin_lock_irqsave(&ch->lock, irq_flags);
 879                                XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
 880                                                                &irq_flags);
 881                                spin_unlock_irqrestore(&ch->lock, irq_flags);
 882                        }
 883                        break;
 884                }
 885
 886                ch->kthreads_created++; // >>> temporary debug only!!!
 887        }
 888}
 889
 890
 891void
 892xpc_disconnect_wait(int ch_number)
 893{
 894        unsigned long irq_flags;
 895        partid_t partid;
 896        struct xpc_partition *part;
 897        struct xpc_channel *ch;
 898        int wakeup_channel_mgr;
 899
 900
 901        /* now wait for all callouts to the caller's function to cease */
 902        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 903                part = &xpc_partitions[partid];
 904
 905                if (!xpc_part_ref(part)) {
 906                        continue;
 907                }
 908
 909                ch = &part->channels[ch_number];
 910
 911                if (!(ch->flags & XPC_C_WDISCONNECT)) {
 912                        xpc_part_deref(part);
 913                        continue;
 914                }
 915
 916                wait_for_completion(&ch->wdisconnect_wait);
 917
 918                spin_lock_irqsave(&ch->lock, irq_flags);
 919                DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
 920                wakeup_channel_mgr = 0;
 921
 922                if (ch->delayed_IPI_flags) {
 923                        if (part->act_state != XPC_P_DEACTIVATING) {
 924                                spin_lock(&part->IPI_lock);
 925                                XPC_SET_IPI_FLAGS(part->local_IPI_amo,
 926                                        ch->number, ch->delayed_IPI_flags);
 927                                spin_unlock(&part->IPI_lock);
 928                                wakeup_channel_mgr = 1;
 929                        }
 930                        ch->delayed_IPI_flags = 0;
 931                }
 932
 933                ch->flags &= ~XPC_C_WDISCONNECT;
 934                spin_unlock_irqrestore(&ch->lock, irq_flags);
 935
 936                if (wakeup_channel_mgr) {
 937                        xpc_wakeup_channel_mgr(part);
 938                }
 939
 940                xpc_part_deref(part);
 941        }
 942}
 943
 944
 945static void
 946xpc_do_exit(enum xpc_retval reason)
 947{
 948        partid_t partid;
 949        int active_part_count, printed_waiting_msg = 0;
 950        struct xpc_partition *part;
 951        unsigned long printmsg_time, disengage_request_timeout = 0;
 952
 953
 954        /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
 955        DBUG_ON(xpc_exiting == 1);
 956
 957        /*
 958         * Let the heartbeat checker thread and the discovery thread
 959         * (if one is running) know that they should exit. Also wake up
 960         * the heartbeat checker thread in case it's sleeping.
 961         */
 962        xpc_exiting = 1;
 963        wake_up_interruptible(&xpc_act_IRQ_wq);
 964
 965        /* ignore all incoming interrupts */
 966        free_irq(SGI_XPC_ACTIVATE, NULL);
 967
 968        /* wait for the discovery thread to exit */
 969        wait_for_completion(&xpc_discovery_exited);
 970
 971        /* wait for the heartbeat checker thread to exit */
 972        wait_for_completion(&xpc_hb_checker_exited);
 973
 974
 975        /* sleep for a 1/3 of a second or so */
 976        (void) msleep_interruptible(300);
 977
 978
 979        /* wait for all partitions to become inactive */
 980
 981        printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
 982        xpc_disengage_request_timedout = 0;
 983
 984        do {
 985                active_part_count = 0;
 986
 987                for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 988                        part = &xpc_partitions[partid];
 989
 990                        if (xpc_partition_disengaged(part) &&
 991                                        part->act_state == XPC_P_INACTIVE) {
 992                                continue;
 993                        }
 994
 995                        active_part_count++;
 996
 997                        XPC_DEACTIVATE_PARTITION(part, reason);
 998
 999                        if (part->disengage_request_timeout >
1000                                                disengage_request_timeout) {
1001                                disengage_request_timeout =
1002                                                part->disengage_request_timeout;
1003                        }
1004                }
1005
1006                if (xpc_partition_engaged(-1UL)) {
1007                        if (time_after(jiffies, printmsg_time)) {
1008                                dev_info(xpc_part, "waiting for remote "
1009                                        "partitions to disengage, timeout in "
1010                                        "%ld seconds\n",
1011                                        (disengage_request_timeout - jiffies)
1012                                                                        / HZ);
1013                                printmsg_time = jiffies +
1014                                        (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
1015                                printed_waiting_msg = 1;
1016                        }
1017
1018                } else if (active_part_count > 0) {
1019                        if (printed_waiting_msg) {
1020                                dev_info(xpc_part, "waiting for local partition"
1021                                        " to disengage\n");
1022                                printed_waiting_msg = 0;
1023                        }
1024
1025                } else {
1026                        if (!xpc_disengage_request_timedout) {
1027                                dev_info(xpc_part, "all partitions have "
1028                                        "disengaged\n");
1029                        }
1030                        break;
1031                }
1032
1033                /* sleep for a 1/3 of a second or so */
1034                (void) msleep_interruptible(300);
1035
1036        } while (1);
1037
1038        DBUG_ON(xpc_partition_engaged(-1UL));
1039
1040
1041        /* indicate to others that our reserved page is uninitialized */
1042        xpc_rsvd_page->vars_pa = 0;
1043
1044        /* now it's time to eliminate our heartbeat */
1045        del_timer_sync(&xpc_hb_timer);
1046        DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
1047
1048        if (reason == xpcUnloading) {
1049                /* take ourselves off of the reboot_notifier_list */
1050                (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1051
1052                /* take ourselves off of the die_notifier list */
1053                (void) unregister_die_notifier(&xpc_die_notifier);
1054        }
1055
1056        /* close down protections for IPI operations */
1057        xpc_restrict_IPI_ops();
1058
1059
1060        /* clear the interface to XPC's functions */
1061        xpc_clear_interface();
1062
1063        if (xpc_sysctl) {
1064                unregister_sysctl_table(xpc_sysctl);
1065        }
1066
1067        kfree(xpc_remote_copy_buffer_base);
1068}
1069
1070
1071/*
1072 * This function is called when the system is being rebooted.
1073 */
1074static int
1075xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1076{
1077        enum xpc_retval reason;
1078
1079
1080        switch (event) {
1081        case SYS_RESTART:
1082                reason = xpcSystemReboot;
1083                break;
1084        case SYS_HALT:
1085                reason = xpcSystemHalt;
1086                break;
1087        case SYS_POWER_OFF:
1088                reason = xpcSystemPoweroff;
1089                break;
1090        default:
1091                reason = xpcSystemGoingDown;
1092        }
1093
1094        xpc_do_exit(reason);
1095        return NOTIFY_DONE;
1096}
1097
1098
1099/*
1100 * Notify other partitions to disengage from all references to our memory.
1101 */
1102static void
1103xpc_die_disengage(void)
1104{
1105        struct xpc_partition *part;
1106        partid_t partid;
1107        unsigned long engaged;
1108        long time, printmsg_time, disengage_request_timeout;
1109
1110
1111        /* keep xpc_hb_checker thread from doing anything (just in case) */
1112        xpc_exiting = 1;
1113
1114        xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */
1115
1116        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1117                part = &xpc_partitions[partid];
1118
1119                if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
1120                                                        remote_vars_version)) {
1121
1122                        /* just in case it was left set by an earlier XPC */
1123                        xpc_clear_partition_engaged(1UL << partid);
1124                        continue;
1125                }
1126
1127                if (xpc_partition_engaged(1UL << partid) ||
1128                                        part->act_state != XPC_P_INACTIVE) {
1129                        xpc_request_partition_disengage(part);
1130                        xpc_mark_partition_disengaged(part);
1131                        xpc_IPI_send_disengage(part);
1132                }
1133        }
1134
1135        time = rtc_time();
1136        printmsg_time = time +
1137                (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
1138        disengage_request_timeout = time +
1139                (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
1140
1141        /* wait for all other partitions to disengage from us */
1142
1143        while (1) {
1144                engaged = xpc_partition_engaged(-1UL);
1145                if (!engaged) {
1146                        dev_info(xpc_part, "all partitions have disengaged\n");
1147                        break;
1148                }
1149
1150                time = rtc_time();
1151                if (time >= disengage_request_timeout) {
1152                        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1153                                if (engaged & (1UL << partid)) {
1154                                        dev_info(xpc_part, "disengage from "
1155                                                "remote partition %d timed "
1156                                                "out\n", partid);
1157                                }
1158                        }
1159                        break;
1160                }
1161
1162                if (time >= printmsg_time) {
1163                        dev_info(xpc_part, "waiting for remote partitions to "
1164                                "disengage, timeout in %ld seconds\n",
1165                                (disengage_request_timeout - time) /
1166                                                sn_rtc_cycles_per_second);
1167                        printmsg_time = time +
1168                                        (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1169                                                sn_rtc_cycles_per_second);
1170                }
1171        }
1172}
1173
1174
1175/*
1176 * This function is called when the system is being restarted or halted due
1177 * to some sort of system failure. If this is the case we need to notify the
1178 * other partitions to disengage from all references to our memory.
1179 * This function can also be called when our heartbeater could be offlined
1180 * for a time. In this case we need to notify other partitions to not worry
1181 * about the lack of a heartbeat.
1182 */
1183static int
1184xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1185{
1186        switch (event) {
1187        case DIE_MACHINE_RESTART:
1188        case DIE_MACHINE_HALT:
1189                xpc_die_disengage();
1190                break;
1191
1192        case DIE_KDEBUG_ENTER:
1193                /* Should lack of heartbeat be ignored by other partitions? */
1194                if (!xpc_kdebug_ignore) {
1195                        break;
1196                }
1197                /* fall through */
1198        case DIE_MCA_MONARCH_ENTER:
1199        case DIE_INIT_MONARCH_ENTER:
1200                xpc_vars->heartbeat++;
1201                xpc_vars->heartbeat_offline = 1;
1202                break;
1203
1204        case DIE_KDEBUG_LEAVE:
1205                /* Is lack of heartbeat being ignored by other partitions? */
1206                if (!xpc_kdebug_ignore) {
1207                        break;
1208                }
1209                /* fall through */
1210        case DIE_MCA_MONARCH_LEAVE:
1211        case DIE_INIT_MONARCH_LEAVE:
1212                xpc_vars->heartbeat++;
1213                xpc_vars->heartbeat_offline = 0;
1214                break;
1215        }
1216
1217        return NOTIFY_DONE;
1218}
1219
1220
1221int __init
1222xpc_init(void)
1223{
1224        int ret;
1225        partid_t partid;
1226        struct xpc_partition *part;
1227        pid_t pid;
1228        size_t buf_size;
1229
1230
1231        if (!ia64_platform_is("sn2")) {
1232                return -ENODEV;
1233        }
1234
1235
1236        buf_size = max(XPC_RP_VARS_SIZE,
1237                                XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
1238        xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
1239                                     GFP_KERNEL, &xpc_remote_copy_buffer_base);
1240        if (xpc_remote_copy_buffer == NULL)
1241                return -ENOMEM;
1242
1243        snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
1244        snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
1245
1246        xpc_sysctl = register_sysctl_table(xpc_sys_dir);
1247
1248        /*
1249         * The first few fields of each entry of xpc_partitions[] need to
1250         * be initialized now so that calls to xpc_connect() and
1251         * xpc_disconnect() can be made prior to the activation of any remote
1252         * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
1253         * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
1254         * PARTITION HAS BEEN ACTIVATED.
1255         */
1256        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1257                part = &xpc_partitions[partid];
1258
1259                DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part));
1260
1261                part->act_IRQ_rcvd = 0;
1262                spin_lock_init(&part->act_lock);
1263                part->act_state = XPC_P_INACTIVE;
1264                XPC_SET_REASON(part, 0, 0);
1265
1266                init_timer(&part->disengage_request_timer);
1267                part->disengage_request_timer.function =
1268                                xpc_timeout_partition_disengage_request;
1269                part->disengage_request_timer.data = (unsigned long) part;
1270
1271                part->setup_state = XPC_P_UNSET;
1272                init_waitqueue_head(&part->teardown_wq);
1273                atomic_set(&part->references, 0);
1274        }
1275
1276        /*
1277         * Open up protections for IPI operations (and AMO operations on
1278         * Shub 1.1 systems).
1279         */
1280        xpc_allow_IPI_ops();
1281
1282        /*
1283         * Interrupts being processed will increment this atomic variable and
1284         * awaken the heartbeat thread which will process the interrupts.
1285         */
1286        atomic_set(&xpc_act_IRQ_rcvd, 0);
1287
1288        /*
1289         * This is safe to do before the xpc_hb_checker thread has started
1290         * because the handler releases a wait queue.  If an interrupt is
1291         * received before the thread is waiting, it will not go to sleep,
1292         * but rather immediately process the interrupt.
1293         */
1294        ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
1295                                                        "xpc hb", NULL);
1296        if (ret != 0) {
1297                dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
1298                        "errno=%d\n", -ret);
1299
1300                xpc_restrict_IPI_ops();
1301
1302                if (xpc_sysctl) {
1303                        unregister_sysctl_table(xpc_sysctl);
1304                }
1305
1306                kfree(xpc_remote_copy_buffer_base);
1307                return -EBUSY;
1308        }
1309
1310        /*
1311         * Fill the partition reserved page with the information needed by
1312         * other partitions to discover we are alive and establish initial
1313         * communications.
1314         */
1315        xpc_rsvd_page = xpc_rsvd_page_init();
1316        if (xpc_rsvd_page == NULL) {
1317                dev_err(xpc_part, "could not setup our reserved page\n");
1318
1319                free_irq(SGI_XPC_ACTIVATE, NULL);
1320                xpc_restrict_IPI_ops();
1321
1322                if (xpc_sysctl) {
1323                        unregister_sysctl_table(xpc_sysctl);
1324                }
1325
1326                kfree(xpc_remote_copy_buffer_base);
1327                return -EBUSY;
1328        }
1329
1330
1331        /* add ourselves to the reboot_notifier_list */
1332        ret = register_reboot_notifier(&xpc_reboot_notifier);
1333        if (ret != 0) {
1334                dev_warn(xpc_part, "can't register reboot notifier\n");
1335        }
1336
1337        /* add ourselves to the die_notifier list */
1338        ret = register_die_notifier(&xpc_die_notifier);
1339        if (ret != 0) {
1340                dev_warn(xpc_part, "can't register die notifier\n");
1341        }
1342
1343        init_timer(&xpc_hb_timer);
1344        xpc_hb_timer.function = xpc_hb_beater;
1345
1346        /*
1347         * The real work-horse behind xpc.  This processes incoming
1348         * interrupts and monitors remote heartbeats.
1349         */
1350        pid = kernel_thread(xpc_hb_checker, NULL, 0);
1351        if (pid < 0) {
1352                dev_err(xpc_part, "failed while forking hb check thread\n");
1353
1354                /* indicate to others that our reserved page is uninitialized */
1355                xpc_rsvd_page->vars_pa = 0;
1356
1357                /* take ourselves off of the reboot_notifier_list */
1358                (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1359
1360                /* take ourselves off of the die_notifier list */
1361                (void) unregister_die_notifier(&xpc_die_notifier);
1362
1363                del_timer_sync(&xpc_hb_timer);
1364                free_irq(SGI_XPC_ACTIVATE, NULL);
1365                xpc_restrict_IPI_ops();
1366
1367                if (xpc_sysctl) {
1368                        unregister_sysctl_table(xpc_sysctl);
1369                }
1370
1371                kfree(xpc_remote_copy_buffer_base);
1372                return -EBUSY;
1373        }
1374
1375
1376        /*
1377         * Startup a thread that will attempt to discover other partitions to
1378         * activate based on info provided by SAL. This new thread is short
1379         * lived and will exit once discovery is complete.
1380         */
1381        pid = kernel_thread(xpc_initiate_discovery, NULL, 0);
1382        if (pid < 0) {
1383                dev_err(xpc_part, "failed while forking discovery thread\n");
1384
1385                /* mark this new thread as a non-starter */
1386                complete(&xpc_discovery_exited);
1387
1388                xpc_do_exit(xpcUnloading);
1389                return -EBUSY;
1390        }
1391
1392
1393        /* set the interface to point at XPC's functions */
1394        xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1395                          xpc_initiate_allocate, xpc_initiate_send,
1396                          xpc_initiate_send_notify, xpc_initiate_received,
1397                          xpc_initiate_partid_to_nasids);
1398
1399        return 0;
1400}
1401module_init(xpc_init);
1402
1403
1404void __exit
1405xpc_exit(void)
1406{
1407        xpc_do_exit(xpcUnloading);
1408}
1409module_exit(xpc_exit);
1410
1411
1412MODULE_AUTHOR("Silicon Graphics, Inc.");
1413MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1414MODULE_LICENSE("GPL");
1415
1416module_param(xpc_hb_interval, int, 0);
1417MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1418                "heartbeat increments.");
1419
1420module_param(xpc_hb_check_interval, int, 0);
1421MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1422                "heartbeat checks.");
1423
1424module_param(xpc_disengage_request_timelimit, int, 0);
1425MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
1426                "for disengage request to complete.");
1427
1428module_param(xpc_kdebug_ignore, int, 0);
1429MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
1430                "other partitions when dropping into kdebug.");
1431
1432
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.