linux/arch/ia64/sn/kernel/xpc_partition.c
<<
>>
Prefs
   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
   7 */
   8
   9
  10/*
  11 * Cross Partition Communication (XPC) partition support.
  12 *
  13 *      This is the part of XPC that detects the presence/absence of
  14 *      other partitions. It provides a heartbeat and monitors the
  15 *      heartbeats of other partitions.
  16 *
  17 */
  18
  19
  20#include <linux/kernel.h>
  21#include <linux/sysctl.h>
  22#include <linux/cache.h>
  23#include <linux/mmzone.h>
  24#include <linux/nodemask.h>
  25#include <asm/uncached.h>
  26#include <asm/sn/bte.h>
  27#include <asm/sn/intr.h>
  28#include <asm/sn/sn_sal.h>
  29#include <asm/sn/nodepda.h>
  30#include <asm/sn/addrs.h>
  31#include <asm/sn/xpc.h>
  32
  33
  34/*
   * XPC is exiting flag.  The scanning loops in xpc_check_remote_hb() and
   * xpc_identify_act_IRQ_sender() test it on every iteration and stop early
   * once it is non-zero.
   */
  35int xpc_exiting;
  36
  37
  38/*
   * SH_IPI_ACCESS shub register value on startup.  xpc_allow_IPI_ops() reads
   * the local register(s) into these before opening access up, and
   * xpc_restrict_IPI_ops() writes the saved values back to every online node.
   * The shub1 path uses the single SH1 value, the shub2 path the four SH2
   * values.
   */
  39static u64 xpc_sh1_IPI_access;
  40static u64 xpc_sh2_IPI_access0;
  41static u64 xpc_sh2_IPI_access1;
  42static u64 xpc_sh2_IPI_access2;
  43static u64 xpc_sh2_IPI_access3;
  44
  45
  46/*
   * Original protection values for each node, indexed by node id.  Filled in
   * by xpc_allow_IPI_ops() from SH1_MD_DQLP_MMR_DIR_PRIVEC0 when
   * enable_shub_wars_1_1() is true, and written back to both the DQLP and
   * DQRP registers by xpc_restrict_IPI_ops().
   */
  47u64 xpc_prot_vec[MAX_NUMNODES];
  48
  49
  50/*
   * This partition's reserved page pointers.  xpc_part_nasids,
   * xpc_mach_nasids, xpc_vars and xpc_vars_part are pointed into the local
   * reserved page by xpc_rsvd_page_init().  xpc_rsvd_page itself is not
   * assigned in this part of the file -- presumably the caller stores
   * xpc_rsvd_page_init()'s return value there (TODO confirm).
   */
  51struct xpc_rsvd_page *xpc_rsvd_page;
  52static u64 *xpc_part_nasids;
  53static u64 *xpc_mach_nasids;
  54struct xpc_vars *xpc_vars;
  55struct xpc_vars_part *xpc_vars_part;
  56
  /*
   * Both sizes are derived from the reserved page's nasids_size field in
   * xpc_rsvd_page_init(); the word count is the byte count divided by 8.
   */
  57static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
  58static int xp_nasid_mask_words; /* actual size in words of nasid mask */
  59
  60
  61/*
  62 * For performance reasons, each entry of xpc_partitions[] is cacheline
  63 * aligned. And xpc_partitions[] is padded with an additional entry at the
  64 * end so that the last legitimate entry doesn't share its cacheline with
  65 * another variable.
   *
   * The array is indexed directly by partition id.
  66 */
  67struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
  68
  69
  70/*
  71 * Generic buffer used to store a local copy of portions of a remote
  72 * partition's reserved page (either its header and part_nasids mask,
  73 * or its vars).
   *
   * Note that the header view and the vars view share this one buffer, so
   * pulling over the vars overwrites a previously pulled header.
   * xpc_remote_copy_buffer_base is presumably the unaligned kmalloc pointer
   * backing the buffer (cf. xpc_kmalloc_cacheline_aligned()); the allocation
   * is not in this part of the file -- TODO confirm.
  74 */
  75char *xpc_remote_copy_buffer;
  76void *xpc_remote_copy_buffer_base;
  77
  78
  79/*
  80 * Guarantee that the kmalloc'd memory is cacheline aligned.
  81 */
  82void *
  83xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  84{
  85        /* see if kmalloc will give us cachline aligned memory by default */
  86        *base = kmalloc(size, flags);
  87        if (*base == NULL) {
  88                return NULL;
  89        }
  90        if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
  91                return *base;
  92        }
  93        kfree(*base);
  94
  95        /* nope, we'll have to do it ourselves */
  96        *base = kmalloc(size + L1_CACHE_BYTES, flags);
  97        if (*base == NULL) {
  98                return NULL;
  99        }
 100        return (void *) L1_CACHE_ALIGN((u64) *base);
 101}
 102
 103
 104/*
 105 * Given a nasid, get the physical address of the  partition's reserved page
 106 * for that nasid. This function returns 0 on any error.
 107 */
 108static u64
 109xpc_get_rsvd_page_pa(int nasid)
 110{
 111        bte_result_t bte_res;
 112        s64 status;
 113        u64 cookie = 0;
 114        u64 rp_pa = nasid;      /* seed with nasid */
 115        u64 len = 0;
 116        u64 buf = buf;
 117        u64 buf_len = 0;
 118        void *buf_base = NULL;
 119
 120
 121        while (1) {
 122
 123                status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
 124                                                                &len);
 125
 126                dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
 127                        "0x%016lx, address=0x%016lx, len=0x%016lx\n",
 128                        status, cookie, rp_pa, len);
 129
 130                if (status != SALRET_MORE_PASSES) {
 131                        break;
 132                }
 133
 134                if (L1_CACHE_ALIGN(len) > buf_len) {
 135                        kfree(buf_base);
 136                        buf_len = L1_CACHE_ALIGN(len);
 137                        buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
 138                                                        GFP_KERNEL, &buf_base);
 139                        if (buf_base == NULL) {
 140                                dev_err(xpc_part, "unable to kmalloc "
 141                                        "len=0x%016lx\n", buf_len);
 142                                status = SALRET_ERROR;
 143                                break;
 144                        }
 145                }
 146
 147                bte_res = xp_bte_copy(rp_pa, buf, buf_len,
 148                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 149                if (bte_res != BTE_SUCCESS) {
 150                        dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
 151                        status = SALRET_ERROR;
 152                        break;
 153                }
 154        }
 155
 156        kfree(buf_base);
 157
 158        if (status != SALRET_OK) {
 159                rp_pa = 0;
 160        }
 161        dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
 162        return rp_pa;
 163}
 164
 165
 166/*
 167 * Fill the partition reserved page with the information needed by
 168 * other partitions to discover we are alive and establish initial
 169 * communications.
 170 */
 171struct xpc_rsvd_page *
 172xpc_rsvd_page_init(void)
 173{
 174        struct xpc_rsvd_page *rp;
 175        AMO_t *amos_page;
 176        u64 rp_pa, nasid_array = 0;
 177        int i, ret;
 178
 179
 180        /* get the local reserved page's address */
 181
 182        preempt_disable();
 183        rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
 184        preempt_enable();
 185        if (rp_pa == 0) {
 186                dev_err(xpc_part, "SAL failed to locate the reserved page\n");
 187                return NULL;
 188        }
 189        rp = (struct xpc_rsvd_page *) __va(rp_pa);
 190
 191        if (rp->partid != sn_partition_id) {
 192                dev_err(xpc_part, "the reserved page's partid of %d should be "
 193                        "%d\n", rp->partid, sn_partition_id);
 194                return NULL;
 195        }
 196
 197        rp->version = XPC_RP_VERSION;
 198
 199        /* establish the actual sizes of the nasid masks */
 200        if (rp->SAL_version == 1) {
 201                /* SAL_version 1 didn't set the nasids_size field */
 202                rp->nasids_size = 128;
 203        }
 204        xp_nasid_mask_bytes = rp->nasids_size;
 205        xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
 206
 207        /* setup the pointers to the various items in the reserved page */
 208        xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
 209        xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
 210        xpc_vars = XPC_RP_VARS(rp);
 211        xpc_vars_part = XPC_RP_VARS_PART(rp);
 212
 213        /*
 214         * Before clearing xpc_vars, see if a page of AMOs had been previously
 215         * allocated. If not we'll need to allocate one and set permissions
 216         * so that cross-partition AMOs are allowed.
 217         *
 218         * The allocated AMO page needs MCA reporting to remain disabled after
 219         * XPC has unloaded.  To make this work, we keep a copy of the pointer
 220         * to this page (i.e., amos_page) in the struct xpc_vars structure,
 221         * which is pointed to by the reserved page, and re-use that saved copy
 222         * on subsequent loads of XPC. This AMO page is never freed, and its
 223         * memory protections are never restricted.
 224         */
 225        if ((amos_page = xpc_vars->amos_page) == NULL) {
 226                amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
 227                if (amos_page == NULL) {
 228                        dev_err(xpc_part, "can't allocate page of AMOs\n");
 229                        return NULL;
 230                }
 231
 232                /*
 233                 * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
 234                 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
 235                 */
 236                if (!enable_shub_wars_1_1()) {
 237                        ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
 238                                        PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
 239                                        &nasid_array);
 240                        if (ret != 0) {
 241                                dev_err(xpc_part, "can't change memory "
 242                                        "protections\n");
 243                                uncached_free_page(__IA64_UNCACHED_OFFSET |
 244                                                   TO_PHYS((u64) amos_page));
 245                                return NULL;
 246                        }
 247                }
 248        } else if (!IS_AMO_ADDRESS((u64) amos_page)) {
 249                /*
 250                 * EFI's XPBOOT can also set amos_page in the reserved page,
 251                 * but it happens to leave it as an uncached physical address
 252                 * and we need it to be an uncached virtual, so we'll have to
 253                 * convert it.
 254                 */
 255                if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
 256                        dev_err(xpc_part, "previously used amos_page address "
 257                                "is bad = 0x%p\n", (void *) amos_page);
 258                        return NULL;
 259                }
 260                amos_page = (AMO_t *) TO_AMO((u64) amos_page);
 261        }
 262
 263        /* clear xpc_vars */
 264        memset(xpc_vars, 0, sizeof(struct xpc_vars));
 265
 266        xpc_vars->version = XPC_V_VERSION;
 267        xpc_vars->act_nasid = cpuid_to_nasid(0);
 268        xpc_vars->act_phys_cpuid = cpu_physical_id(0);
 269        xpc_vars->vars_part_pa = __pa(xpc_vars_part);
 270        xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
 271        xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
 272
 273
 274        /* clear xpc_vars_part */
 275        memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
 276                                                        XP_MAX_PARTITIONS);
 277
 278        /* initialize the activate IRQ related AMO variables */
 279        for (i = 0; i < xp_nasid_mask_words; i++) {
 280                (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
 281        }
 282
 283        /* initialize the engaged remote partitions related AMO variables */
 284        (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
 285        (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
 286
 287        /* timestamp of when reserved page was setup by XPC */
 288        rp->stamp = CURRENT_TIME;
 289
 290        /*
 291         * This signifies to the remote partition that our reserved
 292         * page is initialized.
 293         */
 294        rp->vars_pa = __pa(xpc_vars);
 295
 296        return rp;
 297}
 298
 299
 300/*
 301 * Change protections to allow IPI operations (and AMO operations on
 302 * Shub 1.1 systems).
 303 */
 304void
 305xpc_allow_IPI_ops(void)
 306{
 307        int node;
 308        int nasid;
 309
 310
 311        // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
 312
 313        if (is_shub2()) {
 314                xpc_sh2_IPI_access0 =
 315                        (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
 316                xpc_sh2_IPI_access1 =
 317                        (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
 318                xpc_sh2_IPI_access2 =
 319                        (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
 320                xpc_sh2_IPI_access3 =
 321                        (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
 322
 323                for_each_online_node(node) {
 324                        nasid = cnodeid_to_nasid(node);
 325                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
 326                                                                -1UL);
 327                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
 328                                                                -1UL);
 329                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
 330                                                                -1UL);
 331                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
 332                                                                -1UL);
 333                }
 334
 335        } else {
 336                xpc_sh1_IPI_access =
 337                        (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
 338
 339                for_each_online_node(node) {
 340                        nasid = cnodeid_to_nasid(node);
 341                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
 342                                                                -1UL);
 343
 344                        /*
 345                         * Since the BIST collides with memory operations on
 346                         * SHUB 1.1 sn_change_memprotect() cannot be used.
 347                         */
 348                        if (enable_shub_wars_1_1()) {
 349                                /* open up everything */
 350                                xpc_prot_vec[node] = (u64) HUB_L((u64 *)
 351                                                GLOBAL_MMR_ADDR(nasid,
 352                                                SH1_MD_DQLP_MMR_DIR_PRIVEC0));
 353                                HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
 354                                                SH1_MD_DQLP_MMR_DIR_PRIVEC0),
 355                                                                -1UL);
 356                                HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
 357                                                SH1_MD_DQRP_MMR_DIR_PRIVEC0),
 358                                                                -1UL);
 359                        }
 360                }
 361        }
 362}
 363
 364
 365/*
 366 * Restrict protections to disallow IPI operations (and AMO operations on
 367 * Shub 1.1 systems).
 368 */
 369void
 370xpc_restrict_IPI_ops(void)
 371{
 372        int node;
 373        int nasid;
 374
 375
 376        // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
 377
 378        if (is_shub2()) {
 379
 380                for_each_online_node(node) {
 381                        nasid = cnodeid_to_nasid(node);
 382                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
 383                                                        xpc_sh2_IPI_access0);
 384                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
 385                                                        xpc_sh2_IPI_access1);
 386                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
 387                                                        xpc_sh2_IPI_access2);
 388                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
 389                                                        xpc_sh2_IPI_access3);
 390                }
 391
 392        } else {
 393
 394                for_each_online_node(node) {
 395                        nasid = cnodeid_to_nasid(node);
 396                        HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
 397                                                        xpc_sh1_IPI_access);
 398
 399                        if (enable_shub_wars_1_1()) {
 400                                HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
 401                                                SH1_MD_DQLP_MMR_DIR_PRIVEC0),
 402                                                        xpc_prot_vec[node]);
 403                                HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
 404                                                SH1_MD_DQRP_MMR_DIR_PRIVEC0),
 405                                                        xpc_prot_vec[node]);
 406                        }
 407                }
 408        }
 409}
 410
 411
 412/*
 413 * At periodic intervals, scan through all active partitions and ensure
 414 * their heartbeat is still active.  If not, the partition is deactivated.
 415 */
 416void
 417xpc_check_remote_hb(void)
 418{
 419        struct xpc_vars *remote_vars;
 420        struct xpc_partition *part;
 421        partid_t partid;
 422        bte_result_t bres;
 423
 424
 425        remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
 426
 427        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 428
 429                if (xpc_exiting) {
 430                        break;
 431                }
 432
 433                if (partid == sn_partition_id) {
 434                        continue;
 435                }
 436
 437                part = &xpc_partitions[partid];
 438
 439                if (part->act_state == XPC_P_INACTIVE ||
 440                                part->act_state == XPC_P_DEACTIVATING) {
 441                        continue;
 442                }
 443
 444                /* pull the remote_hb cache line */
 445                bres = xp_bte_copy(part->remote_vars_pa,
 446                                        (u64) remote_vars,
 447                                        XPC_RP_VARS_SIZE,
 448                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 449                if (bres != BTE_SUCCESS) {
 450                        XPC_DEACTIVATE_PARTITION(part,
 451                                                xpc_map_bte_errors(bres));
 452                        continue;
 453                }
 454
 455                dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
 456                        " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
 457                        partid, remote_vars->heartbeat, part->last_heartbeat,
 458                        remote_vars->heartbeat_offline,
 459                        remote_vars->heartbeating_to_mask);
 460
 461                if (((remote_vars->heartbeat == part->last_heartbeat) &&
 462                        (remote_vars->heartbeat_offline == 0)) ||
 463                             !xpc_hb_allowed(sn_partition_id, remote_vars)) {
 464
 465                        XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
 466                        continue;
 467                }
 468
 469                part->last_heartbeat = remote_vars->heartbeat;
 470        }
 471}
 472
 473
 474/*
 475 * Get a copy of a portion of the remote partition's rsvd page.
 476 *
 477 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 478 * is large enough to contain a copy of their reserved page header and
 479 * part_nasids mask.
 480 */
 481static enum xpc_retval
 482xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 483                struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 484{
 485        int bres, i;
 486
 487
 488        /* get the reserved page's physical address */
 489
 490        *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
 491        if (*remote_rp_pa == 0) {
 492                return xpcNoRsvdPageAddr;
 493        }
 494
 495
 496        /* pull over the reserved page header and part_nasids mask */
 497        bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
 498                                XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
 499                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 500        if (bres != BTE_SUCCESS) {
 501                return xpc_map_bte_errors(bres);
 502        }
 503
 504
 505        if (discovered_nasids != NULL) {
 506                u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
 507
 508
 509                for (i = 0; i < xp_nasid_mask_words; i++) {
 510                        discovered_nasids[i] |= remote_part_nasids[i];
 511                }
 512        }
 513
 514
 515        /* check that the partid is for another partition */
 516
 517        if (remote_rp->partid < 1 ||
 518                                remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
 519                return xpcInvalidPartid;
 520        }
 521
 522        if (remote_rp->partid == sn_partition_id) {
 523                return xpcLocalPartid;
 524        }
 525
 526
 527        if (XPC_VERSION_MAJOR(remote_rp->version) !=
 528                                        XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
 529                return xpcBadVersion;
 530        }
 531
 532        return xpcSuccess;
 533}
 534
 535
 536/*
 537 * Get a copy of the remote partition's XPC variables from the reserved page.
 538 *
 539 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
 540 * assumed to be of size XPC_RP_VARS_SIZE.
 541 */
 542static enum xpc_retval
 543xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
 544{
 545        int bres;
 546
 547
 548        if (remote_vars_pa == 0) {
 549                return xpcVarsNotSet;
 550        }
 551
 552        /* pull over the cross partition variables */
 553        bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
 554                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 555        if (bres != BTE_SUCCESS) {
 556                return xpc_map_bte_errors(bres);
 557        }
 558
 559        if (XPC_VERSION_MAJOR(remote_vars->version) !=
 560                                        XPC_VERSION_MAJOR(XPC_V_VERSION)) {
 561                return xpcBadVersion;
 562        }
 563
 564        return xpcSuccess;
 565}
 566
 567
 568/*
 569 * Update the remote partition's info.
 570 */
 571static void
 572xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
 573                struct timespec *remote_rp_stamp, u64 remote_rp_pa,
 574                u64 remote_vars_pa, struct xpc_vars *remote_vars)
 575{
 576        part->remote_rp_version = remote_rp_version;
 577        dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
 578                part->remote_rp_version);
 579
 580        part->remote_rp_stamp = *remote_rp_stamp;
 581        dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
 582                part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
 583
 584        part->remote_rp_pa = remote_rp_pa;
 585        dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
 586
 587        part->remote_vars_pa = remote_vars_pa;
 588        dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
 589                part->remote_vars_pa);
 590
 591        part->last_heartbeat = remote_vars->heartbeat;
 592        dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
 593                part->last_heartbeat);
 594
 595        part->remote_vars_part_pa = remote_vars->vars_part_pa;
 596        dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
 597                part->remote_vars_part_pa);
 598
 599        part->remote_act_nasid = remote_vars->act_nasid;
 600        dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
 601                part->remote_act_nasid);
 602
 603        part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
 604        dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
 605                part->remote_act_phys_cpuid);
 606
 607        part->remote_amos_page_pa = remote_vars->amos_page_pa;
 608        dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
 609                part->remote_amos_page_pa);
 610
 611        part->remote_vars_version = remote_vars->version;
 612        dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
 613                part->remote_vars_version);
 614}
 615
 616
 617/*
 618 * Prior code has determined the nasid which generated an IPI.  Inspect
 619 * that nasid to determine if its partition needs to be activated or
 620 * deactivated.
 621 *
 622 * A partition is consider "awaiting activation" if our partition
 623 * flags indicate it is not active and it has a heartbeat.  A
 624 * partition is considered "awaiting deactivation" if our partition
 625 * flags indicate it is active but it has no heartbeat or it is not
 626 * sending its heartbeat to us.
 627 *
 628 * To determine the heartbeat, the remote nasid must have a properly
 629 * initialized reserved page.
 630 */
 631static void
 632xpc_identify_act_IRQ_req(int nasid)
 633{
 634        struct xpc_rsvd_page *remote_rp;
 635        struct xpc_vars *remote_vars;
 636        u64 remote_rp_pa;
 637        u64 remote_vars_pa;
 638        int remote_rp_version;
 639        int reactivate = 0;
 640        int stamp_diff;
 641        struct timespec remote_rp_stamp = { 0, 0 };
 642        partid_t partid;
 643        struct xpc_partition *part;
 644        enum xpc_retval ret;
 645
 646
 647        /* pull over the reserved page structure */
 648
 649        remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
 650
 651        ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
 652        if (ret != xpcSuccess) {
 653                dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
 654                        "which sent interrupt, reason=%d\n", nasid, ret);
 655                return;
 656        }
 657
 658        remote_vars_pa = remote_rp->vars_pa;
 659        remote_rp_version = remote_rp->version;
 660        if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
 661                remote_rp_stamp = remote_rp->stamp;
 662        }
 663        partid = remote_rp->partid;
 664        part = &xpc_partitions[partid];
 665
 666
 667        /* pull over the cross partition variables */
 668
 669        remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
 670
 671        ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
 672        if (ret != xpcSuccess) {
 673
 674                dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
 675                        "which sent interrupt, reason=%d\n", nasid, ret);
 676
 677                XPC_DEACTIVATE_PARTITION(part, ret);
 678                return;
 679        }
 680
 681
 682        part->act_IRQ_rcvd++;
 683
 684        dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
 685                "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
 686                remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
 687
 688        if (xpc_partition_disengaged(part) &&
 689                                        part->act_state == XPC_P_INACTIVE) {
 690
 691                xpc_update_partition_info(part, remote_rp_version,
 692                                        &remote_rp_stamp, remote_rp_pa,
 693                                        remote_vars_pa, remote_vars);
 694
 695                if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
 696                        if (xpc_partition_disengage_requested(1UL << partid)) {
 697                                /*
 698                                 * Other side is waiting on us to disengage,
 699                                 * even though we already have.
 700                                 */
 701                                return;
 702                        }
 703                } else {
 704                        /* other side doesn't support disengage requests */
 705                        xpc_clear_partition_disengage_request(1UL << partid);
 706                }
 707
 708                xpc_activate_partition(part);
 709                return;
 710        }
 711
 712        DBUG_ON(part->remote_rp_version == 0);
 713        DBUG_ON(part->remote_vars_version == 0);
 714
 715        if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
 716                DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
 717                                                        remote_vars_version));
 718
 719                if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
 720                        DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
 721                                                                version));
 722                        /* see if the other side rebooted */
 723                        if (part->remote_amos_page_pa ==
 724                                remote_vars->amos_page_pa &&
 725                                        xpc_hb_allowed(sn_partition_id,
 726                                                                remote_vars)) {
 727                                /* doesn't look that way, so ignore the IPI */
 728                                return;
 729                        }
 730                }
 731
 732                /*
 733                 * Other side rebooted and previous XPC didn't support the
 734                 * disengage request, so we don't need to do anything special.
 735                 */
 736
 737                xpc_update_partition_info(part, remote_rp_version,
 738                                                &remote_rp_stamp, remote_rp_pa,
 739                                                remote_vars_pa, remote_vars);
 740                part->reactivate_nasid = nasid;
 741                XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
 742                return;
 743        }
 744
 745        DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
 746
 747        if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
 748                DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
 749
 750                /*
 751                 * Other side rebooted and previous XPC did support the
 752                 * disengage request, but the new one doesn't.
 753                 */
 754
 755                xpc_clear_partition_engaged(1UL << partid);
 756                xpc_clear_partition_disengage_request(1UL << partid);
 757
 758                xpc_update_partition_info(part, remote_rp_version,
 759                                                &remote_rp_stamp, remote_rp_pa,
 760                                                remote_vars_pa, remote_vars);
 761                reactivate = 1;
 762
 763        } else {
 764                DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
 765
 766                stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
 767                                                        &remote_rp_stamp);
 768                if (stamp_diff != 0) {
 769                        DBUG_ON(stamp_diff >= 0);
 770
 771                        /*
 772                         * Other side rebooted and the previous XPC did support
 773                         * the disengage request, as does the new one.
 774                         */
 775
 776                        DBUG_ON(xpc_partition_engaged(1UL << partid));
 777                        DBUG_ON(xpc_partition_disengage_requested(1UL <<
 778                                                                partid));
 779
 780                        xpc_update_partition_info(part, remote_rp_version,
 781                                                &remote_rp_stamp, remote_rp_pa,
 782                                                remote_vars_pa, remote_vars);
 783                        reactivate = 1;
 784                }
 785        }
 786
 787        if (part->disengage_request_timeout > 0 &&
 788                                        !xpc_partition_disengaged(part)) {
 789                /* still waiting on other side to disengage from us */
 790                return;
 791        }
 792
 793        if (reactivate) {
 794                part->reactivate_nasid = nasid;
 795                XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
 796
 797        } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
 798                        xpc_partition_disengage_requested(1UL << partid)) {
 799                XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
 800        }
 801}
 802
 803
 804/*
 805 * Loop through the activation AMO variables and process any bits
 806 * which are set.  Each bit indicates a nasid sending a partition
 807 * activation or deactivation request.
 808 *
 809 * Return #of IRQs detected.
 810 */
 811int
 812xpc_identify_act_IRQ_sender(void)
 813{
 814        int word, bit;
 815        u64 nasid_mask;
 816        u64 nasid;                      /* remote nasid */
 817        int n_IRQs_detected = 0;
 818        AMO_t *act_amos;
 819
 820
 821        act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
 822
 823
 824        /* scan through act AMO variable looking for non-zero entries */
 825        for (word = 0; word < xp_nasid_mask_words; word++) {
 826
 827                if (xpc_exiting) {
 828                        break;
 829                }
 830
 831                nasid_mask = xpc_IPI_receive(&act_amos[word]);
 832                if (nasid_mask == 0) {
 833                        /* no IRQs from nasids in this variable */
 834                        continue;
 835                }
 836
 837                dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
 838                        nasid_mask);
 839
 840
 841                /*
 842                 * If this nasid has been added to the machine since
 843                 * our partition was reset, this will retain the
 844                 * remote nasid in our reserved pages machine mask.
 845                 * This is used in the event of module reload.
 846                 */
 847                xpc_mach_nasids[word] |= nasid_mask;
 848
 849
 850                /* locate the nasid(s) which sent interrupts */
 851
 852                for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
 853                        if (nasid_mask & (1UL << bit)) {
 854                                n_IRQs_detected++;
 855                                nasid = XPC_NASID_FROM_W_B(word, bit);
 856                                dev_dbg(xpc_part, "interrupt from nasid %ld\n",
 857                                        nasid);
 858                                xpc_identify_act_IRQ_req(nasid);
 859                        }
 860                }
 861        }
 862        return n_IRQs_detected;
 863}
 864
 865
 866/*
 867 * See if the other side has responded to a partition disengage request
 868 * from us.
 869 */
 870int
 871xpc_partition_disengaged(struct xpc_partition *part)
 872{
 873        partid_t partid = XPC_PARTID(part);
 874        int disengaged;
 875
 876
 877        disengaged = (xpc_partition_engaged(1UL << partid) == 0);
 878        if (part->disengage_request_timeout) {
 879                if (!disengaged) {
 880                        if (jiffies < part->disengage_request_timeout) {
 881                                /* timelimit hasn't been reached yet */
 882                                return 0;
 883                        }
 884
 885                        /*
 886                         * Other side hasn't responded to our disengage
 887                         * request in a timely fashion, so assume it's dead.
 888                         */
 889
 890                        dev_info(xpc_part, "disengage from remote partition %d "
 891                                "timed out\n", partid);
 892                        xpc_disengage_request_timedout = 1;
 893                        xpc_clear_partition_engaged(1UL << partid);
 894                        disengaged = 1;
 895                }
 896                part->disengage_request_timeout = 0;
 897
 898                /* cancel the timer function, provided it's not us */
 899                if (!in_interrupt()) {
 900                        del_singleshot_timer_sync(&part->
 901                                                      disengage_request_timer);
 902                }
 903
 904                DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
 905                                        part->act_state != XPC_P_INACTIVE);
 906                if (part->act_state != XPC_P_INACTIVE) {
 907                        xpc_wakeup_channel_mgr(part);
 908                }
 909
 910                if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
 911                        xpc_cancel_partition_disengage_request(part);
 912                }
 913        }
 914        return disengaged;
 915}
 916
 917
 918/*
 919 * Mark specified partition as active.
 920 */
 921enum xpc_retval
 922xpc_mark_partition_active(struct xpc_partition *part)
 923{
 924        unsigned long irq_flags;
 925        enum xpc_retval ret;
 926
 927
 928        dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
 929
 930        spin_lock_irqsave(&part->act_lock, irq_flags);
 931        if (part->act_state == XPC_P_ACTIVATING) {
 932                part->act_state = XPC_P_ACTIVE;
 933                ret = xpcSuccess;
 934        } else {
 935                DBUG_ON(part->reason == xpcSuccess);
 936                ret = part->reason;
 937        }
 938        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 939
 940        return ret;
 941}
 942
 943
 944/*
 945 * Notify XPC that the partition is down.
 946 */
 947void
 948xpc_deactivate_partition(const int line, struct xpc_partition *part,
 949                                enum xpc_retval reason)
 950{
 951        unsigned long irq_flags;
 952
 953
 954        spin_lock_irqsave(&part->act_lock, irq_flags);
 955
 956        if (part->act_state == XPC_P_INACTIVE) {
 957                XPC_SET_REASON(part, reason, line);
 958                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 959                if (reason == xpcReactivating) {
 960                        /* we interrupt ourselves to reactivate partition */
 961                        xpc_IPI_send_reactivate(part);
 962                }
 963                return;
 964        }
 965        if (part->act_state == XPC_P_DEACTIVATING) {
 966                if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
 967                                        reason == xpcReactivating) {
 968                        XPC_SET_REASON(part, reason, line);
 969                }
 970                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 971                return;
 972        }
 973
 974        part->act_state = XPC_P_DEACTIVATING;
 975        XPC_SET_REASON(part, reason, line);
 976
 977        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 978
 979        if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
 980                xpc_request_partition_disengage(part);
 981                xpc_IPI_send_disengage(part);
 982
 983                /* set a timelimit on the disengage request */
 984                part->disengage_request_timeout = jiffies +
 985                                        (xpc_disengage_request_timelimit * HZ);
 986                part->disengage_request_timer.expires =
 987                                        part->disengage_request_timeout;
 988                add_timer(&part->disengage_request_timer);
 989        }
 990
 991        dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
 992                XPC_PARTID(part), reason);
 993
 994        xpc_partition_going_down(part, reason);
 995}
 996
 997
 998/*
 999 * Mark specified partition as inactive.
1000 */
1001void
1002xpc_mark_partition_inactive(struct xpc_partition *part)
1003{
1004        unsigned long irq_flags;
1005
1006
1007        dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
1008                XPC_PARTID(part));
1009
1010        spin_lock_irqsave(&part->act_lock, irq_flags);
1011        part->act_state = XPC_P_INACTIVE;
1012        spin_unlock_irqrestore(&part->act_lock, irq_flags);
1013        part->remote_rp_pa = 0;
1014}
1015
1016
1017/*
1018 * SAL has provided a partition and machine mask.  The partition mask
1019 * contains a bit for each even nasid in our partition.  The machine
1020 * mask contains a bit for each even nasid in the entire machine.
1021 *
1022 * Using those two bit arrays, we can determine which nasids are
1023 * known in the machine.  Each should also have a reserved page
1024 * initialized if they are available for partitioning.
1025 */
1026void
1027xpc_discovery(void)
1028{
1029        void *remote_rp_base;
1030        struct xpc_rsvd_page *remote_rp;
1031        struct xpc_vars *remote_vars;
1032        u64 remote_rp_pa;
1033        u64 remote_vars_pa;
1034        int region;
1035        int region_size;
1036        int max_regions;
1037        int nasid;
1038        struct xpc_rsvd_page *rp;
1039        partid_t partid;
1040        struct xpc_partition *part;
1041        u64 *discovered_nasids;
1042        enum xpc_retval ret;
1043
1044
1045        remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
1046                                                xp_nasid_mask_bytes,
1047                                                GFP_KERNEL, &remote_rp_base);
1048        if (remote_rp == NULL) {
1049                return;
1050        }
1051        remote_vars = (struct xpc_vars *) remote_rp;
1052
1053
1054        discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
1055                                                        GFP_KERNEL);
1056        if (discovered_nasids == NULL) {
1057                kfree(remote_rp_base);
1058                return;
1059        }
1060
1061        rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
1062
1063        /*
1064         * The term 'region' in this context refers to the minimum number of
1065         * nodes that can comprise an access protection grouping. The access
1066         * protection is in regards to memory, IOI and IPI.
1067         */
1068        max_regions = 64;
1069        region_size = sn_region_size;
1070
1071        switch (region_size) {
1072        case 128:
1073                max_regions *= 2;
1074        case 64:
1075                max_regions *= 2;
1076        case 32:
1077                max_regions *= 2;
1078                region_size = 16;
1079                DBUG_ON(!is_shub2());
1080        }
1081
1082        for (region = 0; region < max_regions; region++) {
1083
1084                if ((volatile int) xpc_exiting) {
1085                        break;
1086                }
1087
1088                dev_dbg(xpc_part, "searching region %d\n", region);
1089
1090                for (nasid = (region * region_size * 2);
1091                     nasid < ((region + 1) * region_size * 2);
1092                     nasid += 2) {
1093
1094                        if ((volatile int) xpc_exiting) {
1095                                break;
1096                        }
1097
1098                        dev_dbg(xpc_part, "checking nasid %d\n", nasid);
1099
1100
1101                        if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
1102                                dev_dbg(xpc_part, "PROM indicates Nasid %d is "
1103                                        "part of the local partition; skipping "
1104                                        "region\n", nasid);
1105                                break;
1106                        }
1107
1108                        if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
1109                                dev_dbg(xpc_part, "PROM indicates Nasid %d was "
1110                                        "not on Numa-Link network at reset\n",
1111                                        nasid);
1112                                continue;
1113                        }
1114
1115                        if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
1116                                dev_dbg(xpc_part, "Nasid %d is part of a "
1117                                        "partition which was previously "
1118                                        "discovered\n", nasid);
1119                                continue;
1120                        }
1121
1122
1123                        /* pull over the reserved page structure */
1124
1125                        ret = xpc_get_remote_rp(nasid, discovered_nasids,
1126                                              remote_rp, &remote_rp_pa);
1127                        if (ret != xpcSuccess) {
1128                                dev_dbg(xpc_part, "unable to get reserved page "
1129                                        "from nasid %d, reason=%d\n", nasid,
1130                                        ret);
1131
1132                                if (ret == xpcLocalPartid) {
1133                                        break;
1134                                }
1135                                continue;
1136                        }
1137
1138                        remote_vars_pa = remote_rp->vars_pa;
1139
1140                        partid = remote_rp->partid;
1141                        part = &xpc_partitions[partid];
1142
1143
1144                        /* pull over the cross partition variables */
1145
1146                        ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1147                        if (ret != xpcSuccess) {
1148                                dev_dbg(xpc_part, "unable to get XPC variables "
1149                                        "from nasid %d, reason=%d\n", nasid,
1150                                        ret);
1151
1152                                XPC_DEACTIVATE_PARTITION(part, ret);
1153                                continue;
1154                        }
1155
1156                        if (part->act_state != XPC_P_INACTIVE) {
1157                                dev_dbg(xpc_part, "partition %d on nasid %d is "
1158                                        "already activating\n", partid, nasid);
1159                                break;
1160                        }
1161
1162                        /*
1163                         * Register the remote partition's AMOs with SAL so it
1164                         * can handle and cleanup errors within that address
1165                         * range should the remote partition go down. We don't
1166                         * unregister this range because it is difficult to
1167                         * tell when outstanding writes to the remote partition
1168                         * are finished and thus when it is thus safe to
1169                         * unregister. This should not result in wasted space
1170                         * in the SAL xp_addr_region table because we should
1171                         * get the same page for remote_act_amos_pa after
1172                         * module reloads and system reboots.
1173                         */
1174                        if (sn_register_xp_addr_region(
1175                                            remote_vars->amos_page_pa,
1176                                                        PAGE_SIZE, 1) < 0) {
1177                                dev_dbg(xpc_part, "partition %d failed to "
1178                                        "register xp_addr region 0x%016lx\n",
1179                                        partid, remote_vars->amos_page_pa);
1180
1181                                XPC_SET_REASON(part, xpcPhysAddrRegFailed,
1182                                                __LINE__);
1183                                break;
1184                        }
1185
1186                        /*
1187                         * The remote nasid is valid and available.
1188                         * Send an interrupt to that nasid to notify
1189                         * it that we are ready to begin activation.
1190                         */
1191                        dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1192                                "nasid %d, phys_cpuid 0x%x\n",
1193                                remote_vars->amos_page_pa,
1194                                remote_vars->act_nasid,
1195                                remote_vars->act_phys_cpuid);
1196
1197                        if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1198                                                                version)) {
1199                                part->remote_amos_page_pa =
1200                                                remote_vars->amos_page_pa;
1201                                xpc_mark_partition_disengaged(part);
1202                                xpc_cancel_partition_disengage_request(part);
1203                        }
1204                        xpc_IPI_send_activate(remote_vars);
1205                }
1206        }
1207
1208        kfree(discovered_nasids);
1209        kfree(remote_rp_base);
1210}
1211
1212
1213/*
1214 * Given a partid, get the nasids owned by that partition from the
1215 * remote partition's reserved page.
1216 */
1217enum xpc_retval
1218xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
1219{
1220        struct xpc_partition *part;
1221        u64 part_nasid_pa;
1222        int bte_res;
1223
1224
1225        part = &xpc_partitions[partid];
1226        if (part->remote_rp_pa == 0) {
1227                return xpcPartitionDown;
1228        }
1229
1230        memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
1231
1232        part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
1233
1234        bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
1235                        xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
1236
1237        return xpc_map_bte_errors(bte_res);
1238}
1239
1240
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.