linux/arch/powerpc/platforms/powernv/pci-ioda.c
/*
 * Support PCI/PCIe on PowerNV platforms
 *
 * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/abs_addr.h>

#include "powernv.h"
#include "pci.h"

struct resource_wrap {
        struct list_head        link;
        resource_size_t         size;
        resource_size_t         align;
        struct pci_dev          *dev;   /* Set if it's a device */
        struct pci_bus          *bus;   /* Set if it's a bridge */
};

static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe,
                       struct va_format *vaf)
{
        char pfix[32];

        if (pe->pdev)
                strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
        else
                sprintf(pfix, "%04x:%02x     ",
                        pci_domain_nr(pe->pbus), pe->pbus->number);
        return printk("pci %s%s: [PE# %.3d] %pV", level, pfix, pe->pe_number, vaf);
}

#define define_pe_printk_level(func, kern_level)                \
static int func(const struct pnv_ioda_pe *pe, const char *fmt, ...)     \
{                                                               \
        struct va_format vaf;                                   \
        va_list args;                                           \
        int r;                                                  \
                                                                \
        va_start(args, fmt);                                    \
                                                                \
        vaf.fmt = fmt;                                          \
        vaf.va = &args;                                         \
                                                                \
        r = __pe_printk(kern_level, pe, &vaf);                  \
        va_end(args);                                           \
                                                                \
        return r;                                               \
}                                                               \

define_pe_printk_level(pe_err, KERN_ERR);
define_pe_printk_level(pe_warn, KERN_WARNING);
define_pe_printk_level(pe_info, KERN_INFO);


/* Calculate resource usage & alignment requirement of a single
 * device. This will also assign all resources within the device
 * for a given type starting at 0 for the biggest one and then
 * assigning in decreasing order of size.
 */
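/*
 * Illustration (hypothetical BARs, not taken from real hardware): a
 * device with 32-bit memory BARs of 1M, 64K and 4K gets them placed at
 * offsets 0x000000, 0x100000 and 0x110000 respectively, so *size ends
 * up as 0x111000 and *align as 0x100000 (the largest BAR). The caller
 * later shifts the whole device by a segment-aligned offset.
 */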
static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags,
                                        resource_size_t *size,
                                        resource_size_t *align)
{
        resource_size_t start;
        struct resource *r;
        int i;

        pr_devel("  -> CDR %s\n", pci_name(dev));

        *size = *align = 0;

        /* Clear the resources out and mark them all unset */
        for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                r = &dev->resource[i];
                if (!(r->flags & flags))
                        continue;
                if (r->start) {
                        r->end -= r->start;
                        r->start = 0;
                }
                r->flags |= IORESOURCE_UNSET;
        }

        /* We currently keep all memory resources together; we will
         * handle prefetch & 64-bit separately in the future, but for
         * now we stick everybody in M32
         */
        start = 0;
        for (;;) {
                resource_size_t max_size = 0;
                int max_no = -1;

                /* Find next biggest resource */
                for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                        r = &dev->resource[i];
                        if (!(r->flags & IORESOURCE_UNSET) ||
                            !(r->flags & flags))
                                continue;
                        if (resource_size(r) > max_size) {
                                max_size = resource_size(r);
                                max_no = i;
                        }
                }
                if (max_no < 0)
                        break;
                r = &dev->resource[max_no];
                if (max_size > *align)
                        *align = max_size;
                *size += max_size;
                r->start = start;
                start += max_size;
                r->end = r->start + max_size - 1;
                r->flags &= ~IORESOURCE_UNSET;
                pr_devel("  ->     R%d %016llx..%016llx\n",
                         max_no, r->start, r->end);
        }
        pr_devel("  <- CDR %s size=%llx align=%llx\n",
                 pci_name(dev), *size, *align);
}

/* Allocate a resource "wrap" for a given device or bridge and
 * insert it at the right position in the sorted list
 */
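/*
 * The wrap list is kept sorted by decreasing alignment, so the caller
 * hands out offsets to the most strictly aligned children first and
 * minimizes the padding inserted between siblings.
 */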
static void __devinit pnv_ioda_add_wrap(struct list_head *list,
                                        struct pci_bus *bus,
                                        struct pci_dev *dev,
                                        resource_size_t size,
                                        resource_size_t align)
{
        struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL);

        w->size = size;
        w->align = align;
        w->dev = dev;
        w->bus = bus;

        list_for_each_entry(w1, list, link) {
                if (w1->align < align) {
                        list_add_tail(&w->link, &w1->link);
                        return;
                }
        }
        list_add_tail(&w->link, list);
}

/* Offset device resources of a given type */
static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev,
                                          unsigned int flags,
                                          resource_size_t offset)
{
        struct resource *r;
        int i;

        pr_devel("  -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);

        for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                r = &dev->resource[i];
                if (r->flags & flags) {
                        dev->resource[i].start += offset;
                        dev->resource[i].end += offset;
                }
        }

        pr_devel("  <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset);
}

/* Offset bus resources (& all children) of a given type */
static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus,
                                          unsigned int flags,
                                          resource_size_t offset)
{
        struct resource *r;
        struct pci_dev *dev;
        struct pci_bus *cbus;
        int i;

        pr_devel("  -> OBR %s [%x] +%016llx\n",
                 bus->self ? pci_name(bus->self) : "root", flags, offset);

        pci_bus_for_each_resource(bus, r, i) {
                if (r && (r->flags & flags)) {
                        r->start += offset;
                        r->end += offset;
                }
        }
        list_for_each_entry(dev, &bus->devices, bus_list)
                pnv_ioda_offset_dev(dev, flags, offset);
        list_for_each_entry(cbus, &bus->children, node)
                pnv_ioda_offset_bus(cbus, flags, offset);

        pr_devel("  <- OBR %s [%x]\n",
                 bus->self ? pci_name(bus->self) : "root", flags);
}

/* This is the guts of our IODA resource allocation. This is called
 * recursively for each bus in the system. It calculates all the
 * necessary size and alignment requirements for children and assigns
 * them resources such that:
 *
 *   - Each function fits in its own contiguous set of IO/M32
 *     segments
 *
 *   - All segments behind a P2P bridge are contiguous and obey
 *     the alignment constraints of those bridges
 */
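/*
 * The driving constraint is that each IO and M32 segment can only be
 * routed to a single PE, so every function effectively consumes a whole
 * number of segments. As a hypothetical example, with total_pe = 128
 * and a 2GB M32 window, m32_segsize is 16MB and a function with a
 * single 4K BAR still takes up a full 16MB segment of M32 space.
 */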
static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags,
                                        resource_size_t *size,
                                        resource_size_t *align)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pnv_phb *phb = hose->private_data;
        resource_size_t dev_size, dev_align, start;
        resource_size_t min_align, min_balign;
        struct pci_dev *cdev;
        struct pci_bus *cbus;
        struct list_head head;
        struct resource_wrap *w;
        unsigned int bres;

        *size = *align = 0;

        pr_devel("-> CBR %s [%x]\n",
                 bus->self ? pci_name(bus->self) : "root", flags);

        /* Calculate alignment requirements based on the type
         * of resource we are working on
         */
        if (flags & IORESOURCE_IO) {
                bres = 0;
                min_align = phb->ioda.io_segsize;
                min_balign = 0x1000;
        } else {
                bres = 1;
                min_align = phb->ioda.m32_segsize;
                min_balign = 0x100000;
        }

        /* Gather all our children resources ordered by alignment */
        INIT_LIST_HEAD(&head);

        /*   - Busses */
        list_for_each_entry(cbus, &bus->children, node) {
                pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align);
                pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align);
        }

        /*   - Devices */
        list_for_each_entry(cdev, &bus->devices, bus_list) {
                pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align);
                /* Align them to segment size */
                if (dev_align < min_align)
                        dev_align = min_align;
                pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align);
        }
        if (list_empty(&head))
                goto empty;

        /* Now we can do two things: assign offsets to them within that
         * level and get our total alignment & size requirements. The
         * assignment algorithm is going to be uber-trivial for now, we
         * can try to be smarter later at filling out holes.
         */
        if (bus->self) {
                /* No offset for downstream bridges */
                start = 0;
        } else {
                /* Offset from the root */
                if (flags & IORESOURCE_IO)
                        /* Don't hand out IO 0 */
                        start = hose->io_resource.start + 0x1000;
                else
                        start = hose->mem_resources[0].start;
        }
        while(!list_empty(&head)) {
                w = list_first_entry(&head, struct resource_wrap, link);
                list_del(&w->link);
                if (w->size) {
                        if (start) {
                                start = ALIGN(start, w->align);
                                if (w->dev)
                                        pnv_ioda_offset_dev(w->dev, flags, start);
                                else if (w->bus)
                                        pnv_ioda_offset_bus(w->bus, flags, start);
                        }
                        if (w->align > *align)
                                *align = w->align;
                }
                start += w->size;
                kfree(w);
        }
        *size = start;

        /* Align and setup bridge resources */
        *align = max_t(resource_size_t, *align,
                       max_t(resource_size_t, min_align, min_balign));
        *size = ALIGN(*size,
                      max_t(resource_size_t, min_align, min_balign));
 empty:
        /* Only setup P2P's, not the PHB itself */
        if (bus->self) {
                struct resource *res = bus->resource[bres];

                if (WARN_ON(res == NULL))
                        return;

                /*
                 * FIXME: We should probably export and call
                 * pci_bridge_check_ranges() to properly re-initialize
                 * the PCI portion of the flags here, and to detect
                 * what the bridge actually supports.
                 */
                res->start = 0;
                res->flags = (*size) ? flags : 0;
                res->end = (*size) ? (*size - 1) : 0;
        }

        pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n",
                 bus->self ? pci_name(bus->self) : "root", flags, *size, *align);
}

static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev)
{
        struct device_node *np;

        np = pci_device_to_OF_node(dev);
        if (!np)
                return NULL;
        return PCI_DN(np);
}

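/*
 * Record which IO and M32 segments a device's resources fall into and
 * ask OPAL to route each of those segments to the device's PE#. The
 * segment index is simply the bus address divided by the segment size;
 * if a segment is already claimed by another PE the conflict is
 * reported and the walk stops.
 */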
static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
        unsigned int pe, i;
        resource_size_t pos;
        struct resource io_res;
        struct resource m32_res;
        struct pci_bus_region region;
        int rc;

        /* Anything not referenced in the device-tree gets PE#0 */
        pe = pdn ? pdn->pe_number : 0;

        /* Calculate the device min/max */
        io_res.start = m32_res.start = (resource_size_t)-1;
        io_res.end = m32_res.end = 0;
        io_res.flags = IORESOURCE_IO;
        m32_res.flags = IORESOURCE_MEM;

        for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                struct resource *r = NULL;
                if (dev->resource[i].flags & IORESOURCE_IO)
                        r = &io_res;
                if (dev->resource[i].flags & IORESOURCE_MEM)
                        r = &m32_res;
                if (!r)
                        continue;
                if (dev->resource[i].start < r->start)
                        r->start = dev->resource[i].start;
                if (dev->resource[i].end > r->end)
                        r->end = dev->resource[i].end;
        }

        /* Setup IO segments */
        if (io_res.start < io_res.end) {
                pcibios_resource_to_bus(dev, &region, &io_res);
                pos = region.start;
                i = pos / phb->ioda.io_segsize;
                while(i < phb->ioda.total_pe && pos <= region.end) {
                        if (phb->ioda.io_segmap[i]) {
                                pr_err("%s: Trying to use IO seg #%d which is"
                                       " already used by PE# %d\n",
                                       pci_name(dev), i,
                                       phb->ioda.io_segmap[i]);
                                /* XXX DO SOMETHING TO DISABLE DEVICE ? */
                                break;
                        }
                        phb->ioda.io_segmap[i] = pe;
                        rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
                                                         OPAL_IO_WINDOW_TYPE,
                                                         0, i);
                        if (rc != OPAL_SUCCESS) {
                                pr_err("%s: OPAL error %d setting up mapping"
                                       " for IO seg# %d\n",
                                       pci_name(dev), rc, i);
                                /* XXX DO SOMETHING TO DISABLE DEVICE ? */
                                break;
                        }
                        pos += phb->ioda.io_segsize;
                        i++;
                }
        }

        /* Setup M32 segments */
        if (m32_res.start < m32_res.end) {
                pcibios_resource_to_bus(dev, &region, &m32_res);
                pos = region.start;
                i = pos / phb->ioda.m32_segsize;
                while(i < phb->ioda.total_pe && pos <= region.end) {
                        if (phb->ioda.m32_segmap[i]) {
                                pr_err("%s: Trying to use M32 seg #%d which is"
                                       " already used by PE# %d\n",
                                       pci_name(dev), i,
                                       phb->ioda.m32_segmap[i]);
                                /* XXX DO SOMETHING TO DISABLE DEVICE ? */
                                break;
                        }
                        phb->ioda.m32_segmap[i] = pe;
                        rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe,
                                                         OPAL_M32_WINDOW_TYPE,
                                                         0, i);
                        if (rc != OPAL_SUCCESS) {
                                pr_err("%s: OPAL error %d setting up mapping"
                                       " for M32 seg# %d\n",
                                       pci_name(dev), rc, i);
                                /* XXX DO SOMETHING TO DISABLE DEVICE ? */
                                break;
                        }
                        pos += phb->ioda.m32_segsize;
                        i++;
                }
        }
}

/* Check if a resource still fits in the total IO or M32 range
 * for a given PHB
 */
static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose,
                                           struct resource *r)
{
        struct resource *bounds;

        if (r->flags & IORESOURCE_IO)
                bounds = &hose->io_resource;
        else if (r->flags & IORESOURCE_MEM)
                bounds = &hose->mem_resources[0];
        else
                return 1;

        if (r->start >= bounds->start && r->end <= bounds->end)
                return 1;
        r->flags = 0;
        return 0;
}

static void __devinit pnv_ioda_update_resources(struct pci_bus *bus)
{
        struct pci_controller *hose = pci_bus_to_host(bus);
        struct pci_bus *cbus;
        struct pci_dev *cdev;
        unsigned int i;

        /* We used to clear all device enables here. However it looks like
         * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers,
         * and shoot fatal errors to the PHB which in turn fences itself
         * and we can't recover from that ... yet. So for now, let's leave
         * the enables as-is and hope for the best.
         */

        /* Check if bus resources fit in our IO or M32 range */
        for (i = 0; bus->self && (i < 2); i++) {
                struct resource *r = bus->resource[i];
                if (r && !pnv_ioda_resource_fit(hose, r))
                        pr_err("%s: Bus %d resource %d disabled, no room\n",
                               pci_name(bus->self), bus->number, i);
        }

        /* Update self if it's not a PHB */
        if (bus->self)
                pci_setup_bridge(bus);

        /* Update child devices */
        list_for_each_entry(cdev, &bus->devices, bus_list) {
                /* Check if the resource fits; if not, disable it */
                for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                        struct resource *r = &cdev->resource[i];
                        if (!pnv_ioda_resource_fit(hose, r))
                                pr_err("%s: Resource %d disabled, no room\n",
                                       pci_name(cdev), i);
                }

                /* Assign segments */
                pnv_ioda_setup_pe_segments(cdev);

                /* Update HW BARs */
                for (i = 0; i <= PCI_ROM_RESOURCE; i++)
                        pci_update_resource(cdev, i);
        }

        /* Update child busses */
        list_for_each_entry(cbus, &bus->children, node)
                pnv_ioda_update_resources(cbus);
}

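/*
 * PE numbers come from a simple bitmap. The find_next_zero_bit() /
 * test_and_set_bit() loop below retries if another caller grabs the
 * same bit between the search and the atomic set, so allocation does
 * not need to hold the PHB lock.
 */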
static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
        unsigned long pe;

        do {
                pe = find_next_zero_bit(phb->ioda.pe_alloc,
                                        phb->ioda.total_pe, 0);
                if (pe >= phb->ioda.total_pe)
                        return IODA_INVALID_PE;
        } while(test_and_set_bit(pe, phb->ioda.pe_alloc));

        phb->ioda.pe_array[pe].pe_number = pe;
        return pe;
}

static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
{
        WARN_ON(phb->ioda.pe_array[pe].pdev);

        memset(&phb->ioda.pe_array[pe], 0, sizeof(struct pnv_ioda_pe));
        clear_bit(pe, phb->ioda.pe_alloc);
}

/* Currently these two are only used when MSIs are enabled; this will
 * change, but in the meantime we need to protect them to avoid warnings
 */
#ifdef CONFIG_PCI_MSI
static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

        if (!pdn)
                return NULL;
        if (pdn->pe_number == IODA_INVALID_PE)
                return NULL;
        return &phb->ioda.pe_array[pdn->pe_number];
}

static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
{
        struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);

        while (!pe && dev->bus->self) {
                dev = dev->bus->self;
                pe = __pnv_ioda_get_one_pe(dev);
                if (pe)
                        pe = pe->bus_pe;
        }
        return pe;
}
#endif /* CONFIG_PCI_MSI */

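/*
 * Program the PELT so the PHB can associate an inbound RID with this
 * PE. The RID is (bus << 8 | devfn); for a bus PE the bcomp value tells
 * the hardware how many low bits of the bus number to ignore, e.g. a
 * bridge with 8 subordinate busses uses OpalPciBus5Bits, i.e. only the
 * top 5 bits of the bus number are compared.
 */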
static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
                                           struct pnv_ioda_pe *pe)
{
        struct pci_dev *parent;
        uint8_t bcomp, dcomp, fcomp;
        long rc, rid_end, rid;

        /* Bus validation ? */
        if (pe->pbus) {
                int count;

                dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
                fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
                parent = pe->pbus->self;
                count = pe->pbus->subordinate - pe->pbus->secondary + 1;
                switch(count) {
                case  1: bcomp = OpalPciBusAll;         break;
                case  2: bcomp = OpalPciBus7Bits;       break;
                case  4: bcomp = OpalPciBus6Bits;       break;
                case  8: bcomp = OpalPciBus5Bits;       break;
                case 16: bcomp = OpalPciBus4Bits;       break;
                case 32: bcomp = OpalPciBus3Bits;       break;
                default:
                        pr_err("%s: Number of subordinate busses %d"
                               " unsupported\n",
                               pci_name(pe->pbus->self), count);
                        /* Do an exact match only */
                        bcomp = OpalPciBusAll;
                }
                rid_end = pe->rid + (count << 8);
        } else {
                parent = pe->pdev->bus->self;
                bcomp = OpalPciBusAll;
                dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
                fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
                rid_end = pe->rid + 1;
        }

        /* Associate PE in PELT */
        rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
                             bcomp, dcomp, fcomp, OPAL_MAP_PE);
        if (rc) {
                pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
                return -ENXIO;
        }
        opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
                                  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

        /* Add to all parents' PELT-V */
        while (parent) {
                struct pci_dn *pdn = pnv_ioda_get_pdn(parent);
                if (pdn && pdn->pe_number != IODA_INVALID_PE) {
                        rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
                                                pe->pe_number, OPAL_ADD_PE_TO_DOMAIN);
                        /* XXX What to do in case of error ? */
                }
                parent = parent->bus->self;
        }
        /* Setup reverse map */
        for (rid = pe->rid; rid < rid_end; rid++)
                phb->ioda.pe_rmap[rid] = pe->pe_number;

        /* Setup one MVE on IODA1 */
        if (phb->type == PNV_PHB_IODA1) {
                pe->mve_number = pe->pe_number;
                rc = opal_pci_set_mve(phb->opal_id, pe->mve_number,
                                      pe->pe_number);
                if (rc) {
                        pe_err(pe, "OPAL error %ld setting up MVE %d\n",
                               rc, pe->mve_number);
                        pe->mve_number = -1;
                } else {
                        rc = opal_pci_set_mve_enable(phb->opal_id,
                                                     pe->mve_number, OPAL_ENABLE_MVE);
                        if (rc) {
                                pe_err(pe, "OPAL error %ld enabling MVE %d\n",
                                       rc, pe->mve_number);
                                pe->mve_number = -1;
                        }
                }
        } else if (phb->type == PNV_PHB_IODA2)
                pe->mve_number = 0;

        return 0;
}

static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb,
                                                 struct pnv_ioda_pe *pe)
{
        struct pnv_ioda_pe *lpe;

        list_for_each_entry(lpe, &phb->ioda.pe_list, link) {
                if (lpe->dma_weight < pe->dma_weight) {
                        list_add_tail(&pe->link, &lpe->link);
                        return;
                }
        }
        list_add_tail(&pe->link, &phb->ioda.pe_list);
}

static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
{
        /* This is quite simplistic. The "base" weight of a device
         * is 10. 0 means no DMA is to be accounted for it.
         */

        /* If it's a bridge, no DMA */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
                return 0;

        /* Reduce the weight of slow USB controllers */
        if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
            dev->class == PCI_CLASS_SERIAL_USB_EHCI)
                return 3;

        /* Increase the weight of RAID (includes Obsidian) */
        if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
                return 15;

        /* Default */
        return 10;
}

static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!pdn) {
                pr_err("%s: Device tree node not associated properly\n",
                       pci_name(dev));
                return NULL;
        }
        if (pdn->pe_number != IODA_INVALID_PE)
                return NULL;

        /* PE#0 has been pre-set */
        if (dev->bus->number == 0)
                pe_num = 0;
        else
                pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling device\n",
                           pci_name(dev));
                return NULL;
        }

        /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
         * pointer in the PE data structure, both should be destroyed at the
         * same time. However, this needs to be looked at more closely again
         * once we actually start removing things (Hotplug, SR-IOV, ...)
         *
         * At some point we want to remove the PDN completely anyways
         */
        pe = &phb->ioda.pe_array[pe_num];
        pci_dev_get(dev);
        pdn->pcidev = dev;
        pdn->pe_number = pe_num;
        pe->pdev = dev;
        pe->pbus = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = dev->bus->number << 8 | pdn->devfn;

        pe_info(pe, "Associated device to PE\n");

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pdn->pe_number = IODA_INVALID_PE;
                pe->pdev = NULL;
                pci_dev_put(dev);
                return NULL;
        }

        /* Assign a DMA weight to the device */
        pe->dma_weight = pnv_ioda_dma_weight(dev);
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);

        return pe;
}

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

                if (pdn == NULL) {
                        pr_warn("%s: No device node associated with device !\n",
                                pci_name(dev));
                        continue;
                }
                pci_dev_get(dev);
                pdn->pcidev = dev;
                pdn->pe_number = pe->pe_number;
                pe->dma_weight += pnv_ioda_dma_weight(dev);
                if (dev->subordinate)
                        pnv_ioda_setup_same_PE(dev->subordinate, pe);
        }
}

static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
                                            struct pnv_ioda_pe *ppe)
{
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
        struct pnv_phb *phb = hose->private_data;
        struct pci_bus *bus = dev->subordinate;
        struct pnv_ioda_pe *pe;
        int pe_num;

        if (!bus) {
                pr_warning("%s: Bridge without a subordinate bus !\n",
                           pci_name(dev));
                return;
        }
        pe_num = pnv_ioda_alloc_pe(phb);
        if (pe_num == IODA_INVALID_PE) {
                pr_warning("%s: Not enough PE# available, disabling bus\n",
                           pci_name(dev));
                return;
        }

        pe = &phb->ioda.pe_array[pe_num];
        ppe->bus_pe = pe;
        pe->pbus = bus;
        pe->pdev = NULL;
        pe->tce32_seg = -1;
        pe->mve_number = -1;
        pe->rid = bus->secondary << 8;
        pe->dma_weight = 0;

        pe_info(pe, "Secondary busses %d..%d associated with PE\n",
                bus->secondary, bus->subordinate);

        if (pnv_ioda_configure_pe(phb, pe)) {
                /* XXX What do we do here ? */
                if (pe_num)
                        pnv_ioda_free_pe(phb, pe_num);
                pe->pbus = NULL;
                return;
        }

        /* Associate it with all child devices */
        pnv_ioda_setup_same_PE(bus, pe);

        /* Account for one DMA PE if at least one DMA capable device exists
         * below the bridge
         */
        if (pe->dma_weight != 0) {
                phb->ioda.dma_weight += pe->dma_weight;
                phb->ioda.dma_pe_count++;
        }

        /* Link the PE */
        pnv_ioda_link_pe_by_weight(phb, pe);
}

static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
{
        struct pci_dev *dev;
        struct pnv_ioda_pe *pe;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                pe = pnv_ioda_setup_dev_PE(dev);
                if (pe == NULL)
                        continue;
                /* Leaving the PCIe domain ... single PE# */
                if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
                        pnv_ioda_setup_bus_PE(dev, pe);
                else if (dev->subordinate)
                        pnv_ioda_setup_PEs(dev->subordinate);
        }
}

static void __devinit pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb,
                                                 struct pci_dev *dev)
{
        /* We delay DMA setup until after all PE# have been assigned */
}

static void __devinit pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
                                             struct pci_bus *bus)
{
        struct pci_dev *dev;

        list_for_each_entry(dev, &bus->devices, bus_list) {
                set_iommu_table_base(&dev->dev, &pe->tce32_table);
                if (dev->subordinate)
                        pnv_ioda_setup_bus_dma(pe, dev->subordinate);
        }
}

static void __devinit pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
                                                struct pnv_ioda_pe *pe,
                                                unsigned int base,
                                                unsigned int segs)
{

        struct page *tce_mem = NULL;
        const __be64 *swinvp;
        struct iommu_table *tbl;
        unsigned int i;
        int64_t rc;
        void *addr;

        /* 256M DMA window, 4K TCE pages, 8 bytes TCE */
#define TCE32_TABLE_SIZE        ((0x10000000 / 0x1000) * 8)
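        /*
         * Each TCE maps one 4K page and takes 8 bytes, so a 256MB DMA
         * segment needs (0x10000000 / 0x1000) * 8 = 512KB of TCE table.
         * A PE handed "segs" segments therefore gets segs * 512KB of
         * table memory, allocated as one contiguous chunk below.
         */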

        /* XXX FIXME: Handle 64-bit only DMA devices */
        /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
        /* XXX FIXME: Allocate multi-level tables on PHB3 */

        /* We shouldn't already have a 32-bit DMA associated */
        if (WARN_ON(pe->tce32_seg >= 0))
                return;

        /* Grab a 32-bit TCE table */
        pe->tce32_seg = base;
        pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
                (base << 28), ((base + segs) << 28) - 1);

        /* XXX Currently, we allocate one big contiguous table for the
         * TCEs. We only really need one chunk per 256M of TCE space
         * (ie per segment) but that's an optimization for later, it
         * requires some added smarts with our get/put_tce implementation
         */
        tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
                                   get_order(TCE32_TABLE_SIZE * segs));
        if (!tce_mem) {
                pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
                goto fail;
        }
        addr = page_address(tce_mem);
        memset(addr, 0, TCE32_TABLE_SIZE * segs);

        /* Configure HW */
        for (i = 0; i < segs; i++) {
                rc = opal_pci_map_pe_dma_window(phb->opal_id,
                                              pe->pe_number,
                                              base + i, 1,
                                              __pa(addr) + TCE32_TABLE_SIZE * i,
                                              TCE32_TABLE_SIZE, 0x1000);
                if (rc) {
                        pe_err(pe, " Failed to configure 32-bit TCE table,"
                               " err %ld\n", rc);
                        goto fail;
                }
        }

        /* Setup linux iommu table */
        tbl = &pe->tce32_table;
        pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
                                  base << 28);

        /* OPAL variant of P7IOC SW invalidated TCEs */
        swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
        if (swinvp) {
                /* We need a couple more fields -- an address and a data
                 * to OR in. Since the bus is only printed out on table free
                 * errors, and on the first pass the data will be a relative
                 * bus number, print that out instead.
                 */
                tbl->it_busno = 0;
                tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
                tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
                        | TCE_PCI_SWINV_PAIR;
        }
        iommu_init_table(tbl, phb->hose->node);

        if (pe->pdev)
                set_iommu_table_base(&pe->pdev->dev, tbl);
        else
                pnv_ioda_setup_bus_dma(pe, pe->pbus);

        return;
 fail:
        /* XXX Failure: Try to fallback to 64-bit only ? */
        if (pe->tce32_seg >= 0)
                pe->tce32_seg = -1;
        if (tce_mem)
                __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
}

static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb)
{
        struct pci_controller *hose = phb->hose;
        unsigned int residual, remaining, segs, tw, base;
        struct pnv_ioda_pe *pe;

        /* If we have more PE# than segments available, hand out one
         * per PE until we run out and let the rest fail. If not,
         * then we assign at least one segment per PE, plus more based
         * on the weight of the devices under that PE
         */
        if (phb->ioda.dma_pe_count > phb->ioda.tce32_count)
                residual = 0;
        else
                residual = phb->ioda.tce32_count -
                        phb->ioda.dma_pe_count;

        pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
                hose->global_number, phb->ioda.tce32_count);
        pr_info("PCI: %d PE# for a total weight of %d\n",
                phb->ioda.dma_pe_count, phb->ioda.dma_weight);

        /* Walk our PE list and configure their DMA segments, hand them
         * out one base segment plus any residual segments based on
         * weight
         */
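        /* Hypothetical example: with tce32_count = 8 and two DMA capable
         * PEs of weight 15 and 10 (tw = 25), residual = 6; the first PE
         * gets 1 + (15 * 6 + 12) / 25 = 5 segments and the second gets
         * 1 + (10 * 6 + 12) / 25 = 3, using up all 8.
         */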
        remaining = phb->ioda.tce32_count;
        tw = phb->ioda.dma_weight;
        base = 0;
        list_for_each_entry(pe, &phb->ioda.pe_list, link) {
                if (!pe->dma_weight)
                        continue;
                if (!remaining) {
                        pe_warn(pe, "No DMA32 resources available\n");
                        continue;
                }
                segs = 1;
                if (residual) {
                        segs += ((pe->dma_weight * residual) + (tw / 2)) / tw;
                        if (segs > remaining)
                                segs = remaining;
                }
                pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
                        pe->dma_weight, segs);
                pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
                remaining -= segs;
                base += segs;
        }
}

#ifdef CONFIG_PCI_MSI
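/*
 * MSI setup: each hardware IRQ corresponds to an XIVE number relative
 * to the PHB's MSI base. The XIVE is bound to the device's PE so the
 * hardware can validate the interrupt against the MVE set up earlier,
 * then OPAL returns the address/data pair to program into the device.
 */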
static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int is_64,
                                  struct msi_msg *msg)
{
        struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
        unsigned int xive_num = hwirq - phb->msi_base;
        uint64_t addr64;
        uint32_t addr32, data;
        int rc;

        /* No PE assigned ? bail out ... no MSI for you ! */
        if (pe == NULL)
                return -ENXIO;

        /* Check if we have an MVE */
        if (pe->mve_number < 0)
                return -ENXIO;

        /* Assign XIVE to PE */
        rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
        if (rc) {
                pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
                        pci_name(dev), rc, xive_num);
                return -EIO;
        }

        if (is_64) {
                rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr64, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = addr64 >> 32;
                msg->address_lo = addr64 & 0xfffffffful;
        } else {
                rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
                                     &addr32, &data);
                if (rc) {
                        pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
                                pci_name(dev), rc);
                        return -EIO;
                }
                msg->address_hi = 0;
                msg->address_lo = addr32;
        }
        msg->data = data;

        pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
                 " address=%x_%08x data=%x PE# %d\n",
                 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
                 msg->address_hi, msg->address_lo, data, pe->pe_number);

        return 0;
}

static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
{
        unsigned int bmap_size;
        const __be32 *prop = of_get_property(phb->hose->dn,
                                             "ibm,opal-msi-ranges", NULL);
        if (!prop) {
                /* BML Fallback */
                prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
        }
        if (!prop)
                return;

        phb->msi_base = be32_to_cpup(prop);
        phb->msi_count = be32_to_cpup(prop + 1);
        bmap_size = BITS_TO_LONGS(phb->msi_count) * sizeof(unsigned long);
        phb->msi_map = zalloc_maybe_bootmem(bmap_size, GFP_KERNEL);
        if (!phb->msi_map) {
                pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
                       phb->hose->global_number);
                return;
        }
        phb->msi_setup = pnv_pci_ioda_msi_setup;
        phb->msi32_support = 1;
        pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
                phb->msi_count, phb->msi_base);
}
#else
static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */

/* This is the starting point of our IODA specific resource
 * allocation process
 */
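/*
 * Ordering matters here: the PEs must exist before the segment maps are
 * programmed (pnv_ioda_update_resources() needs a PE# for each device),
 * and DMA setup runs last because it relies on the per-PE DMA weights
 * accumulated while the PEs were being built.
 */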
static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
{
        resource_size_t size, align;
        struct pci_bus *child;

        /* Associate PEs per function */
        pnv_ioda_setup_PEs(hose->bus);

        /* Calculate all resources */
        pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align);
        pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align);

        /* Apply them to HW */
        pnv_ioda_update_resources(hose->bus);

        /* Setup DMA */
        pnv_ioda_setup_dma(hose->private_data);

        /* Configure PCI Express settings */
        list_for_each_entry(child, &hose->bus->children, node) {
                struct pci_dev *self = child->self;
                if (!self)
                        continue;
                pcie_bus_configure_settings(child, self->pcie_mpss);
        }
}

/* Prevent enabling devices for which we couldn't properly
 * assign a PE
 */
static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev)
{
        struct pci_dn *pdn = pnv_ioda_get_pdn(dev);

        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return -EINVAL;
        return 0;
}

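/*
 * Reverse lookup from a config-space RID (bus << 8 | devfn) to the PE#
 * it was mapped to in pnv_ioda_configure_pe(); installed below as
 * phb->bdfn_to_pe for the common PowerNV PCI code.
 */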
static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
                               u32 devfn)
{
        return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}

void __init pnv_pci_init_ioda1_phb(struct device_node *np)
{
        struct pci_controller *hose;
        static int primary = 1;
        struct pnv_phb *phb;
        unsigned long size, m32map_off, iomap_off, pemap_off;
        const u64 *prop64;
        u64 phb_id;
        void *aux;
        long rc;

        pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);

        prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
        if (!prop64) {
                pr_err("  Missing \"ibm,opal-phbid\" property !\n");
                return;
        }
        phb_id = be64_to_cpup(prop64);
        pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);

        phb = alloc_bootmem(sizeof(struct pnv_phb));
        if (phb) {
                memset(phb, 0, sizeof(struct pnv_phb));
                phb->hose = hose = pcibios_alloc_controller(np);
        }
        if (!phb || !phb->hose) {
                pr_err("PCI: Failed to allocate PCI controller for %s\n",
                       np->full_name);
                return;
        }

        spin_lock_init(&phb->lock);
        /* XXX Use device-tree */
        hose->first_busno = 0;
        hose->last_busno = 0xff;
        hose->private_data = phb;
        phb->opal_id = phb_id;
        phb->type = PNV_PHB_IODA1;

        /* Detect specific models for error handling */
        if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
                phb->model = PNV_PHB_MODEL_P7IOC;
        else
                phb->model = PNV_PHB_MODEL_UNKNOWN;

        /* We parse "ranges" now since we need to deduce the register base
         * from the IO base
         */
        pci_process_bridge_OF_ranges(phb->hose, np, primary);
        primary = 0;

        /* Magic formula from Milton */
        phb->regs = of_iomap(np, 0);
        if (phb->regs == NULL)
                pr_err("  Failed to map registers !\n");


        /* XXX This is hack-a-thon. This needs to be changed so that:
         *  - we obtain stuff like PE# etc... from device-tree
         *  - we properly re-allocate M32 ourselves
         *    (the OFW one isn't very good)
         */

        /* Initialize more IODA stuff */
        phb->ioda.total_pe = 128;

        phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
        /* OFW has already chopped the top 64k off the M32 space (MSI space) */
        phb->ioda.m32_size += 0x10000;

        phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
        phb->ioda.m32_pci_base = hose->mem_resources[0].start -
                hose->pci_mem_offset;
        phb->ioda.io_size = hose->pci_io_size;
        phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
        phb->ioda.io_pci_base = 0; /* XXX calculate this ? */

        /* Allocate aux data & arrays */
        size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
        m32map_off = size;
        size += phb->ioda.total_pe;
        iomap_off = size;
        size += phb->ioda.total_pe;
        pemap_off = size;
        size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe);
        aux = alloc_bootmem(size);
        memset(aux, 0, size);
        phb->ioda.pe_alloc = aux;
        phb->ioda.m32_segmap = aux + m32map_off;
        phb->ioda.io_segmap = aux + iomap_off;
        phb->ioda.pe_array = aux + pemap_off;
        set_bit(0, phb->ioda.pe_alloc);

        INIT_LIST_HEAD(&phb->ioda.pe_list);

        /* Calculate how many 32-bit TCE segments we have */
        phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
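        /* One 32-bit TCE segment covers 256MB of PCI space, so the number
         * of segments is the 32-bit space below the M32 window divided by
         * 256MB (m32_pci_base >> 28). A hypothetical M32 base at 2GB would
         * give 8 segments, i.e. 2GB of 32-bit DMA space.
         */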

        /* Clear unusable m64 */
        hose->mem_resources[1].flags = 0;
        hose->mem_resources[1].start = 0;
        hose->mem_resources[1].end = 0;
        hose->mem_resources[2].flags = 0;
        hose->mem_resources[2].start = 0;
        hose->mem_resources[2].end = 0;

#if 0
        rc = opal_pci_set_phb_mem_window(opal->phb_id,
                                         window_type,
                                         window_num,
                                         starting_real_address,
                                         starting_pci_address,
                                         segment_size);
#endif

        pr_info("  %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n",
                phb->ioda.total_pe,
                phb->ioda.m32_size, phb->ioda.m32_segsize,
                phb->ioda.io_size, phb->ioda.io_segsize);

        if (phb->regs) {
                pr_devel(" BUID     = 0x%016llx\n", in_be64(phb->regs + 0x100));
                pr_devel(" PHB2_CR  = 0x%016llx\n", in_be64(phb->regs + 0x160));
                pr_devel(" IO_BAR   = 0x%016llx\n", in_be64(phb->regs + 0x170));
                pr_devel(" IO_BAMR  = 0x%016llx\n", in_be64(phb->regs + 0x178));
                pr_devel(" IO_SAR   = 0x%016llx\n", in_be64(phb->regs + 0x180));
                pr_devel(" M32_BAR  = 0x%016llx\n", in_be64(phb->regs + 0x190));
                pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
                pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
        }
        phb->hose->ops = &pnv_pci_ops;

        /* Setup RID -> PE mapping function */
        phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;

        /* Setup TCEs */
        phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;

        /* Setup MSI support */
        pnv_pci_init_ioda_msis(phb);

        /* We set both probe_only and PCI_REASSIGN_ALL_RSRC. This is an
         * odd combination which essentially means that we skip all resource
         * fixups and assignments in the generic code, and do it all
         * ourselves here
         */
        pci_probe_only = 1;
        ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
        ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
        pci_add_flags(PCI_REASSIGN_ALL_RSRC);

        /* Reset IODA tables to a clean state */
        rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
        if (rc)
                pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
        opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1, OPAL_MAP_PE);
}

void __init pnv_pci_init_ioda_hub(struct device_node *np)
{
        struct device_node *phbn;
        const u64 *prop64;
        u64 hub_id;

        pr_info("Probing IODA IO-Hub %s\n", np->full_name);

        prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
        if (!prop64) {
                pr_err(" Missing \"ibm,opal-hubid\" property !\n");
                return;
        }
        hub_id = be64_to_cpup(prop64);
        pr_devel(" HUB-ID : 0x%016llx\n", hub_id);

        /* Count child PHBs */
        for_each_child_of_node(np, phbn) {
                /* Look for IODA1 PHBs */
                if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
                        pnv_pci_init_ioda1_phb(phbn);
        }
}