linux/drivers/pci/dmar.c
   1/*
   2 * Copyright (c) 2006, Intel Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15 * Place - Suite 330, Boston, MA 02111-1307 USA.
  16 *
  17 * Copyright (C) 2006-2008 Intel Corporation
  18 * Author: Ashok Raj <ashok.raj@intel.com>
  19 * Author: Shaohua Li <shaohua.li@intel.com>
  20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  21 *
   22 * This file implements early detection/parsing of Remapping Devices
   23 * reported to the OS through the BIOS via the DMA remapping reporting
   24 * (DMAR) ACPI tables.
   25 *
   26 * These routines are used by both DMA-remapping and Interrupt-remapping.
   27 */
  28
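/*
 * Roughly, the DMAR table walked by parse_dmar_table() below looks like
 * this (see the VT-d specification for the authoritative layout):
 *
 *   struct acpi_table_dmar           - table header, host address width, flags
 *   struct acpi_dmar_header ...      - one sub-table per remapping structure,
 *                                      each carrying its own type and length:
 *     ACPI_DMAR_TYPE_HARDWARE_UNIT   - DRHD: register base, segment and an
 *                                      optional list of device scope entries
 *     ACPI_DMAR_TYPE_RESERVED_MEMORY - RMRR: reserved range plus device scope
 *
 * A device scope entry names a start bus number and a list of (device,
 * function) hops that dmar_parse_one_dev_scope() resolves to a pci_dev.
 */
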
  29#include <linux/pci.h>
  30#include <linux/dmar.h>
  31#include <linux/iova.h>
  32#include <linux/intel-iommu.h>
  33#include <linux/timer.h>
  34#include <linux/irq.h>
  35#include <linux/interrupt.h>
  36
  37#undef PREFIX
  38#define PREFIX "DMAR:"
  39
   40/* No locks are needed as the DMA remapping hardware unit
   41 * list is constructed at boot time and hotplug of
   42 * these units is not supported by the architecture.
   43 */
  44LIST_HEAD(dmar_drhd_units);
  45
  46static struct acpi_table_header * __initdata dmar_tbl;
  47static acpi_size dmar_tbl_size;
  48
  49static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
  50{
   51        /*
   52         * Add INCLUDE_ALL units at the tail, so that a scan of the list
   53         * will find them at the very end.
   54         */
  55        if (drhd->include_all)
  56                list_add_tail(&drhd->list, &dmar_drhd_units);
  57        else
  58                list_add(&drhd->list, &dmar_drhd_units);
  59}
  60
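/*
 * Resolve one ACPI device scope entry to a pci_dev: start at the bus
 * number given in the entry and follow each (device, function) hop of
 * the PCI path through subordinate buses until the target is reached.
 */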
  61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
  62                                           struct pci_dev **dev, u16 segment)
  63{
  64        struct pci_bus *bus;
  65        struct pci_dev *pdev = NULL;
  66        struct acpi_dmar_pci_path *path;
  67        int count;
  68
  69        bus = pci_find_bus(segment, scope->bus);
  70        path = (struct acpi_dmar_pci_path *)(scope + 1);
  71        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
  72                / sizeof(struct acpi_dmar_pci_path);
  73
  74        while (count) {
  75                if (pdev)
  76                        pci_dev_put(pdev);
   77                /*
   78                 * Some BIOSes list non-existent devices in the DMAR
   79                 * table; just ignore them.
   80                 */
  81                if (!bus) {
  82                        printk(KERN_WARNING
  83                        PREFIX "Device scope bus [%d] not found\n",
  84                        scope->bus);
  85                        break;
  86                }
  87                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
  88                if (!pdev) {
  89                        printk(KERN_WARNING PREFIX
  90                        "Device scope device [%04x:%02x:%02x.%02x] not found\n",
  91                                segment, bus->number, path->dev, path->fn);
  92                        break;
  93                }
   94                path++;
   95                count--;
  96                bus = pdev->subordinate;
  97        }
  98        if (!pdev) {
  99                printk(KERN_WARNING PREFIX
 100                "Device scope device [%04x:%02x:%02x.%02x] not found\n",
 101                segment, scope->bus, path->dev, path->fn);
 102                *dev = NULL;
 103                return 0;
 104        }
  105        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
  106                        pdev->subordinate) || (scope->entry_type ==
  107                        ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
  108                printk(KERN_WARNING PREFIX
  109                        "Device scope type does not match for %s\n",
  110                        pci_name(pdev));
  111                pci_dev_put(pdev);
  112                return -EINVAL;
  113        }
 114        *dev = pdev;
 115        return 0;
 116}
 117
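/*
 * Parse a whole device scope list: count the endpoint/bridge entries,
 * allocate an array of pci_dev pointers, then resolve each entry with
 * dmar_parse_one_dev_scope().
 */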
 118static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
 119                                       struct pci_dev ***devices, u16 segment)
 120{
 121        struct acpi_dmar_device_scope *scope;
 122        void * tmp = start;
 123        int index;
 124        int ret;
 125
 126        *cnt = 0;
 127        while (start < end) {
 128                scope = start;
 129                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
 130                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 131                        (*cnt)++;
 132                else
 133                        printk(KERN_WARNING PREFIX
 134                                "Unsupported device scope\n");
 135                start += scope->length;
 136        }
 137        if (*cnt == 0)
 138                return 0;
 139
 140        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
 141        if (!*devices)
 142                return -ENOMEM;
 143
 144        start = tmp;
 145        index = 0;
 146        while (start < end) {
 147                scope = start;
 148                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
 149                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
 150                        ret = dmar_parse_one_dev_scope(scope,
 151                                &(*devices)[index], segment);
 152                        if (ret) {
 153                                kfree(*devices);
 154                                return ret;
 155                        }
  156                        index++;
 157                }
 158                start += scope->length;
 159        }
 160
 161        return 0;
 162}
 163
  164/**
  165 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  166 * structure which uniquely represents one DMA remapping hardware unit
  167 * present in the platform
  168 */
 169static int __init
 170dmar_parse_one_drhd(struct acpi_dmar_header *header)
 171{
 172        struct acpi_dmar_hardware_unit *drhd;
 173        struct dmar_drhd_unit *dmaru;
 174        int ret = 0;
 175
 176        drhd = (struct acpi_dmar_hardware_unit *)header;
 177        if (!drhd->address) {
 178                /* Promote an attitude of violence to a BIOS engineer today */
 179                WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
 180                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 181                     dmi_get_system_info(DMI_BIOS_VENDOR),
 182                     dmi_get_system_info(DMI_BIOS_VERSION),
 183                     dmi_get_system_info(DMI_PRODUCT_VERSION));
 184                return -ENODEV;
 185        }
 186        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
 187        if (!dmaru)
 188                return -ENOMEM;
 189
 190        dmaru->hdr = header;
 191        dmaru->reg_base_addr = drhd->address;
 192        dmaru->segment = drhd->segment;
 193        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 194
 195        ret = alloc_iommu(dmaru);
 196        if (ret) {
 197                kfree(dmaru);
 198                return ret;
 199        }
 200        dmar_register_drhd_unit(dmaru);
 201        return 0;
 202}
 203
 204static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 205{
 206        struct acpi_dmar_hardware_unit *drhd;
 207        int ret = 0;
 208
 209        drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 210
 211        if (dmaru->include_all)
 212                return 0;
 213
 214        ret = dmar_parse_dev_scope((void *)(drhd + 1),
 215                                ((void *)drhd) + drhd->header.length,
 216                                &dmaru->devices_cnt, &dmaru->devices,
 217                                drhd->segment);
 218        if (ret) {
 219                list_del(&dmaru->list);
 220                kfree(dmaru);
 221        }
 222        return ret;
 223}
 224
 225#ifdef CONFIG_DMAR
 226LIST_HEAD(dmar_rmrr_units);
 227
 228static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
 229{
 230        list_add(&rmrr->list, &dmar_rmrr_units);
 231}
 232
 233
 234static int __init
 235dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 236{
 237        struct acpi_dmar_reserved_memory *rmrr;
 238        struct dmar_rmrr_unit *rmrru;
 239
 240        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
 241        if (!rmrru)
 242                return -ENOMEM;
 243
 244        rmrru->hdr = header;
 245        rmrr = (struct acpi_dmar_reserved_memory *)header;
 246        rmrru->base_address = rmrr->base_address;
 247        rmrru->end_address = rmrr->end_address;
 248
 249        dmar_register_rmrr_unit(rmrru);
 250        return 0;
 251}
 252
 253static int __init
 254rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
 255{
 256        struct acpi_dmar_reserved_memory *rmrr;
 257        int ret;
 258
 259        rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
 260        ret = dmar_parse_dev_scope((void *)(rmrr + 1),
 261                ((void *)rmrr) + rmrr->header.length,
 262                &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
 263
 264        if (ret || (rmrru->devices_cnt == 0)) {
 265                list_del(&rmrru->list);
 266                kfree(rmrru);
 267        }
 268        return ret;
 269}
 270#endif
 271
 272static void __init
 273dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 274{
 275        struct acpi_dmar_hardware_unit *drhd;
 276        struct acpi_dmar_reserved_memory *rmrr;
 277
 278        switch (header->type) {
 279        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 280                drhd = (struct acpi_dmar_hardware_unit *)header;
  281                printk(KERN_INFO PREFIX
  282                        "DRHD (flags: 0x%08x) base: 0x%016Lx\n",
  283                        drhd->flags, (unsigned long long)drhd->address);
 284                break;
 285        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 286                rmrr = (struct acpi_dmar_reserved_memory *)header;
 287
  288                printk(KERN_INFO PREFIX
 289                        "RMRR base: 0x%016Lx end: 0x%016Lx\n",
 290                        (unsigned long long)rmrr->base_address,
 291                        (unsigned long long)rmrr->end_address);
 292                break;
 293        }
 294}
 295
 296/**
 297 * dmar_table_detect - checks to see if the platform supports DMAR devices
 298 */
 299static int __init dmar_table_detect(void)
 300{
 301        acpi_status status = AE_OK;
 302
  303        /* if we can find the DMAR table, then there are DMAR devices */
 304        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
 305                                (struct acpi_table_header **)&dmar_tbl,
 306                                &dmar_tbl_size);
 307
 308        if (ACPI_SUCCESS(status) && !dmar_tbl) {
  309                printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
 310                status = AE_NOT_FOUND;
 311        }
 312
 313        return (ACPI_SUCCESS(status) ? 1 : 0);
 314}
 315
 316/**
 317 * parse_dmar_table - parses the DMA reporting table
 318 */
 319static int __init
 320parse_dmar_table(void)
 321{
 322        struct acpi_table_dmar *dmar;
 323        struct acpi_dmar_header *entry_header;
 324        int ret = 0;
 325
  326        /*
  327         * Do it again; the earlier dmar_tbl mapping could have been done
  328         * with the fixed map.
  329         */
 330        dmar_table_detect();
 331
 332        dmar = (struct acpi_table_dmar *)dmar_tbl;
 333        if (!dmar)
 334                return -ENODEV;
 335
 336        if (dmar->width < PAGE_SHIFT - 1) {
 337                printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
 338                return -EINVAL;
 339        }
 340
  341        printk(KERN_INFO PREFIX "Host address width %d\n",
 342                dmar->width + 1);
 343
 344        entry_header = (struct acpi_dmar_header *)(dmar + 1);
 345        while (((unsigned long)entry_header) <
 346                        (((unsigned long)dmar) + dmar_tbl->length)) {
 347                /* Avoid looping forever on bad ACPI tables */
 348                if (entry_header->length == 0) {
 349                        printk(KERN_WARNING PREFIX
 350                                "Invalid 0-length structure\n");
 351                        ret = -EINVAL;
 352                        break;
 353                }
 354
 355                dmar_table_print_dmar_entry(entry_header);
 356
 357                switch (entry_header->type) {
 358                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 359                        ret = dmar_parse_one_drhd(entry_header);
 360                        break;
 361                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 362#ifdef CONFIG_DMAR
 363                        ret = dmar_parse_one_rmrr(entry_header);
 364#endif
 365                        break;
 366                default:
 367                        printk(KERN_WARNING PREFIX
 368                                "Unknown DMAR structure type\n");
 369                        ret = 0; /* for forward compatibility */
 370                        break;
 371                }
 372                if (ret)
 373                        break;
 374
 375                entry_header = ((void *)entry_header + entry_header->length);
 376        }
 377        return ret;
 378}
 379
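/*
 * Return 1 if @dev, or any bridge on the path from @dev towards the root
 * bus, is listed in the @devices device scope array.
 */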
 380int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
 381                          struct pci_dev *dev)
 382{
 383        int index;
 384
 385        while (dev) {
 386                for (index = 0; index < cnt; index++)
 387                        if (dev == devices[index])
 388                                return 1;
 389
 390                /* Check our parent */
 391                dev = dev->bus->self;
 392        }
 393
 394        return 0;
 395}
 396
 397struct dmar_drhd_unit *
 398dmar_find_matched_drhd_unit(struct pci_dev *dev)
 399{
 400        struct dmar_drhd_unit *dmaru = NULL;
 401        struct acpi_dmar_hardware_unit *drhd;
 402
 403        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
 404                drhd = container_of(dmaru->hdr,
 405                                    struct acpi_dmar_hardware_unit,
 406                                    header);
 407
 408                if (dmaru->include_all &&
 409                    drhd->segment == pci_domain_nr(dev->bus))
 410                        return dmaru;
 411
 412                if (dmar_pci_device_match(dmaru->devices,
 413                                          dmaru->devices_cnt, dev))
 414                        return dmaru;
 415        }
 416
 417        return NULL;
 418}
 419
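/*
 * Resolve the device scope lists of all DRHD (and, under CONFIG_DMAR,
 * RMRR) units into pci_dev arrays; units whose scope cannot be resolved
 * are removed from their lists.
 */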
 420int __init dmar_dev_scope_init(void)
 421{
 422        struct dmar_drhd_unit *drhd, *drhd_n;
 423        int ret = -ENODEV;
 424
 425        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
 426                ret = dmar_parse_dev(drhd);
 427                if (ret)
 428                        return ret;
 429        }
 430
 431#ifdef CONFIG_DMAR
 432        {
 433                struct dmar_rmrr_unit *rmrr, *rmrr_n;
 434                list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
 435                        ret = rmrr_parse_dev(rmrr);
 436                        if (ret)
 437                                return ret;
 438                }
 439        }
 440#endif
 441
 442        return ret;
 443}
 444
 445
 446int __init dmar_table_init(void)
 447{
 448        static int dmar_table_initialized;
 449        int ret;
 450
 451        if (dmar_table_initialized)
 452                return 0;
 453
 454        dmar_table_initialized = 1;
 455
 456        ret = parse_dmar_table();
 457        if (ret) {
 458                if (ret != -ENODEV)
  459                        printk(KERN_INFO PREFIX "Failed to parse DMAR table\n");
 460                return ret;
 461        }
 462
 463        if (list_empty(&dmar_drhd_units)) {
 464                printk(KERN_INFO PREFIX "No DMAR devices found\n");
 465                return -ENODEV;
 466        }
 467
 468#ifdef CONFIG_DMAR
 469        if (list_empty(&dmar_rmrr_units))
 470                printk(KERN_INFO PREFIX "No RMRR found\n");
 471#endif
 472
 473#ifdef CONFIG_INTR_REMAP
 474        parse_ioapics_under_ir();
 475#endif
 476        return 0;
 477}
 478
 479void __init detect_intel_iommu(void)
 480{
 481        int ret;
 482
 483        ret = dmar_table_detect();
 484
 485        {
 486#ifdef CONFIG_INTR_REMAP
 487                struct acpi_table_dmar *dmar;
 488                /*
  489                 * For now we will disable DMA remapping when interrupt
  490                 * remapping is enabled.
  491                 * When support for queued invalidation of the IOTLB is
  492                 * added, we will not need this any more.
 493                 */
 494                dmar = (struct acpi_table_dmar *) dmar_tbl;
 495                if (ret && cpu_has_x2apic && dmar->flags & 0x1)
 496                        printk(KERN_INFO
 497                               "Queued invalidation will be enabled to support "
 498                               "x2apic and Intr-remapping.\n");
 499#endif
 500#ifdef CONFIG_DMAR
 501                if (ret && !no_iommu && !iommu_detected && !swiotlb &&
 502                    !dmar_disabled)
 503                        iommu_detected = 1;
 504#endif
 505        }
 506        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
 507        dmar_tbl = NULL;
 508}
 509
 510
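/*
 * Map a DRHD's register window and cache its capability registers. The
 * initial mapping covers one VT-d page; if CAP/ECAP report fault or
 * IOTLB registers beyond that, the window is remapped larger below.
 */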
 511int alloc_iommu(struct dmar_drhd_unit *drhd)
 512{
 513        struct intel_iommu *iommu;
 514        int map_size;
 515        u32 ver;
 516        static int iommu_allocated = 0;
 517        int agaw = 0;
 518
 519        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 520        if (!iommu)
 521                return -ENOMEM;
 522
 523        iommu->seq_id = iommu_allocated++;
  524        sprintf(iommu->name, "dmar%d", iommu->seq_id);
 525
 526        iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
 527        if (!iommu->reg) {
 528                printk(KERN_ERR "IOMMU: can't map the region\n");
 529                goto error;
 530        }
 531        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 532        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 533
 534#ifdef CONFIG_DMAR
 535        agaw = iommu_calculate_agaw(iommu);
 536        if (agaw < 0) {
 537                printk(KERN_ERR
 538                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
 539                        iommu->seq_id);
 540                goto error;
 541        }
 542#endif
 543        iommu->agaw = agaw;
 544
 545        /* the registers might be more than one page */
 546        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 547                cap_max_fault_reg_offset(iommu->cap));
 548        map_size = VTD_PAGE_ALIGN(map_size);
 549        if (map_size > VTD_PAGE_SIZE) {
 550                iounmap(iommu->reg);
 551                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
 552                if (!iommu->reg) {
 553                        printk(KERN_ERR "IOMMU: can't map the region\n");
 554                        goto error;
 555                }
 556        }
 557
 558        ver = readl(iommu->reg + DMAR_VER_REG);
 559        pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
 560                (unsigned long long)drhd->reg_base_addr,
 561                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
 562                (unsigned long long)iommu->cap,
 563                (unsigned long long)iommu->ecap);
 564
 565        spin_lock_init(&iommu->register_lock);
 566
 567        drhd->iommu = iommu;
 568        return 0;
 569error:
 570        kfree(iommu);
 571        return -1;
 572}
 573
 574void free_iommu(struct intel_iommu *iommu)
 575{
 576        if (!iommu)
 577                return;
 578
 579#ifdef CONFIG_DMAR
 580        free_dmar_iommu(iommu);
 581#endif
 582
 583        if (iommu->reg)
 584                iounmap(iommu->reg);
 585        kfree(iommu);
 586}
 587
 588/*
  589 * Reclaim all the submitted descriptors which have completed their work.
 590 */
 591static inline void reclaim_free_desc(struct q_inval *qi)
 592{
 593        while (qi->desc_status[qi->free_tail] == QI_DONE) {
 594                qi->desc_status[qi->free_tail] = QI_FREE;
 595                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
 596                qi->free_cnt++;
 597        }
 598}
 599
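/*
 * If the hardware reports an Invalidation Queue Error for the descriptor
 * we just submitted, overwrite it with the (harmless) wait descriptor
 * and clear the error so that the queue can make forward progress.
 */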
 600static int qi_check_fault(struct intel_iommu *iommu, int index)
 601{
 602        u32 fault;
 603        int head;
 604        struct q_inval *qi = iommu->qi;
 605        int wait_index = (index + 1) % QI_LENGTH;
 606
 607        fault = readl(iommu->reg + DMAR_FSTS_REG);
 608
 609        /*
 610         * If IQE happens, the head points to the descriptor associated
 611         * with the error. No new descriptors are fetched until the IQE
 612         * is cleared.
 613         */
 614        if (fault & DMA_FSTS_IQE) {
 615                head = readl(iommu->reg + DMAR_IQH_REG);
 616                if ((head >> 4) == index) {
 617                        memcpy(&qi->desc[index], &qi->desc[wait_index],
 618                                        sizeof(struct qi_desc));
 619                        __iommu_flush_cache(iommu, &qi->desc[index],
 620                                        sizeof(struct qi_desc));
 621                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
 622                        return -EINVAL;
 623                }
 624        }
 625
 626        return 0;
 627}
 628
 629/*
 630 * Submit the queued invalidation descriptor to the remapping
 631 * hardware unit and wait for its completion.
 632 */
 633int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 634{
 635        int rc = 0;
 636        struct q_inval *qi = iommu->qi;
 637        struct qi_desc *hw, wait_desc;
 638        int wait_index, index;
 639        unsigned long flags;
 640
 641        if (!qi)
 642                return 0;
 643
 644        hw = qi->desc;
 645
 646        spin_lock_irqsave(&qi->q_lock, flags);
 647        while (qi->free_cnt < 3) {
 648                spin_unlock_irqrestore(&qi->q_lock, flags);
 649                cpu_relax();
 650                spin_lock_irqsave(&qi->q_lock, flags);
 651        }
 652
 653        index = qi->free_head;
 654        wait_index = (index + 1) % QI_LENGTH;
 655
 656        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
 657
 658        hw[index] = *desc;
 659
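        /*
         * Queue a wait descriptor right behind the real one; its status
         * write to desc_status[wait_index] is what the polling loop below
         * watches for, so each submission consumes two slots in the ring.
         */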
 660        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
 661                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
 662        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
 663
 664        hw[wait_index] = wait_desc;
 665
 666        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
 667        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
 668
 669        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 670        qi->free_cnt -= 2;
 671
 672        /*
 673         * update the HW tail register indicating the presence of
 674         * new descriptors.
 675         */
 676        writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
 677
 678        while (qi->desc_status[wait_index] != QI_DONE) {
 679                /*
  680                 * We leave interrupts disabled to prevent the interrupt
  681                 * context from queueing another cmd while a cmd is already
  682                 * submitted and waiting for completion on this cpu. This
  683                 * avoids a deadlock where the interrupt context could wait
  684                 * indefinitely for free slots in the queue.
 685                 */
 686                rc = qi_check_fault(iommu, index);
 687                if (rc)
 688                        goto out;
 689
 690                spin_unlock(&qi->q_lock);
 691                cpu_relax();
 692                spin_lock(&qi->q_lock);
 693        }
 694out:
 695        qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
 696
 697        reclaim_free_desc(qi);
 698        spin_unlock_irqrestore(&qi->q_lock, flags);
 699
 700        return rc;
 701}
 702
 703/*
 704 * Flush the global interrupt entry cache.
 705 */
 706void qi_global_iec(struct intel_iommu *iommu)
 707{
 708        struct qi_desc desc;
 709
 710        desc.low = QI_IEC_TYPE;
 711        desc.high = 0;
 712
 713        /* should never fail */
 714        qi_submit_sync(&desc, iommu);
 715}
 716
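/*
 * A context-cache flush for a non-present entry only needs to reach the
 * hardware when caching mode is set (the IOMMU may then cache
 * not-present entries); otherwise the flush is skipped and 1 is returned.
 */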
 717int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
 718                     u64 type, int non_present_entry_flush)
 719{
 720        struct qi_desc desc;
 721
 722        if (non_present_entry_flush) {
 723                if (!cap_caching_mode(iommu->cap))
 724                        return 1;
 725                else
 726                        did = 0;
 727        }
 728
 729        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
 730                        | QI_CC_GRAN(type) | QI_CC_TYPE;
 731        desc.high = 0;
 732
 733        return qi_submit_sync(&desc, iommu);
 734}
 735
 736int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 737                   unsigned int size_order, u64 type,
 738                   int non_present_entry_flush)
 739{
 740        u8 dw = 0, dr = 0;
 741
 742        struct qi_desc desc;
 743        int ih = 0;
 744
 745        if (non_present_entry_flush) {
 746                if (!cap_caching_mode(iommu->cap))
 747                        return 1;
 748                else
 749                        did = 0;
 750        }
 751
 752        if (cap_write_drain(iommu->cap))
 753                dw = 1;
 754
 755        if (cap_read_drain(iommu->cap))
 756                dr = 1;
 757
 758        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
 759                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
 760        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
 761                | QI_IOTLB_AM(size_order);
 762
 763        return qi_submit_sync(&desc, iommu);
 764}
 765
 766/*
 767 * Disable Queued Invalidation interface.
 768 */
 769void dmar_disable_qi(struct intel_iommu *iommu)
 770{
 771        unsigned long flags;
 772        u32 sts;
 773        cycles_t start_time = get_cycles();
 774
 775        if (!ecap_qis(iommu->ecap))
 776                return;
 777
 778        spin_lock_irqsave(&iommu->register_lock, flags);
 779
  780        sts = readl(iommu->reg + DMAR_GSTS_REG);
 781        if (!(sts & DMA_GSTS_QIES))
 782                goto end;
 783
 784        /*
 785         * Give a chance to HW to complete the pending invalidation requests.
 786         */
 787        while ((readl(iommu->reg + DMAR_IQT_REG) !=
 788                readl(iommu->reg + DMAR_IQH_REG)) &&
 789                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
 790                cpu_relax();
 791
 792        iommu->gcmd &= ~DMA_GCMD_QIE;
 793
 794        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 795
 796        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
 797                      !(sts & DMA_GSTS_QIES), sts);
 798end:
 799        spin_unlock_irqrestore(&iommu->register_lock, flags);
 800}
 801
 802/*
 803 * Enable queued invalidation.
 804 */
 805static void __dmar_enable_qi(struct intel_iommu *iommu)
 806{
 807        u32 cmd, sts;
 808        unsigned long flags;
 809        struct q_inval *qi = iommu->qi;
 810
 811        qi->free_head = qi->free_tail = 0;
 812        qi->free_cnt = QI_LENGTH;
 813
 814        spin_lock_irqsave(&iommu->register_lock, flags);
 815
 816        /* write zero to the tail reg */
 817        writel(0, iommu->reg + DMAR_IQT_REG);
 818
 819        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
 820
 821        cmd = iommu->gcmd | DMA_GCMD_QIE;
 822        iommu->gcmd |= DMA_GCMD_QIE;
 823        writel(cmd, iommu->reg + DMAR_GCMD_REG);
 824
 825        /* Make sure hardware complete it */
 826        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
 827
 828        spin_unlock_irqrestore(&iommu->register_lock, flags);
 829}
 830
 831/*
  832 * Enable the Queued Invalidation interface. This is a prerequisite for
  833 * interrupt-remapping. It is also used by DMA-remapping, where it
  834 * replaces register-based IOTLB invalidation.
 835 */
 836int dmar_enable_qi(struct intel_iommu *iommu)
 837{
 838        struct q_inval *qi;
 839
 840        if (!ecap_qis(iommu->ecap))
 841                return -ENOENT;
 842
 843        /*
  844         * Queued invalidation is already set up and enabled.
 845         */
 846        if (iommu->qi)
 847                return 0;
 848
 849        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
 850        if (!iommu->qi)
 851                return -ENOMEM;
 852
 853        qi = iommu->qi;
 854
 855        qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
 856        if (!qi->desc) {
 857                kfree(qi);
  858                iommu->qi = NULL;
 859                return -ENOMEM;
 860        }
 861
  862        qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
 863        if (!qi->desc_status) {
 864                free_page((unsigned long) qi->desc);
 865                kfree(qi);
  866                iommu->qi = NULL;
 867                return -ENOMEM;
 868        }
 869
 870        qi->free_head = qi->free_tail = 0;
 871        qi->free_cnt = QI_LENGTH;
 872
 873        spin_lock_init(&qi->q_lock);
 874
 875        __dmar_enable_qi(iommu);
 876
 877        return 0;
 878}
 879
  880/* iommu interrupt handling. Most of it is MSI-like. */
 881
 882enum faulttype {
 883        DMA_REMAP,
 884        INTR_REMAP,
 885        UNKNOWN,
 886};
 887
 888static const char *dma_remap_fault_reasons[] =
 889{
 890        "Software",
 891        "Present bit in root entry is clear",
 892        "Present bit in context entry is clear",
 893        "Invalid context entry",
 894        "Access beyond MGAW",
 895        "PTE Write access is not set",
 896        "PTE Read access is not set",
 897        "Next page table ptr is invalid",
 898        "Root table address invalid",
 899        "Context table ptr is invalid",
 900        "non-zero reserved fields in RTP",
 901        "non-zero reserved fields in CTP",
 902        "non-zero reserved fields in PTE",
 903};
 904
 905static const char *intr_remap_fault_reasons[] =
 906{
 907        "Detected reserved fields in the decoded interrupt-remapped request",
 908        "Interrupt index exceeded the interrupt-remapping table size",
 909        "Present field in the IRTE entry is clear",
 910        "Error accessing interrupt-remapping table pointed by IRTA_REG",
 911        "Detected reserved fields in the IRTE entry",
 912        "Blocked a compatibility format interrupt request",
 913        "Blocked an interrupt request due to source-id verification failure",
 914};
 915
  916#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
 917
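/*
 * Map a raw fault reason code to a human-readable string: codes at 0x20
 * and above index the interrupt-remapping table, lower codes index the
 * DMA-remapping table.
 */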
 918const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 919{
  920        if (fault_reason >= 0x20 && (fault_reason < 0x20 +
  921                                     ARRAY_SIZE(intr_remap_fault_reasons))) {
 922                *fault_type = INTR_REMAP;
 923                return intr_remap_fault_reasons[fault_reason - 0x20];
 924        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
 925                *fault_type = DMA_REMAP;
 926                return dma_remap_fault_reasons[fault_reason];
 927        } else {
 928                *fault_type = UNKNOWN;
 929                return "Unknown";
 930        }
 931}
 932
 933void dmar_msi_unmask(unsigned int irq)
 934{
 935        struct intel_iommu *iommu = get_irq_data(irq);
 936        unsigned long flag;
 937
 938        /* unmask it */
 939        spin_lock_irqsave(&iommu->register_lock, flag);
 940        writel(0, iommu->reg + DMAR_FECTL_REG);
  941        /* Read back to flush the posted write */
 942        readl(iommu->reg + DMAR_FECTL_REG);
 943        spin_unlock_irqrestore(&iommu->register_lock, flag);
 944}
 945
 946void dmar_msi_mask(unsigned int irq)
 947{
 948        unsigned long flag;
 949        struct intel_iommu *iommu = get_irq_data(irq);
 950
 951        /* mask it */
 952        spin_lock_irqsave(&iommu->register_lock, flag);
 953        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
  954        /* Read back to flush the posted write */
 955        readl(iommu->reg + DMAR_FECTL_REG);
 956        spin_unlock_irqrestore(&iommu->register_lock, flag);
 957}
 958
 959void dmar_msi_write(int irq, struct msi_msg *msg)
 960{
 961        struct intel_iommu *iommu = get_irq_data(irq);
 962        unsigned long flag;
 963
 964        spin_lock_irqsave(&iommu->register_lock, flag);
 965        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
 966        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
 967        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
 968        spin_unlock_irqrestore(&iommu->register_lock, flag);
 969}
 970
 971void dmar_msi_read(int irq, struct msi_msg *msg)
 972{
 973        struct intel_iommu *iommu = get_irq_data(irq);
 974        unsigned long flag;
 975
 976        spin_lock_irqsave(&iommu->register_lock, flag);
 977        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
 978        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
 979        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
 980        spin_unlock_irqrestore(&iommu->register_lock, flag);
 981}
 982
 983static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 984                u8 fault_reason, u16 source_id, unsigned long long addr)
 985{
 986        const char *reason;
 987        int fault_type;
 988
 989        reason = dmar_get_fault_reason(fault_reason, &fault_type);
 990
 991        if (fault_type == INTR_REMAP)
  992                printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
 993                       "fault index %llx\n"
 994                        "INTR-REMAP:[fault reason %02d] %s\n",
 995                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
 996                        PCI_FUNC(source_id & 0xFF), addr >> 48,
 997                        fault_reason, reason);
 998        else
 999                printk(KERN_ERR
1000                       "DMAR:[%s] Request device [%02x:%02x.%d] "
 1001                       "fault addr %llx\n"
1002                       "DMAR:[fault reason %02d] %s\n",
1003                       (type ? "DMA Read" : "DMA Write"),
1004                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1005                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1006        return 0;
1007}
1008
1009#define PRIMARY_FAULT_REG_LEN (16)
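
/*
 * Each primary fault record is 16 bytes: the faulting page address in the
 * low quadword, the source-id in the dword at offset 8, and the fault
 * reason, request type and F (valid) bit in the dword at offset 12, which
 * dmar_fault() decodes and then clears below.
 */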
1010irqreturn_t dmar_fault(int irq, void *dev_id)
1011{
1012        struct intel_iommu *iommu = dev_id;
1013        int reg, fault_index;
1014        u32 fault_status;
1015        unsigned long flag;
1016
1017        spin_lock_irqsave(&iommu->register_lock, flag);
1018        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1019        if (fault_status)
1020                printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1021                       fault_status);
1022
1023        /* TBD: ignore advanced fault log currently */
1024        if (!(fault_status & DMA_FSTS_PPF))
1025                goto clear_rest;
1026
1027        fault_index = dma_fsts_fault_record_index(fault_status);
1028        reg = cap_fault_reg_offset(iommu->cap);
1029        while (1) {
1030                u8 fault_reason;
1031                u16 source_id;
1032                u64 guest_addr;
1033                int type;
1034                u32 data;
1035
1036                /* highest 32 bits */
1037                data = readl(iommu->reg + reg +
1038                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
1039                if (!(data & DMA_FRCD_F))
1040                        break;
1041
1042                fault_reason = dma_frcd_fault_reason(data);
1043                type = dma_frcd_type(data);
1044
1045                data = readl(iommu->reg + reg +
1046                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
1047                source_id = dma_frcd_source_id(data);
1048
1049                guest_addr = dmar_readq(iommu->reg + reg +
1050                                fault_index * PRIMARY_FAULT_REG_LEN);
1051                guest_addr = dma_frcd_page_addr(guest_addr);
1052                /* clear the fault */
1053                writel(DMA_FRCD_F, iommu->reg + reg +
1054                        fault_index * PRIMARY_FAULT_REG_LEN + 12);
1055
1056                spin_unlock_irqrestore(&iommu->register_lock, flag);
1057
1058                dmar_fault_do_one(iommu, type, fault_reason,
1059                                source_id, guest_addr);
1060
1061                fault_index++;
 1062                if (fault_index >= cap_num_fault_regs(iommu->cap))
1063                        fault_index = 0;
1064                spin_lock_irqsave(&iommu->register_lock, flag);
1065        }
1066clear_rest:
1067        /* clear all the other faults */
1068        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1069        writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1070
1071        spin_unlock_irqrestore(&iommu->register_lock, flag);
1072        return IRQ_HANDLED;
1073}
1074
1075int dmar_set_interrupt(struct intel_iommu *iommu)
1076{
1077        int irq, ret;
1078
1079        /*
1080         * Check if the fault interrupt is already initialized.
1081         */
1082        if (iommu->irq)
1083                return 0;
1084
1085        irq = create_irq();
1086        if (!irq) {
1087                printk(KERN_ERR "IOMMU: no free vectors\n");
1088                return -EINVAL;
1089        }
1090
1091        set_irq_data(irq, iommu);
1092        iommu->irq = irq;
1093
1094        ret = arch_setup_dmar_msi(irq);
1095        if (ret) {
1096                set_irq_data(irq, NULL);
1097                iommu->irq = 0;
1098                destroy_irq(irq);
 1099                return ret;
1100        }
1101
1102        ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1103        if (ret)
1104                printk(KERN_ERR "IOMMU: can't request irq\n");
1105        return ret;
1106}
1107
1108int __init enable_drhd_fault_handling(void)
1109{
1110        struct dmar_drhd_unit *drhd;
1111
1112        /*
1113         * Enable fault control interrupt.
1114         */
1115        for_each_drhd_unit(drhd) {
1116                int ret;
1117                struct intel_iommu *iommu = drhd->iommu;
1118                ret = dmar_set_interrupt(iommu);
1119
1120                if (ret) {
 1121                        printk(KERN_ERR "DRHD %Lx: failed to enable fault "
 1122                               "interrupt, ret %d\n",
1123                               (unsigned long long)drhd->reg_base_addr, ret);
1124                        return -1;
1125                }
1126        }
1127
1128        return 0;
1129}
1130
1131/*
1132 * Re-enable Queued Invalidation interface.
1133 */
1134int dmar_reenable_qi(struct intel_iommu *iommu)
1135{
1136        if (!ecap_qis(iommu->ecap))
1137                return -ENOENT;
1138
1139        if (!iommu->qi)
1140                return -ENOENT;
1141
1142        /*
1143         * First disable queued invalidation.
1144         */
1145        dmar_disable_qi(iommu);
1146        /*
 1147         * Then enable queued invalidation again. Since there are no pending
1148         * invalidation requests now, it's safe to re-enable queued
1149         * invalidation.
1150         */
1151        __dmar_enable_qi(iommu);
1152
1153        return 0;
1154}
1155