1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
45#include <asm/irq_remapping.h>
46#include <asm/cacheflush.h>
47#include <asm/iommu.h>
48
/* The root table and each context table occupy exactly one VT-d page. */
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE
51
/*
 * Device classification helpers.  @pdev is a pointer expression whose
 * target exposes ->class, ->vendor and ->device.  The argument is fully
 * parenthesized so that expressions such as "p + 1" or "&array[i]"
 * expand correctly.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
/* Address window reserved for the IOAPIC; see dmar_init_reserved_ranges(). */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

/* Address width (in bits) used by iommu_calculate_agaw(). */
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Upper bound on the address width used by iommu_calculate_max_sagaw(). */
#define MAX_AGAW_WIDTH 64
63
/*
 * Largest page frame number / largest byte address representable with a
 * guest address width of @gaw bits.  @gaw is parenthesized so that a
 * compound argument (e.g. "a ? b : c") is shifted as a whole.
 */
#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)

/*
 * DOMAIN_MAX_PFN() clamps the 64-bit maximum PFN to what fits in an
 * unsigned long; DOMAIN_MAX_ADDR() is the address of the last page
 * (the maximum PFN shifted back up by VTD_PAGE_SHIFT).
 */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
72
/* Convert a byte address to a page frame number in PAGE_SHIFT units. */
#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))


/* Each page-table level decodes LEVEL_STRIDE bits of the page frame number. */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* Bitmask with every bit from bit 12 upwards set (all bits except the low 12). */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)
98
/* Translate an AGAW index into a page-table level count (agaw + 2). */
static inline int agaw_to_level(int agaw)
{
	const int base_levels = 2;

	return base_levels + agaw;
}
103
104static inline int agaw_to_width(int agaw)
105{
106 return 30 + agaw * LEVEL_STRIDE;
107}
108
109static inline int width_to_agaw(int width)
110{
111 return (width - 30) / LEVEL_STRIDE;
112}
113
114static inline unsigned int level_to_offset_bits(int level)
115{
116 return (level - 1) * LEVEL_STRIDE;
117}
118
119static inline int pfn_level_offset(unsigned long pfn, int level)
120{
121 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
122}
123
/* Mask that clears the PFN bits lying below the index field of @level. */
static inline unsigned long level_mask(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return ~0UL << shift;
}
128
/* Number of page frames spanned by a single entry at @level. */
static inline unsigned long level_size(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return 1UL << shift;
}
133
/* Round @pfn up to the next multiple of level_size(@level). */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	unsigned long bumped = pfn + level_size(level) - 1;

	return bumped & level_mask(level);
}
138
139static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
140{
141 return 1 << ((lvl - 1) * LEVEL_STRIDE);
142}
143
144
145
146static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
147{
148 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
149}
150
151static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
152{
153 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
154}
/* VT-d page frame number of the memory described by @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	unsigned long mm_pfn = page_to_pfn(pg);

	return mm_to_dma_pfn(mm_pfn);
}
/* VT-d page frame number of the page backing kernel virtual address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	struct page *pg = virt_to_page(p);

	return page_to_dma_pfn(pg);
}
163
164
165static struct intel_iommu **g_iommus;
166
167static void __init check_tylersburg_isoch(void);
168static int rwbf_quirk;
169
170
171
172
173
174static int force_on = 0;
175
176
177
178
179
180
181
182struct root_entry {
183 u64 val;
184 u64 rsvd1;
185};
186#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
187static inline bool root_present(struct root_entry *root)
188{
189 return (root->val & 1);
190}
191static inline void set_root_present(struct root_entry *root)
192{
193 root->val |= 1;
194}
195static inline void set_root_value(struct root_entry *root, unsigned long value)
196{
197 root->val |= value & VTD_PAGE_MASK;
198}
199
200static inline struct context_entry *
201get_context_addr_from_root(struct root_entry *root)
202{
203 return (struct context_entry *)
204 (root_present(root)?phys_to_virt(
205 root->val & VTD_PAGE_MASK) :
206 NULL);
207}
208
209
210
211
212
213
214
215
216
217
218
219
220struct context_entry {
221 u64 lo;
222 u64 hi;
223};
224
225static inline bool context_present(struct context_entry *context)
226{
227 return (context->lo & 1);
228}
229static inline void context_set_present(struct context_entry *context)
230{
231 context->lo |= 1;
232}
233
234static inline void context_set_fault_enable(struct context_entry *context)
235{
236 context->lo &= (((u64)-1) << 2) | 1;
237}
238
239static inline void context_set_translation_type(struct context_entry *context,
240 unsigned long value)
241{
242 context->lo &= (((u64)-1) << 4) | 3;
243 context->lo |= (value & 3) << 2;
244}
245
246static inline void context_set_address_root(struct context_entry *context,
247 unsigned long value)
248{
249 context->lo |= value & VTD_PAGE_MASK;
250}
251
252static inline void context_set_address_width(struct context_entry *context,
253 unsigned long value)
254{
255 context->hi |= value & 7;
256}
257
258static inline void context_set_domain_id(struct context_entry *context,
259 unsigned long value)
260{
261 context->hi |= (value & ((1 << 16) - 1)) << 8;
262}
263
264static inline void context_clear_entry(struct context_entry *context)
265{
266 context->lo = 0;
267 context->hi = 0;
268}
269
270
271
272
273
274
275
276
277
278
279struct dma_pte {
280 u64 val;
281};
282
283static inline void dma_clear_pte(struct dma_pte *pte)
284{
285 pte->val = 0;
286}
287
288static inline void dma_set_pte_readable(struct dma_pte *pte)
289{
290 pte->val |= DMA_PTE_READ;
291}
292
293static inline void dma_set_pte_writable(struct dma_pte *pte)
294{
295 pte->val |= DMA_PTE_WRITE;
296}
297
298static inline void dma_set_pte_snp(struct dma_pte *pte)
299{
300 pte->val |= DMA_PTE_SNP;
301}
302
303static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
304{
305 pte->val = (pte->val & ~3) | (prot & 3);
306}
307
308static inline u64 dma_pte_addr(struct dma_pte *pte)
309{
310#ifdef CONFIG_64BIT
311 return pte->val & VTD_PAGE_MASK;
312#else
313
314 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
315#endif
316}
317
318static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
319{
320 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
321}
322
323static inline bool dma_pte_present(struct dma_pte *pte)
324{
325 return (pte->val & 3) != 0;
326}
327
328static inline bool dma_pte_superpage(struct dma_pte *pte)
329{
330 return (pte->val & (1 << 7));
331}
332
333static inline int first_pte_in_page(struct dma_pte *pte)
334{
335 return !((unsigned long)pte & ~VTD_PAGE_MASK);
336}
337
338
339
340
341
342
343
/* NOTE(review): neither variable is used in this part of the file. */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* Bit values for dmar_domain::flags. */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES	(1 << 0)
#define DOMAIN_FLAG_VIRTUAL_MACHINE		(1 << 1)
#define DOMAIN_FLAG_STATIC_IDENTITY		(1 << 2)

/* Size, in bits, of dmar_domain::iommu_bmp. */
#ifdef CONFIG_X86
# define IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
#else
# define IOMMU_UNITS_SUPPORTED	64
#endif
364
365struct dmar_domain {
366 int id;
367 int nid;
368 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
369
370
371 struct list_head devices;
372 struct iova_domain iovad;
373
374 struct dma_pte *pgd;
375 int gaw;
376
377
378 int agaw;
379
380 int flags;
381
382 int iommu_coherency;
383 int iommu_snooping;
384 int iommu_count;
385 int iommu_superpage;
386
387
388 spinlock_t iommu_lock;
389 u64 max_addr;
390};
391
392
393struct device_domain_info {
394 struct list_head link;
395 struct list_head global;
396 int segment;
397 u8 bus;
398 u8 devfn;
399 struct pci_dev *dev;
400 struct intel_iommu *iommu;
401 struct dmar_domain *domain;
402};
403
404static void flush_unmaps_timeout(unsigned long data);
405
406DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
407
408#define HIGH_WATER_MARK 250
409struct deferred_flush_tables {
410 int next;
411 struct iova *iova[HIGH_WATER_MARK];
412 struct dmar_domain *domain[HIGH_WATER_MARK];
413};
414
415static struct deferred_flush_tables *deferred_flush;
416
417
418static int g_num_of_iommus;
419
420static DEFINE_SPINLOCK(async_umap_flush_lock);
421static LIST_HEAD(unmaps_to_do);
422
423static int timer_on;
424static long list_size;
425
426static void domain_remove_dev_info(struct dmar_domain *domain);
427
428#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
429int dmar_disabled = 0;
430#else
431int dmar_disabled = 1;
432#endif
433
434int intel_iommu_enabled = 0;
435EXPORT_SYMBOL_GPL(intel_iommu_enabled);
436
437static int dmar_map_gfx = 1;
438static int dmar_forcedac;
439static int intel_iommu_strict;
440static int intel_iommu_superpage = 1;
441
442int intel_iommu_gfx_mapped;
443EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
444
445#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
446static DEFINE_SPINLOCK(device_domain_lock);
447static LIST_HEAD(device_domain_list);
448
449static struct iommu_ops intel_iommu_ops;
450
451static int __init intel_iommu_setup(char *str)
452{
453 if (!str)
454 return -EINVAL;
455 while (*str) {
456 if (!strncmp(str, "on", 2)) {
457 dmar_disabled = 0;
458 printk(KERN_INFO "Intel-IOMMU: enabled\n");
459 } else if (!strncmp(str, "off", 3)) {
460 dmar_disabled = 1;
461 printk(KERN_INFO "Intel-IOMMU: disabled\n");
462 } else if (!strncmp(str, "igfx_off", 8)) {
463 dmar_map_gfx = 0;
464 printk(KERN_INFO
465 "Intel-IOMMU: disable GFX device mapping\n");
466 } else if (!strncmp(str, "forcedac", 8)) {
467 printk(KERN_INFO
468 "Intel-IOMMU: Forcing DAC for PCI devices\n");
469 dmar_forcedac = 1;
470 } else if (!strncmp(str, "strict", 6)) {
471 printk(KERN_INFO
472 "Intel-IOMMU: disable batched IOTLB flush\n");
473 intel_iommu_strict = 1;
474 } else if (!strncmp(str, "sp_off", 6)) {
475 printk(KERN_INFO
476 "Intel-IOMMU: disable supported super page\n");
477 intel_iommu_superpage = 0;
478 }
479
480 str += strcspn(str, ",");
481 while (*str == ',')
482 str++;
483 }
484 return 0;
485}
486__setup("intel_iommu=", intel_iommu_setup);
487
/* Slab caches backing the alloc_*_mem()/free_*_mem() helpers. */
static struct kmem_cache *iommu_domain_cache;	/* alloc_domain_mem() */
static struct kmem_cache *iommu_devinfo_cache;	/* alloc_devinfo_mem() */
static struct kmem_cache *iommu_iova_cache;	/* alloc_iova_mem() */
491
492static inline void *alloc_pgtable_page(int node)
493{
494 struct page *page;
495 void *vaddr = NULL;
496
497 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
498 if (page)
499 vaddr = page_address(page);
500 return vaddr;
501}
502
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	free_page(addr);
}
507
508static inline void *alloc_domain_mem(void)
509{
510 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
511}
512
513static void free_domain_mem(void *vaddr)
514{
515 kmem_cache_free(iommu_domain_cache, vaddr);
516}
517
518static inline void * alloc_devinfo_mem(void)
519{
520 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
521}
522
523static inline void free_devinfo_mem(void *vaddr)
524{
525 kmem_cache_free(iommu_devinfo_cache, vaddr);
526}
527
528struct iova *alloc_iova_mem(void)
529{
530 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
531}
532
533void free_iova_mem(struct iova *iova)
534{
535 kmem_cache_free(iommu_iova_cache, iova);
536}
537
538
539static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
540{
541 unsigned long sagaw;
542 int agaw = -1;
543
544 sagaw = cap_sagaw(iommu->cap);
545 for (agaw = width_to_agaw(max_gaw);
546 agaw >= 0; agaw--) {
547 if (test_bit(agaw, &sagaw))
548 break;
549 }
550
551 return agaw;
552}
553
554
555
556
557int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
558{
559 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
560}
561
562
563
564
565
566
567int iommu_calculate_agaw(struct intel_iommu *iommu)
568{
569 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
570}
571
572
573static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
574{
575 int iommu_id;
576
577
578 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
579 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
580
581 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
582 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
583 return NULL;
584
585 return g_iommus[iommu_id];
586}
587
588static void domain_update_iommu_coherency(struct dmar_domain *domain)
589{
590 int i;
591
592 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
593
594 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
595
596 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
597 if (!ecap_coherent(g_iommus[i]->ecap)) {
598 domain->iommu_coherency = 0;
599 break;
600 }
601 }
602}
603
604static void domain_update_iommu_snooping(struct dmar_domain *domain)
605{
606 int i;
607
608 domain->iommu_snooping = 1;
609
610 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
611 if (!ecap_sc_support(g_iommus[i]->ecap)) {
612 domain->iommu_snooping = 0;
613 break;
614 }
615 }
616}
617
618static void domain_update_iommu_superpage(struct dmar_domain *domain)
619{
620 struct dmar_drhd_unit *drhd;
621 struct intel_iommu *iommu = NULL;
622 int mask = 0xf;
623
624 if (!intel_iommu_superpage) {
625 domain->iommu_superpage = 0;
626 return;
627 }
628
629
630 for_each_active_iommu(iommu, drhd) {
631 mask &= cap_super_page_val(iommu->cap);
632 if (!mask) {
633 break;
634 }
635 }
636 domain->iommu_superpage = fls(mask);
637}
638
639
/*
 * Refresh the capability summary cached in @domain: coherency, snoop
 * control and superpage support, in that order.
 */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);	/* ->iommu_coherency */
	domain_update_iommu_snooping(domain);	/* ->iommu_snooping */
	domain_update_iommu_superpage(domain);	/* ->iommu_superpage */
}
646
647static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
648{
649 struct dmar_drhd_unit *drhd = NULL;
650 int i;
651
652 for_each_drhd_unit(drhd) {
653 if (drhd->ignored)
654 continue;
655 if (segment != drhd->segment)
656 continue;
657
658 for (i = 0; i < drhd->devices_cnt; i++) {
659 if (drhd->devices[i] &&
660 drhd->devices[i]->bus->number == bus &&
661 drhd->devices[i]->devfn == devfn)
662 return drhd->iommu;
663 if (drhd->devices[i] &&
664 drhd->devices[i]->subordinate &&
665 drhd->devices[i]->subordinate->number <= bus &&
666 drhd->devices[i]->subordinate->busn_res.end >= bus)
667 return drhd->iommu;
668 }
669
670 if (drhd->include_all)
671 return drhd->iommu;
672 }
673
674 return NULL;
675}
676
677static void domain_flush_cache(struct dmar_domain *domain,
678 void *addr, int size)
679{
680 if (!domain->iommu_coherency)
681 clflush_cache_range(addr, size);
682}
683
684
685static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
686 u8 bus, u8 devfn)
687{
688 struct root_entry *root;
689 struct context_entry *context;
690 unsigned long phy_addr;
691 unsigned long flags;
692
693 spin_lock_irqsave(&iommu->lock, flags);
694 root = &iommu->root_entry[bus];
695 context = get_context_addr_from_root(root);
696 if (!context) {
697 context = (struct context_entry *)
698 alloc_pgtable_page(iommu->node);
699 if (!context) {
700 spin_unlock_irqrestore(&iommu->lock, flags);
701 return NULL;
702 }
703 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
704 phy_addr = virt_to_phys((void *)context);
705 set_root_value(root, phy_addr);
706 set_root_present(root);
707 __iommu_flush_cache(iommu, root, sizeof(*root));
708 }
709 spin_unlock_irqrestore(&iommu->lock, flags);
710 return &context[devfn];
711}
712
713static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
714{
715 struct root_entry *root;
716 struct context_entry *context;
717 int ret;
718 unsigned long flags;
719
720 spin_lock_irqsave(&iommu->lock, flags);
721 root = &iommu->root_entry[bus];
722 context = get_context_addr_from_root(root);
723 if (!context) {
724 ret = 0;
725 goto out;
726 }
727 ret = context_present(&context[devfn]);
728out:
729 spin_unlock_irqrestore(&iommu->lock, flags);
730 return ret;
731}
732
733static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
734{
735 struct root_entry *root;
736 struct context_entry *context;
737 unsigned long flags;
738
739 spin_lock_irqsave(&iommu->lock, flags);
740 root = &iommu->root_entry[bus];
741 context = get_context_addr_from_root(root);
742 if (context) {
743 context_clear_entry(&context[devfn]);
744 __iommu_flush_cache(iommu, &context[devfn], \
745 sizeof(*context));
746 }
747 spin_unlock_irqrestore(&iommu->lock, flags);
748}
749
750static void free_context_table(struct intel_iommu *iommu)
751{
752 struct root_entry *root;
753 int i;
754 unsigned long flags;
755 struct context_entry *context;
756
757 spin_lock_irqsave(&iommu->lock, flags);
758 if (!iommu->root_entry) {
759 goto out;
760 }
761 for (i = 0; i < ROOT_ENTRY_NR; i++) {
762 root = &iommu->root_entry[i];
763 context = get_context_addr_from_root(root);
764 if (context)
765 free_pgtable_page(context);
766 }
767 free_pgtable_page(iommu->root_entry);
768 iommu->root_entry = NULL;
769out:
770 spin_unlock_irqrestore(&iommu->lock, flags);
771}
772
773static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
774 unsigned long pfn, int target_level)
775{
776 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
777 struct dma_pte *parent, *pte = NULL;
778 int level = agaw_to_level(domain->agaw);
779 int offset;
780
781 BUG_ON(!domain->pgd);
782 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
783 parent = domain->pgd;
784
785 while (level > 0) {
786 void *tmp_page;
787
788 offset = pfn_level_offset(pfn, level);
789 pte = &parent[offset];
790 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
791 break;
792 if (level == target_level)
793 break;
794
795 if (!dma_pte_present(pte)) {
796 uint64_t pteval;
797
798 tmp_page = alloc_pgtable_page(domain->nid);
799
800 if (!tmp_page)
801 return NULL;
802
803 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
804 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
805 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
806
807 free_pgtable_page(tmp_page);
808 } else {
809 dma_pte_addr(pte);
810 domain_flush_cache(domain, pte, sizeof(*pte));
811 }
812 }
813 parent = phys_to_virt(dma_pte_addr(pte));
814 level--;
815 }
816
817 return pte;
818}
819
820
821
822static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
823 unsigned long pfn,
824 int level, int *large_page)
825{
826 struct dma_pte *parent, *pte = NULL;
827 int total = agaw_to_level(domain->agaw);
828 int offset;
829
830 parent = domain->pgd;
831 while (level <= total) {
832 offset = pfn_level_offset(pfn, total);
833 pte = &parent[offset];
834 if (level == total)
835 return pte;
836
837 if (!dma_pte_present(pte)) {
838 *large_page = total;
839 break;
840 }
841
842 if (pte->val & DMA_PTE_LARGE_PAGE) {
843 *large_page = total;
844 return pte;
845 }
846
847 parent = phys_to_virt(dma_pte_addr(pte));
848 total--;
849 }
850 return NULL;
851}
852
853
854static int dma_pte_clear_range(struct dmar_domain *domain,
855 unsigned long start_pfn,
856 unsigned long last_pfn)
857{
858 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
859 unsigned int large_page = 1;
860 struct dma_pte *first_pte, *pte;
861 int order;
862
863 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
864 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
865 BUG_ON(start_pfn > last_pfn);
866
867
868 do {
869 large_page = 1;
870 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
871 if (!pte) {
872 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
873 continue;
874 }
875 do {
876 dma_clear_pte(pte);
877 start_pfn += lvl_to_nr_pages(large_page);
878 pte++;
879 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
880
881 domain_flush_cache(domain, first_pte,
882 (void *)pte - (void *)first_pte);
883
884 } while (start_pfn && start_pfn <= last_pfn);
885
886 order = (large_page - 1) * 9;
887 return order;
888}
889
890
891static void dma_pte_free_pagetable(struct dmar_domain *domain,
892 unsigned long start_pfn,
893 unsigned long last_pfn)
894{
895 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
896 struct dma_pte *first_pte, *pte;
897 int total = agaw_to_level(domain->agaw);
898 int level;
899 unsigned long tmp;
900 int large_page = 2;
901
902 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
903 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
904 BUG_ON(start_pfn > last_pfn);
905
906
907 level = 2;
908 while (level <= total) {
909 tmp = align_to_level(start_pfn, level);
910
911
912 if (tmp + level_size(level) - 1 > last_pfn)
913 return;
914
915 do {
916 large_page = level;
917 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
918 if (large_page > level)
919 level = large_page + 1;
920 if (!pte) {
921 tmp = align_to_level(tmp + 1, level + 1);
922 continue;
923 }
924 do {
925 if (dma_pte_present(pte)) {
926 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
927 dma_clear_pte(pte);
928 }
929 pte++;
930 tmp += level_size(level);
931 } while (!first_pte_in_page(pte) &&
932 tmp + level_size(level) - 1 <= last_pfn);
933
934 domain_flush_cache(domain, first_pte,
935 (void *)pte - (void *)first_pte);
936
937 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
938 level++;
939 }
940
941 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
942 free_pgtable_page(domain->pgd);
943 domain->pgd = NULL;
944 }
945}
946
947
948static int iommu_alloc_root_entry(struct intel_iommu *iommu)
949{
950 struct root_entry *root;
951 unsigned long flags;
952
953 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
954 if (!root)
955 return -ENOMEM;
956
957 __iommu_flush_cache(iommu, root, ROOT_SIZE);
958
959 spin_lock_irqsave(&iommu->lock, flags);
960 iommu->root_entry = root;
961 spin_unlock_irqrestore(&iommu->lock, flags);
962
963 return 0;
964}
965
966static void iommu_set_root_entry(struct intel_iommu *iommu)
967{
968 void *addr;
969 u32 sts;
970 unsigned long flag;
971
972 addr = iommu->root_entry;
973
974 raw_spin_lock_irqsave(&iommu->register_lock, flag);
975 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
976
977 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
978
979
980 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
981 readl, (sts & DMA_GSTS_RTPS), sts);
982
983 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
984}
985
986static void iommu_flush_write_buffer(struct intel_iommu *iommu)
987{
988 u32 val;
989 unsigned long flag;
990
991 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
992 return;
993
994 raw_spin_lock_irqsave(&iommu->register_lock, flag);
995 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
996
997
998 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
999 readl, (!(val & DMA_GSTS_WBFS)), val);
1000
1001 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1002}
1003
1004
1005static void __iommu_flush_context(struct intel_iommu *iommu,
1006 u16 did, u16 source_id, u8 function_mask,
1007 u64 type)
1008{
1009 u64 val = 0;
1010 unsigned long flag;
1011
1012 switch (type) {
1013 case DMA_CCMD_GLOBAL_INVL:
1014 val = DMA_CCMD_GLOBAL_INVL;
1015 break;
1016 case DMA_CCMD_DOMAIN_INVL:
1017 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1018 break;
1019 case DMA_CCMD_DEVICE_INVL:
1020 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1021 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1022 break;
1023 default:
1024 BUG();
1025 }
1026 val |= DMA_CCMD_ICC;
1027
1028 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1029 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1030
1031
1032 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1033 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1034
1035 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1036}
1037
1038
1039static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1040 u64 addr, unsigned int size_order, u64 type)
1041{
1042 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1043 u64 val = 0, val_iva = 0;
1044 unsigned long flag;
1045
1046 switch (type) {
1047 case DMA_TLB_GLOBAL_FLUSH:
1048
1049 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1050 break;
1051 case DMA_TLB_DSI_FLUSH:
1052 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1053 break;
1054 case DMA_TLB_PSI_FLUSH:
1055 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1056
1057 val_iva = size_order | addr;
1058 break;
1059 default:
1060 BUG();
1061 }
1062
1063#if 0
1064
1065
1066
1067
1068 if (cap_read_drain(iommu->cap))
1069 val |= DMA_TLB_READ_DRAIN;
1070#endif
1071 if (cap_write_drain(iommu->cap))
1072 val |= DMA_TLB_WRITE_DRAIN;
1073
1074 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1075
1076 if (val_iva)
1077 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1078 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1079
1080
1081 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1082 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1083
1084 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1085
1086
1087 if (DMA_TLB_IAIG(val) == 0)
1088 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1089 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1090 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1091 (unsigned long long)DMA_TLB_IIRG(type),
1092 (unsigned long long)DMA_TLB_IAIG(val));
1093}
1094
1095static struct device_domain_info *iommu_support_dev_iotlb(
1096 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1097{
1098 int found = 0;
1099 unsigned long flags;
1100 struct device_domain_info *info;
1101 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1102
1103 if (!ecap_dev_iotlb_support(iommu->ecap))
1104 return NULL;
1105
1106 if (!iommu->qi)
1107 return NULL;
1108
1109 spin_lock_irqsave(&device_domain_lock, flags);
1110 list_for_each_entry(info, &domain->devices, link)
1111 if (info->bus == bus && info->devfn == devfn) {
1112 found = 1;
1113 break;
1114 }
1115 spin_unlock_irqrestore(&device_domain_lock, flags);
1116
1117 if (!found || !info->dev)
1118 return NULL;
1119
1120 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1121 return NULL;
1122
1123 if (!dmar_find_matched_atsr_unit(info->dev))
1124 return NULL;
1125
1126 info->iommu = iommu;
1127
1128 return info;
1129}
1130
1131static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1132{
1133 if (!info)
1134 return;
1135
1136 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1137}
1138
1139static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1140{
1141 if (!info->dev || !pci_ats_enabled(info->dev))
1142 return;
1143
1144 pci_disable_ats(info->dev);
1145}
1146
1147static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1148 u64 addr, unsigned mask)
1149{
1150 u16 sid, qdep;
1151 unsigned long flags;
1152 struct device_domain_info *info;
1153
1154 spin_lock_irqsave(&device_domain_lock, flags);
1155 list_for_each_entry(info, &domain->devices, link) {
1156 if (!info->dev || !pci_ats_enabled(info->dev))
1157 continue;
1158
1159 sid = info->bus << 8 | info->devfn;
1160 qdep = pci_ats_queue_depth(info->dev);
1161 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1162 }
1163 spin_unlock_irqrestore(&device_domain_lock, flags);
1164}
1165
1166static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1167 unsigned long pfn, unsigned int pages, int map)
1168{
1169 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1170 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1171
1172 BUG_ON(pages == 0);
1173
1174
1175
1176
1177
1178
1179
1180 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1181 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1182 DMA_TLB_DSI_FLUSH);
1183 else
1184 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1185 DMA_TLB_PSI_FLUSH);
1186
1187
1188
1189
1190
1191 if (!cap_caching_mode(iommu->cap) || !map)
1192 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1193}
1194
1195static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1196{
1197 u32 pmen;
1198 unsigned long flags;
1199
1200 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1201 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1202 pmen &= ~DMA_PMEN_EPM;
1203 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1204
1205
1206 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1207 readl, !(pmen & DMA_PMEN_PRS), pmen);
1208
1209 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1210}
1211
1212static int iommu_enable_translation(struct intel_iommu *iommu)
1213{
1214 u32 sts;
1215 unsigned long flags;
1216
1217 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1218 iommu->gcmd |= DMA_GCMD_TE;
1219 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1220
1221
1222 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1223 readl, (sts & DMA_GSTS_TES), sts);
1224
1225 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1226 return 0;
1227}
1228
1229static int iommu_disable_translation(struct intel_iommu *iommu)
1230{
1231 u32 sts;
1232 unsigned long flag;
1233
1234 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1235 iommu->gcmd &= ~DMA_GCMD_TE;
1236 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1237
1238
1239 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1240 readl, (!(sts & DMA_GSTS_TES)), sts);
1241
1242 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1243 return 0;
1244}
1245
1246
1247static int iommu_init_domains(struct intel_iommu *iommu)
1248{
1249 unsigned long ndomains;
1250 unsigned long nlongs;
1251
1252 ndomains = cap_ndoms(iommu->cap);
1253 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1254 ndomains);
1255 nlongs = BITS_TO_LONGS(ndomains);
1256
1257 spin_lock_init(&iommu->lock);
1258
1259
1260
1261
1262 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1263 if (!iommu->domain_ids) {
1264 printk(KERN_ERR "Allocating domain id array failed\n");
1265 return -ENOMEM;
1266 }
1267 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1268 GFP_KERNEL);
1269 if (!iommu->domains) {
1270 printk(KERN_ERR "Allocating domain array failed\n");
1271 return -ENOMEM;
1272 }
1273
1274
1275
1276
1277
1278 if (cap_caching_mode(iommu->cap))
1279 set_bit(0, iommu->domain_ids);
1280 return 0;
1281}
1282
1283
/* Teardown routines used by free_dmar_iommu(); defined later in the file. */
static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);
1286
1287void free_dmar_iommu(struct intel_iommu *iommu)
1288{
1289 struct dmar_domain *domain;
1290 int i;
1291 unsigned long flags;
1292
1293 if ((iommu->domains) && (iommu->domain_ids)) {
1294 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1295 domain = iommu->domains[i];
1296 clear_bit(i, iommu->domain_ids);
1297
1298 spin_lock_irqsave(&domain->iommu_lock, flags);
1299 if (--domain->iommu_count == 0) {
1300 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1301 vm_domain_exit(domain);
1302 else
1303 domain_exit(domain);
1304 }
1305 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1306 }
1307 }
1308
1309 if (iommu->gcmd & DMA_GCMD_TE)
1310 iommu_disable_translation(iommu);
1311
1312 if (iommu->irq) {
1313 irq_set_handler_data(iommu->irq, NULL);
1314
1315 free_irq(iommu->irq, iommu);
1316 destroy_irq(iommu->irq);
1317 }
1318
1319 kfree(iommu->domains);
1320 kfree(iommu->domain_ids);
1321
1322 g_iommus[iommu->seq_id] = NULL;
1323
1324
1325 for (i = 0; i < g_num_of_iommus; i++) {
1326 if (g_iommus[i])
1327 break;
1328 }
1329
1330 if (i == g_num_of_iommus)
1331 kfree(g_iommus);
1332
1333
1334 free_context_table(iommu);
1335}
1336
1337static struct dmar_domain *alloc_domain(void)
1338{
1339 struct dmar_domain *domain;
1340
1341 domain = alloc_domain_mem();
1342 if (!domain)
1343 return NULL;
1344
1345 domain->nid = -1;
1346 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1347 domain->flags = 0;
1348
1349 return domain;
1350}
1351
1352static int iommu_attach_domain(struct dmar_domain *domain,
1353 struct intel_iommu *iommu)
1354{
1355 int num;
1356 unsigned long ndomains;
1357 unsigned long flags;
1358
1359 ndomains = cap_ndoms(iommu->cap);
1360
1361 spin_lock_irqsave(&iommu->lock, flags);
1362
1363 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1364 if (num >= ndomains) {
1365 spin_unlock_irqrestore(&iommu->lock, flags);
1366 printk(KERN_ERR "IOMMU: no free domain ids\n");
1367 return -ENOMEM;
1368 }
1369
1370 domain->id = num;
1371 set_bit(num, iommu->domain_ids);
1372 set_bit(iommu->seq_id, domain->iommu_bmp);
1373 iommu->domains[num] = domain;
1374 spin_unlock_irqrestore(&iommu->lock, flags);
1375
1376 return 0;
1377}
1378
1379static void iommu_detach_domain(struct dmar_domain *domain,
1380 struct intel_iommu *iommu)
1381{
1382 unsigned long flags;
1383 int num, ndomains;
1384 int found = 0;
1385
1386 spin_lock_irqsave(&iommu->lock, flags);
1387 ndomains = cap_ndoms(iommu->cap);
1388 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1389 if (iommu->domains[num] == domain) {
1390 found = 1;
1391 break;
1392 }
1393 }
1394
1395 if (found) {
1396 clear_bit(num, iommu->domain_ids);
1397 clear_bit(iommu->seq_id, domain->iommu_bmp);
1398 iommu->domains[num] = NULL;
1399 }
1400 spin_unlock_irqrestore(&iommu->lock, flags);
1401}
1402
1403static struct iova_domain reserved_iova_list;
1404static struct lock_class_key reserved_rbtree_key;
1405
1406static int dmar_init_reserved_ranges(void)
1407{
1408 struct pci_dev *pdev = NULL;
1409 struct iova *iova;
1410 int i;
1411
1412 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1413
1414 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1415 &reserved_rbtree_key);
1416
1417
1418 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1419 IOVA_PFN(IOAPIC_RANGE_END));
1420 if (!iova) {
1421 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1422 return -ENODEV;
1423 }
1424
1425
1426 for_each_pci_dev(pdev) {
1427 struct resource *r;
1428
1429 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1430 r = &pdev->resource[i];
1431 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1432 continue;
1433 iova = reserve_iova(&reserved_iova_list,
1434 IOVA_PFN(r->start),
1435 IOVA_PFN(r->end));
1436 if (!iova) {
1437 printk(KERN_ERR "Reserve iova failed\n");
1438 return -ENODEV;
1439 }
1440 }
1441 }
1442 return 0;
1443}
1444
1445static void domain_reserve_special_ranges(struct dmar_domain *domain)
1446{
1447 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1448}
1449
/*
 * Round a guest address width up to the next value of the form
 * 12 + 9 * n (a 12-bit page offset plus whole 9-bit table levels),
 * capped at 64 bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int width = rem ? gaw + 9 - rem : gaw;

	return width > 64 ? 64 : width;
}
1463
1464static int domain_init(struct dmar_domain *domain, int guest_width)
1465{
1466 struct intel_iommu *iommu;
1467 int adjust_width, agaw;
1468 unsigned long sagaw;
1469
1470 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1471 spin_lock_init(&domain->iommu_lock);
1472
1473 domain_reserve_special_ranges(domain);
1474
1475
1476 iommu = domain_get_iommu(domain);
1477 if (guest_width > cap_mgaw(iommu->cap))
1478 guest_width = cap_mgaw(iommu->cap);
1479 domain->gaw = guest_width;
1480 adjust_width = guestwidth_to_adjustwidth(guest_width);
1481 agaw = width_to_agaw(adjust_width);
1482 sagaw = cap_sagaw(iommu->cap);
1483 if (!test_bit(agaw, &sagaw)) {
1484
1485 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1486 agaw = find_next_bit(&sagaw, 5, agaw);
1487 if (agaw >= 5)
1488 return -ENODEV;
1489 }
1490 domain->agaw = agaw;
1491 INIT_LIST_HEAD(&domain->devices);
1492
1493 if (ecap_coherent(iommu->ecap))
1494 domain->iommu_coherency = 1;
1495 else
1496 domain->iommu_coherency = 0;
1497
1498 if (ecap_sc_support(iommu->ecap))
1499 domain->iommu_snooping = 1;
1500 else
1501 domain->iommu_snooping = 0;
1502
1503 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1504 domain->iommu_count = 1;
1505 domain->nid = iommu->node;
1506
1507
1508 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1509 if (!domain->pgd)
1510 return -ENOMEM;
1511 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1512 return 0;
1513}
1514
1515static void domain_exit(struct dmar_domain *domain)
1516{
1517 struct dmar_drhd_unit *drhd;
1518 struct intel_iommu *iommu;
1519
1520
1521 if (!domain)
1522 return;
1523
1524
1525 if (!intel_iommu_strict)
1526 flush_unmaps_timeout(0);
1527
1528 domain_remove_dev_info(domain);
1529
1530 put_iova_domain(&domain->iovad);
1531
1532
1533 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1534
1535
1536 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1537
1538 for_each_active_iommu(iommu, drhd)
1539 if (test_bit(iommu->seq_id, domain->iommu_bmp))
1540 iommu_detach_domain(domain, iommu);
1541
1542 free_domain_mem(domain);
1543}
1544
1545static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1546 u8 bus, u8 devfn, int translation)
1547{
1548 struct context_entry *context;
1549 unsigned long flags;
1550 struct intel_iommu *iommu;
1551 struct dma_pte *pgd;
1552 unsigned long num;
1553 unsigned long ndomains;
1554 int id;
1555 int agaw;
1556 struct device_domain_info *info = NULL;
1557
1558 pr_debug("Set context mapping for %02x:%02x.%d\n",
1559 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1560
1561 BUG_ON(!domain->pgd);
1562 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1563 translation != CONTEXT_TT_MULTI_LEVEL);
1564
1565 iommu = device_to_iommu(segment, bus, devfn);
1566 if (!iommu)
1567 return -ENODEV;
1568
1569 context = device_to_context_entry(iommu, bus, devfn);
1570 if (!context)
1571 return -ENOMEM;
1572 spin_lock_irqsave(&iommu->lock, flags);
1573 if (context_present(context)) {
1574 spin_unlock_irqrestore(&iommu->lock, flags);
1575 return 0;
1576 }
1577
1578 id = domain->id;
1579 pgd = domain->pgd;
1580
1581 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1582 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1583 int found = 0;
1584
1585
1586 ndomains = cap_ndoms(iommu->cap);
1587 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1588 if (iommu->domains[num] == domain) {
1589 id = num;
1590 found = 1;
1591 break;
1592 }
1593 }
1594
1595 if (found == 0) {
1596 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1597 if (num >= ndomains) {
1598 spin_unlock_irqrestore(&iommu->lock, flags);
1599 printk(KERN_ERR "IOMMU: no free domain ids\n");
1600 return -EFAULT;
1601 }
1602
1603 set_bit(num, iommu->domain_ids);
1604 iommu->domains[num] = domain;
1605 id = num;
1606 }
1607
1608
1609
1610
1611
1612 if (translation != CONTEXT_TT_PASS_THROUGH) {
1613 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1614 pgd = phys_to_virt(dma_pte_addr(pgd));
1615 if (!dma_pte_present(pgd)) {
1616 spin_unlock_irqrestore(&iommu->lock, flags);
1617 return -ENOMEM;
1618 }
1619 }
1620 }
1621 }
1622
1623 context_set_domain_id(context, id);
1624
1625 if (translation != CONTEXT_TT_PASS_THROUGH) {
1626 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1627 translation = info ? CONTEXT_TT_DEV_IOTLB :
1628 CONTEXT_TT_MULTI_LEVEL;
1629 }
1630
1631
1632
1633
1634 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1635 context_set_address_width(context, iommu->msagaw);
1636 else {
1637 context_set_address_root(context, virt_to_phys(pgd));
1638 context_set_address_width(context, iommu->agaw);
1639 }
1640
1641 context_set_translation_type(context, translation);
1642 context_set_fault_enable(context);
1643 context_set_present(context);
1644 domain_flush_cache(domain, context, sizeof(*context));
1645
1646
1647
1648
1649
1650
1651
1652 if (cap_caching_mode(iommu->cap)) {
1653 iommu->flush.flush_context(iommu, 0,
1654 (((u16)bus) << 8) | devfn,
1655 DMA_CCMD_MASK_NOBIT,
1656 DMA_CCMD_DEVICE_INVL);
1657 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1658 } else {
1659 iommu_flush_write_buffer(iommu);
1660 }
1661 iommu_enable_dev_iotlb(info);
1662 spin_unlock_irqrestore(&iommu->lock, flags);
1663
1664 spin_lock_irqsave(&domain->iommu_lock, flags);
1665 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1666 domain->iommu_count++;
1667 if (domain->iommu_count == 1)
1668 domain->nid = iommu->node;
1669 domain_update_iommu_cap(domain);
1670 }
1671 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1672 return 0;
1673}
1674
1675static int
1676domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1677 int translation)
1678{
1679 int ret;
1680 struct pci_dev *tmp, *parent;
1681
1682 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1683 pdev->bus->number, pdev->devfn,
1684 translation);
1685 if (ret)
1686 return ret;
1687
1688
1689 tmp = pci_find_upstream_pcie_bridge(pdev);
1690 if (!tmp)
1691 return 0;
1692
1693 parent = pdev->bus->self;
1694 while (parent != tmp) {
1695 ret = domain_context_mapping_one(domain,
1696 pci_domain_nr(parent->bus),
1697 parent->bus->number,
1698 parent->devfn, translation);
1699 if (ret)
1700 return ret;
1701 parent = parent->bus->self;
1702 }
1703 if (pci_is_pcie(tmp))
1704 return domain_context_mapping_one(domain,
1705 pci_domain_nr(tmp->subordinate),
1706 tmp->subordinate->number, 0,
1707 translation);
1708 else
1709 return domain_context_mapping_one(domain,
1710 pci_domain_nr(tmp->bus),
1711 tmp->bus->number,
1712 tmp->devfn,
1713 translation);
1714}
1715
1716static int domain_context_mapped(struct pci_dev *pdev)
1717{
1718 int ret;
1719 struct pci_dev *tmp, *parent;
1720 struct intel_iommu *iommu;
1721
1722 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1723 pdev->devfn);
1724 if (!iommu)
1725 return -ENODEV;
1726
1727 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1728 if (!ret)
1729 return ret;
1730
1731 tmp = pci_find_upstream_pcie_bridge(pdev);
1732 if (!tmp)
1733 return ret;
1734
1735 parent = pdev->bus->self;
1736 while (parent != tmp) {
1737 ret = device_context_mapped(iommu, parent->bus->number,
1738 parent->devfn);
1739 if (!ret)
1740 return ret;
1741 parent = parent->bus->self;
1742 }
1743 if (pci_is_pcie(tmp))
1744 return device_context_mapped(iommu, tmp->subordinate->number,
1745 0);
1746 else
1747 return device_context_mapped(iommu, tmp->bus->number,
1748 tmp->devfn);
1749}
1750
1751
1752static inline unsigned long aligned_nrpages(unsigned long host_addr,
1753 size_t size)
1754{
1755 host_addr &= ~PAGE_MASK;
1756 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1757}
1758
1759
1760static inline int hardware_largepage_caps(struct dmar_domain *domain,
1761 unsigned long iov_pfn,
1762 unsigned long phy_pfn,
1763 unsigned long pages)
1764{
1765 int support, level = 1;
1766 unsigned long pfnmerge;
1767
1768 support = domain->iommu_superpage;
1769
1770
1771
1772
1773
1774 pfnmerge = iov_pfn | phy_pfn;
1775
1776 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1777 pages >>= VTD_STRIDE_SHIFT;
1778 if (!pages)
1779 break;
1780 pfnmerge >>= VTD_STRIDE_SHIFT;
1781 level++;
1782 support--;
1783 }
1784 return level;
1785}
1786
/*
 * __domain_mapping - install PTEs for a range of IOVA page frames
 * @domain:   domain whose page table is updated
 * @iov_pfn:  first IOVA page frame to map
 * @sg:       scatterlist supplying the physical pages, or NULL
 * @phys_pfn: first physical page frame (used only when @sg is NULL)
 * @nr_pages: number of pages to map
 * @prot:     DMA_PTE_READ / DMA_PTE_WRITE / DMA_PTE_SNP bits
 *
 * With @sg the physical addresses come from each scatterlist entry in
 * turn and the entry's dma_address / dma_length are filled in; without
 * it a single physically contiguous run starting at @phys_pfn is mapped.
 *
 * Returns 0 on success, -EINVAL if @prot has neither read nor write
 * permission, or -ENOMEM if a PTE cannot be obtained.
 */
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned long sg_res;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	/* The last pfn of the range must fit in the domain's address width. */
	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (sg)
		sg_res = 0;
	else {
		/* nr_pages + 1 keeps sg_res non-zero for the whole loop. */
		sg_res = nr_pages + 1;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			/* Start of a new scatterlist entry. */
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			/* Pick the page size and look up the PTE for this position. */
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* A large page is flagged in the PTE value itself. */
			if (largepage_lvl > 1) {
				pteval |= DMA_PTE_LARGE_PAGE;
				/*
				 * Clear and free whatever currently covers the
				 * range this large page will span.
				 */
				dma_pte_clear_range(domain, iov_pfn,
						    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
				dma_pte_free_pagetable(domain, iov_pfn,
						       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/*
		 * The PTE is expected to be clear: install the new value only
		 * if the old one is zero, and complain loudly otherwise.
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			/* Limit the mapping dumps to the first five occurrences. */
			static int dumps = 5;
			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
			       iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		/* Advance every cursor by the pages this PTE covers. */
		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/*
		 * Move to the next PTE.  Flush the PTEs written so far and
		 * force a fresh lookup on the next iteration when the range
		 * is done, when the new PTE starts another page-table page,
		 * or when a large page was in use and the remainder of this
		 * sg entry is too small for another one.
		 */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}
1893
/*
 * Map @nr_pages pages at @iov_pfn using the physical pages described by
 * the scatterlist @sg.  Thin wrapper around __domain_mapping() with no
 * explicit physical pfn.
 */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	int rc = __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);

	return rc;
}
1900
1901static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1902 unsigned long phys_pfn, unsigned long nr_pages,
1903 int prot)
1904{
1905 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1906}
1907
1908static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1909{
1910 if (!iommu)
1911 return;
1912
1913 clear_context_table(iommu, bus, devfn);
1914 iommu->flush.flush_context(iommu, 0, 0, 0,
1915 DMA_CCMD_GLOBAL_INVL);
1916 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1917}
1918
1919static inline void unlink_domain_info(struct device_domain_info *info)
1920{
1921 assert_spin_locked(&device_domain_lock);
1922 list_del(&info->link);
1923 list_del(&info->global);
1924 if (info->dev)
1925 info->dev->dev.archdata.iommu = NULL;
1926}
1927
1928static void domain_remove_dev_info(struct dmar_domain *domain)
1929{
1930 struct device_domain_info *info;
1931 unsigned long flags;
1932 struct intel_iommu *iommu;
1933
1934 spin_lock_irqsave(&device_domain_lock, flags);
1935 while (!list_empty(&domain->devices)) {
1936 info = list_entry(domain->devices.next,
1937 struct device_domain_info, link);
1938 unlink_domain_info(info);
1939 spin_unlock_irqrestore(&device_domain_lock, flags);
1940
1941 iommu_disable_dev_iotlb(info);
1942 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1943 iommu_detach_dev(iommu, info->bus, info->devfn);
1944 free_devinfo_mem(info);
1945
1946 spin_lock_irqsave(&device_domain_lock, flags);
1947 }
1948 spin_unlock_irqrestore(&device_domain_lock, flags);
1949}
1950
1951
1952
1953
1954
1955static struct dmar_domain *
1956find_domain(struct pci_dev *pdev)
1957{
1958 struct device_domain_info *info;
1959
1960
1961 info = pdev->dev.archdata.iommu;
1962 if (info)
1963 return info->domain;
1964 return NULL;
1965}
1966
1967
/*
 * get_domain_for_dev - find or create the domain for @pdev
 * @pdev: PCI device needing a domain
 * @gaw:  guest address width to request when a new domain is created
 *
 * Returns the domain already attached to @pdev if there is one.
 * Otherwise a domain registered for the device's upstream PCIe-to-PCI
 * bridge is reused, or a new one is allocated, attached to the matching
 * IOMMU and initialised.  A device_domain_info linking @pdev to the
 * domain is then added to the domain's list and the global list.
 *
 * Returns the domain, or NULL on failure (via the error path, which
 * returns whatever find_domain() reports at that point).
 */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;
	int segment;
	int ret;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	segment = pci_domain_nr(pdev->bus);

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		/*
		 * Devices behind the bridge are keyed by the bridge: devfn 0
		 * on its secondary bus for a PCIe bridge, otherwise the
		 * bridge's own bus/devfn.
		 */
		if (pci_is_pcie(dev_tmp)) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->segment == segment &&
			    info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* The bridge already has a domain: reuse it. */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	domain = alloc_domain();
	if (!domain)
		goto error;

	/* Allocate a new domain on the IOMMU that covers this device. */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		free_domain_mem(domain);
		return NULL;
	}
	iommu = drhd->iommu;

	ret = iommu_attach_domain(domain, iommu);
	if (ret) {
		free_domain_mem(domain);
		goto error;
	}

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* Register the bridge itself so later devices behind it find the domain. */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->segment = segment;
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under the p2p bridge. */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/*
		 * Re-check under the lock: another caller may have registered
		 * the bridge since the lookup above.
		 */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->segment == segment &&
			    tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			/* Lost the race: discard our domain and use theirs. */
			spin_unlock_irqrestore(&device_domain_lock, flags);
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
			spin_unlock_irqrestore(&device_domain_lock, flags);
		}
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->segment = segment;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* Somebody else may have attached the device in the meantime. */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* Re-check: a concurrent caller may have set the domain up. */
	return find_domain(pdev);
}
2103
2104static int iommu_identity_mapping;
2105#define IDENTMAP_ALL 1
2106#define IDENTMAP_GFX 2
2107#define IDENTMAP_AZALIA 4
2108
2109static int iommu_domain_identity_map(struct dmar_domain *domain,
2110 unsigned long long start,
2111 unsigned long long end)
2112{
2113 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2114 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2115
2116 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2117 dma_to_mm_pfn(last_vpfn))) {
2118 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2119 return -ENOMEM;
2120 }
2121
2122 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2123 start, end, domain->id);
2124
2125
2126
2127
2128 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2129
2130 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2131 last_vpfn - first_vpfn + 1,
2132 DMA_PTE_READ|DMA_PTE_WRITE);
2133}
2134
2135static int iommu_prepare_identity_map(struct pci_dev *pdev,
2136 unsigned long long start,
2137 unsigned long long end)
2138{
2139 struct dmar_domain *domain;
2140 int ret;
2141
2142 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2143 if (!domain)
2144 return -ENOMEM;
2145
2146
2147
2148
2149
2150 if (domain == si_domain && hw_pass_through) {
2151 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2152 pci_name(pdev), start, end);
2153 return 0;
2154 }
2155
2156 printk(KERN_INFO
2157 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2158 pci_name(pdev), start, end);
2159
2160 if (end < start) {
2161 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2162 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2163 dmi_get_system_info(DMI_BIOS_VENDOR),
2164 dmi_get_system_info(DMI_BIOS_VERSION),
2165 dmi_get_system_info(DMI_PRODUCT_VERSION));
2166 ret = -EIO;
2167 goto error;
2168 }
2169
2170 if (end >> agaw_to_width(domain->agaw)) {
2171 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2172 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2173 agaw_to_width(domain->agaw),
2174 dmi_get_system_info(DMI_BIOS_VENDOR),
2175 dmi_get_system_info(DMI_BIOS_VERSION),
2176 dmi_get_system_info(DMI_PRODUCT_VERSION));
2177 ret = -EIO;
2178 goto error;
2179 }
2180
2181 ret = iommu_domain_identity_map(domain, start, end);
2182 if (ret)
2183 goto error;
2184
2185
2186 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2187 if (ret)
2188 goto error;
2189
2190 return 0;
2191
2192 error:
2193 domain_exit(domain);
2194 return ret;
2195}
2196
2197static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2198 struct pci_dev *pdev)
2199{
2200 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2201 return 0;
2202 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2203 rmrr->end_address);
2204}
2205
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
/*
 * Floppy workaround: identity-map the first 16MiB for the ISA/LPC
 * bridge.  Failure is reported but not fatal.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

	/* pci_get_class() returned a counted reference; release it. */
	pci_dev_put(pdev);
}
#else
/* Without the floppy workaround there is nothing to prepare. */
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2230
2231static int md_domain_init(struct dmar_domain *domain, int guest_width);
2232
2233static int __init si_domain_init(int hw)
2234{
2235 struct dmar_drhd_unit *drhd;
2236 struct intel_iommu *iommu;
2237 int nid, ret = 0;
2238
2239 si_domain = alloc_domain();
2240 if (!si_domain)
2241 return -EFAULT;
2242
2243 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2244
2245 for_each_active_iommu(iommu, drhd) {
2246 ret = iommu_attach_domain(si_domain, iommu);
2247 if (ret) {
2248 domain_exit(si_domain);
2249 return -EFAULT;
2250 }
2251 }
2252
2253 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2254 domain_exit(si_domain);
2255 return -EFAULT;
2256 }
2257
2258 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2259
2260 if (hw)
2261 return 0;
2262
2263 for_each_online_node(nid) {
2264 unsigned long start_pfn, end_pfn;
2265 int i;
2266
2267 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2268 ret = iommu_domain_identity_map(si_domain,
2269 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2270 if (ret)
2271 return ret;
2272 }
2273 }
2274
2275 return 0;
2276}
2277
2278static void domain_remove_one_dev_info(struct dmar_domain *domain,
2279 struct pci_dev *pdev);
2280static int identity_mapping(struct pci_dev *pdev)
2281{
2282 struct device_domain_info *info;
2283
2284 if (likely(!iommu_identity_mapping))
2285 return 0;
2286
2287 info = pdev->dev.archdata.iommu;
2288 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2289 return (info->domain == si_domain);
2290
2291 return 0;
2292}
2293
2294static int domain_add_dev_info(struct dmar_domain *domain,
2295 struct pci_dev *pdev,
2296 int translation)
2297{
2298 struct device_domain_info *info;
2299 unsigned long flags;
2300 int ret;
2301
2302 info = alloc_devinfo_mem();
2303 if (!info)
2304 return -ENOMEM;
2305
2306 info->segment = pci_domain_nr(pdev->bus);
2307 info->bus = pdev->bus->number;
2308 info->devfn = pdev->devfn;
2309 info->dev = pdev;
2310 info->domain = domain;
2311
2312 spin_lock_irqsave(&device_domain_lock, flags);
2313 list_add(&info->link, &domain->devices);
2314 list_add(&info->global, &device_domain_list);
2315 pdev->dev.archdata.iommu = info;
2316 spin_unlock_irqrestore(&device_domain_lock, flags);
2317
2318 ret = domain_context_mapping(domain, pdev, translation);
2319 if (ret) {
2320 spin_lock_irqsave(&device_domain_lock, flags);
2321 unlink_domain_info(info);
2322 spin_unlock_irqrestore(&device_domain_lock, flags);
2323 free_devinfo_mem(info);
2324 return ret;
2325 }
2326
2327 return 0;
2328}
2329
2330static bool device_has_rmrr(struct pci_dev *dev)
2331{
2332 struct dmar_rmrr_unit *rmrr;
2333 int i;
2334
2335 for_each_rmrr_units(rmrr) {
2336 for (i = 0; i < rmrr->devices_cnt; i++) {
2337
2338
2339
2340
2341 if (rmrr->devices[i] == dev)
2342 return true;
2343 }
2344 }
2345 return false;
2346}
2347
2348static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2349{
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359 if (device_has_rmrr(pdev) &&
2360 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2361 return 0;
2362
2363 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2364 return 1;
2365
2366 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2367 return 1;
2368
2369 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2370 return 0;
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389 if (!pci_is_pcie(pdev)) {
2390 if (!pci_is_root_bus(pdev->bus))
2391 return 0;
2392 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2393 return 0;
2394 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2395 return 0;
2396
2397
2398
2399
2400
2401
2402 if (!startup) {
2403
2404
2405
2406
2407 u64 dma_mask = pdev->dma_mask;
2408
2409 if (pdev->dev.coherent_dma_mask &&
2410 pdev->dev.coherent_dma_mask < dma_mask)
2411 dma_mask = pdev->dev.coherent_dma_mask;
2412
2413 return dma_mask >= dma_get_required_mask(&pdev->dev);
2414 }
2415
2416 return 1;
2417}
2418
2419static int __init iommu_prepare_static_identity_mapping(int hw)
2420{
2421 struct pci_dev *pdev = NULL;
2422 int ret;
2423
2424 ret = si_domain_init(hw);
2425 if (ret)
2426 return -EFAULT;
2427
2428 for_each_pci_dev(pdev) {
2429 if (iommu_should_identity_map(pdev, 1)) {
2430 ret = domain_add_dev_info(si_domain, pdev,
2431 hw ? CONTEXT_TT_PASS_THROUGH :
2432 CONTEXT_TT_MULTI_LEVEL);
2433 if (ret) {
2434
2435 if (ret == -ENODEV)
2436 continue;
2437 return ret;
2438 }
2439 pr_info("IOMMU: %s identity mapping for device %s\n",
2440 hw ? "hardware" : "software", pci_name(pdev));
2441 }
2442 }
2443
2444 return 0;
2445}
2446
/*
 * init_dmars - bring up all non-ignored DMA remapping units
 *
 * Counts the DRHD units, allocates the global IOMMU and deferred-flush
 * arrays, initialises per-IOMMU domain tables and root entries, selects
 * queued or register-based invalidation, sets up static identity and
 * RMRR/ISA identity mappings, and finally programs the root entry and
 * enables translation on each unit.
 *
 * Returns 0 on success or a negative errno; on failure free_iommu() is
 * called for every non-ignored unit and g_iommus is freed.
 *
 * NOTE(review): deferred_flush is not freed on the error path, and
 * free_dmar_iommu() in this file may also kfree(g_iommus) once its last
 * entry is cleared - confirm whether free_iommu() reaches that code,
 * since that would make the kfree() below a double free.
 */
static int __init init_dmars(void)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_rmrr_unit *rmrr;
	struct pci_dev *pdev;
	struct intel_iommu *iommu;
	int i, ret;

	/*
	 * Count the DRHD units, capping the count at
	 * IOMMU_UNITS_SUPPORTED and logging once if it is exceeded.
	 */
	for_each_drhd_unit(drhd) {
		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
			g_num_of_iommus++;
			continue;
		}
		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
			    IOMMU_UNITS_SUPPORTED);
	}

	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
			GFP_KERNEL);
	if (!g_iommus) {
		printk(KERN_ERR "Allocating global iommu array failed\n");
		ret = -ENOMEM;
		goto error;
	}

	deferred_flush = kzalloc(g_num_of_iommus *
		sizeof(struct deferred_flush_tables), GFP_KERNEL);
	if (!deferred_flush) {
		ret = -ENOMEM;
		goto error;
	}

	/* Per-IOMMU setup: domain tables and root entry. */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;
		g_iommus[iommu->seq_id] = iommu;

		ret = iommu_init_domains(iommu);
		if (ret)
			goto error;

		ret = iommu_alloc_root_entry(iommu);
		if (ret) {
			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
			goto error;
		}
		/* Pass-through is usable only if every IOMMU supports it. */
		if (!ecap_pass_through(iommu->ecap))
			hw_pass_through = 0;
	}

	/*
	 * For units that do not already have queued invalidation set up
	 * (iommu->qi is NULL), handle faults once and disable QI before
	 * it is (re)enabled below.
	 * NOTE(review): presumably this clears state left behind by
	 * firmware or a previous kernel - confirm.
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		if (iommu->qi)
			continue;

		dmar_fault(-1, iommu);
		dmar_disable_qi(iommu);
	}

	/* Choose the invalidation interface for each unit. */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		iommu = drhd->iommu;

		if (dmar_enable_qi(iommu)) {
			/*
			 * Queued invalidation could not be enabled; fall
			 * back to register-based invalidation.
			 */
			iommu->flush.flush_context = __iommu_flush_context;
			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
			       "invalidation\n",
				iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		} else {
			iommu->flush.flush_context = qi_flush_context;
			iommu->flush.flush_iotlb = qi_flush_iotlb;
			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
			       "invalidation\n",
				iommu->seq_id,
			       (unsigned long long)drhd->reg_base_addr);
		}
	}

	if (iommu_pass_through)
		iommu_identity_mapping |= IDENTMAP_ALL;

#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
	iommu_identity_mapping |= IDENTMAP_GFX;
#endif

	check_tylersburg_isoch();

	/*
	 * If any form of identity mapping was requested, set up the
	 * static identity domain and its context entries now.
	 */
	if (iommu_identity_mapping) {
		ret = iommu_prepare_static_identity_mapping(hw_pass_through);
		if (ret) {
			printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
			goto error;
		}
	}

	/*
	 * Identity-map each RMRR region for each device listed in it.
	 * Failures are logged but do not abort initialisation.
	 */
	printk(KERN_INFO "IOMMU: Setting RMRR:\n");
	for_each_rmrr_units(rmrr) {
		for (i = 0; i < rmrr->devices_cnt; i++) {
			pdev = rmrr->devices[i];
			/* Skip device-list slots that hold no device. */
			if (!pdev)
				continue;
			ret = iommu_prepare_rmrr_dev(rmrr, pdev);
			if (ret)
				printk(KERN_ERR
				       "IOMMU: mapping reserved region failed\n");
		}
	}

	iommu_prepare_isa();

	/*
	 * Final pass over each unit: set up the fault interrupt, program
	 * the root entry, invalidate the context cache and IOTLB
	 * globally, then enable translation.
	 */
	for_each_drhd_unit(drhd) {
		if (drhd->ignored) {
			/*
			 * Ignored units are not enabled, but when force_on
			 * is set their protected memory regions are still
			 * disabled.
			 */
			if (force_on)
				iommu_disable_protect_mem_regions(drhd->iommu);
			continue;
		}
		iommu = drhd->iommu;

		iommu_flush_write_buffer(iommu);

		ret = dmar_set_interrupt(iommu);
		if (ret)
			goto error;

		iommu_set_root_entry(iommu);

		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);

		ret = iommu_enable_translation(iommu);
		if (ret)
			goto error;

		iommu_disable_protect_mem_regions(iommu);
	}

	return 0;
error:
	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		iommu = drhd->iommu;
		free_iommu(iommu);
	}
	kfree(g_iommus);
	return ret;
}
2672
2673
2674static struct iova *intel_alloc_iova(struct device *dev,
2675 struct dmar_domain *domain,
2676 unsigned long nrpages, uint64_t dma_mask)
2677{
2678 struct pci_dev *pdev = to_pci_dev(dev);
2679 struct iova *iova = NULL;
2680
2681
2682 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2683
2684 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2685
2686
2687
2688
2689
2690 iova = alloc_iova(&domain->iovad, nrpages,
2691 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2692 if (iova)
2693 return iova;
2694 }
2695 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2696 if (unlikely(!iova)) {
2697 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2698 nrpages, pci_name(pdev));
2699 return NULL;
2700 }
2701
2702 return iova;
2703}
2704
2705static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2706{
2707 struct dmar_domain *domain;
2708 int ret;
2709
2710 domain = get_domain_for_dev(pdev,
2711 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2712 if (!domain) {
2713 printk(KERN_ERR
2714 "Allocating domain for %s failed", pci_name(pdev));
2715 return NULL;
2716 }
2717
2718
2719 if (unlikely(!domain_context_mapped(pdev))) {
2720 ret = domain_context_mapping(domain, pdev,
2721 CONTEXT_TT_MULTI_LEVEL);
2722 if (ret) {
2723 printk(KERN_ERR
2724 "Domain context map for %s failed",
2725 pci_name(pdev));
2726 return NULL;
2727 }
2728 }
2729
2730 return domain;
2731}
2732
2733static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2734{
2735 struct device_domain_info *info;
2736
2737
2738 info = dev->dev.archdata.iommu;
2739 if (likely(info))
2740 return info->domain;
2741
2742 return __get_valid_domain_for_dev(dev);
2743}
2744
2745static int iommu_dummy(struct pci_dev *pdev)
2746{
2747 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2748}
2749
2750
2751static int iommu_no_mapping(struct device *dev)
2752{
2753 struct pci_dev *pdev;
2754 int found;
2755
2756 if (unlikely(dev->bus != &pci_bus_type))
2757 return 1;
2758
2759 pdev = to_pci_dev(dev);
2760 if (iommu_dummy(pdev))
2761 return 1;
2762
2763 if (!iommu_identity_mapping)
2764 return 0;
2765
2766 found = identity_mapping(pdev);
2767 if (found) {
2768 if (iommu_should_identity_map(pdev, 0))
2769 return 1;
2770 else {
2771
2772
2773
2774
2775 domain_remove_one_dev_info(si_domain, pdev);
2776 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2777 pci_name(pdev));
2778 return 0;
2779 }
2780 } else {
2781
2782
2783
2784
2785 if (iommu_should_identity_map(pdev, 0)) {
2786 int ret;
2787 ret = domain_add_dev_info(si_domain, pdev,
2788 hw_pass_through ?
2789 CONTEXT_TT_PASS_THROUGH :
2790 CONTEXT_TT_MULTI_LEVEL);
2791 if (!ret) {
2792 printk(KERN_INFO "64bit %s uses identity mapping\n",
2793 pci_name(pdev));
2794 return 1;
2795 }
2796 }
2797 }
2798
2799 return 0;
2800}
2801
2802static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2803 size_t size, int dir, u64 dma_mask)
2804{
2805 struct pci_dev *pdev = to_pci_dev(hwdev);
2806 struct dmar_domain *domain;
2807 phys_addr_t start_paddr;
2808 struct iova *iova;
2809 int prot = 0;
2810 int ret;
2811 struct intel_iommu *iommu;
2812 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2813
2814 BUG_ON(dir == DMA_NONE);
2815
2816 if (iommu_no_mapping(hwdev))
2817 return paddr;
2818
2819 domain = get_valid_domain_for_dev(pdev);
2820 if (!domain)
2821 return 0;
2822
2823 iommu = domain_get_iommu(domain);
2824 size = aligned_nrpages(paddr, size);
2825
2826 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2827 if (!iova)
2828 goto error;
2829
2830
2831
2832
2833
2834 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2835 !cap_zlr(iommu->cap))
2836 prot |= DMA_PTE_READ;
2837 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2838 prot |= DMA_PTE_WRITE;
2839
2840
2841
2842
2843
2844
2845 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2846 mm_to_dma_pfn(paddr_pfn), size, prot);
2847 if (ret)
2848 goto error;
2849
2850
2851 if (cap_caching_mode(iommu->cap))
2852 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2853 else
2854 iommu_flush_write_buffer(iommu);
2855
2856 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2857 start_paddr += paddr & ~PAGE_MASK;
2858 return start_paddr;
2859
2860error:
2861 if (iova)
2862 __free_iova(&domain->iovad, iova);
2863 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2864 pci_name(pdev), size, (unsigned long long)paddr, dir);
2865 return 0;
2866}
2867
2868static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2869 unsigned long offset, size_t size,
2870 enum dma_data_direction dir,
2871 struct dma_attrs *attrs)
2872{
2873 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2874 dir, to_pci_dev(dev)->dma_mask);
2875}
2876
2877static void flush_unmaps(void)
2878{
2879 int i, j;
2880
2881 timer_on = 0;
2882
2883
2884 for (i = 0; i < g_num_of_iommus; i++) {
2885 struct intel_iommu *iommu = g_iommus[i];
2886 if (!iommu)
2887 continue;
2888
2889 if (!deferred_flush[i].next)
2890 continue;
2891
2892
2893 if (!cap_caching_mode(iommu->cap))
2894 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2895 DMA_TLB_GLOBAL_FLUSH);
2896 for (j = 0; j < deferred_flush[i].next; j++) {
2897 unsigned long mask;
2898 struct iova *iova = deferred_flush[i].iova[j];
2899 struct dmar_domain *domain = deferred_flush[i].domain[j];
2900
2901
2902 if (cap_caching_mode(iommu->cap))
2903 iommu_flush_iotlb_psi(iommu, domain->id,
2904 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2905 else {
2906 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2907 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2908 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2909 }
2910 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2911 }
2912 deferred_flush[i].next = 0;
2913 }
2914
2915 list_size = 0;
2916}
2917
2918static void flush_unmaps_timeout(unsigned long data)
2919{
2920 unsigned long flags;
2921
2922 spin_lock_irqsave(&async_umap_flush_lock, flags);
2923 flush_unmaps();
2924 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2925}
2926
2927static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2928{
2929 unsigned long flags;
2930 int next, iommu_id;
2931 struct intel_iommu *iommu;
2932
2933 spin_lock_irqsave(&async_umap_flush_lock, flags);
2934 if (list_size == HIGH_WATER_MARK)
2935 flush_unmaps();
2936
2937 iommu = domain_get_iommu(dom);
2938 iommu_id = iommu->seq_id;
2939
2940 next = deferred_flush[iommu_id].next;
2941 deferred_flush[iommu_id].domain[next] = dom;
2942 deferred_flush[iommu_id].iova[next] = iova;
2943 deferred_flush[iommu_id].next++;
2944
2945 if (!timer_on) {
2946 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2947 timer_on = 1;
2948 }
2949 list_size++;
2950 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2951}
2952
2953static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2954 size_t size, enum dma_data_direction dir,
2955 struct dma_attrs *attrs)
2956{
2957 struct pci_dev *pdev = to_pci_dev(dev);
2958 struct dmar_domain *domain;
2959 unsigned long start_pfn, last_pfn;
2960 struct iova *iova;
2961 struct intel_iommu *iommu;
2962
2963 if (iommu_no_mapping(dev))
2964 return;
2965
2966 domain = find_domain(pdev);
2967 BUG_ON(!domain);
2968
2969 iommu = domain_get_iommu(domain);
2970
2971 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2972 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2973 (unsigned long long)dev_addr))
2974 return;
2975
2976 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2977 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2978
2979 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2980 pci_name(pdev), start_pfn, last_pfn);
2981
2982
2983 dma_pte_clear_range(domain, start_pfn, last_pfn);
2984
2985
2986 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2987
2988 if (intel_iommu_strict) {
2989 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2990 last_pfn - start_pfn + 1, 0);
2991
2992 __free_iova(&domain->iovad, iova);
2993 } else {
2994 add_unmap(domain, iova);
2995
2996
2997
2998
2999 }
3000}
3001
3002static void *intel_alloc_coherent(struct device *hwdev, size_t size,
3003 dma_addr_t *dma_handle, gfp_t flags,
3004 struct dma_attrs *attrs)
3005{
3006 void *vaddr;
3007 int order;
3008
3009 size = PAGE_ALIGN(size);
3010 order = get_order(size);
3011
3012 if (!iommu_no_mapping(hwdev))
3013 flags &= ~(GFP_DMA | GFP_DMA32);
3014 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3015 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3016 flags |= GFP_DMA;
3017 else
3018 flags |= GFP_DMA32;
3019 }
3020
3021 vaddr = (void *)__get_free_pages(flags, order);
3022 if (!vaddr)
3023 return NULL;
3024 memset(vaddr, 0, size);
3025
3026 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3027 DMA_BIDIRECTIONAL,
3028 hwdev->coherent_dma_mask);
3029 if (*dma_handle)
3030 return vaddr;
3031 free_pages((unsigned long)vaddr, order);
3032 return NULL;
3033}
3034
3035static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3036 dma_addr_t dma_handle, struct dma_attrs *attrs)
3037{
3038 int order;
3039
3040 size = PAGE_ALIGN(size);
3041 order = get_order(size);
3042
3043 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3044 free_pages((unsigned long)vaddr, order);
3045}
3046
3047static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3048 int nelems, enum dma_data_direction dir,
3049 struct dma_attrs *attrs)
3050{
3051 struct pci_dev *pdev = to_pci_dev(hwdev);
3052 struct dmar_domain *domain;
3053 unsigned long start_pfn, last_pfn;
3054 struct iova *iova;
3055 struct intel_iommu *iommu;
3056
3057 if (iommu_no_mapping(hwdev))
3058 return;
3059
3060 domain = find_domain(pdev);
3061 BUG_ON(!domain);
3062
3063 iommu = domain_get_iommu(domain);
3064
3065 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3066 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3067 (unsigned long long)sglist[0].dma_address))
3068 return;
3069
3070 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3071 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3072
3073
3074 dma_pte_clear_range(domain, start_pfn, last_pfn);
3075
3076
3077 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3078
3079 if (intel_iommu_strict) {
3080 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3081 last_pfn - start_pfn + 1, 0);
3082
3083 __free_iova(&domain->iovad, iova);
3084 } else {
3085 add_unmap(domain, iova);
3086
3087
3088
3089
3090 }
3091}
3092
3093static int intel_nontranslate_map_sg(struct device *hddev,
3094 struct scatterlist *sglist, int nelems, int dir)
3095{
3096 int i;
3097 struct scatterlist *sg;
3098
3099 for_each_sg(sglist, sg, nelems, i) {
3100 BUG_ON(!sg_page(sg));
3101 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3102 sg->dma_length = sg->length;
3103 }
3104 return nelems;
3105}
3106
3107static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3108 enum dma_data_direction dir, struct dma_attrs *attrs)
3109{
3110 int i;
3111 struct pci_dev *pdev = to_pci_dev(hwdev);
3112 struct dmar_domain *domain;
3113 size_t size = 0;
3114 int prot = 0;
3115 struct iova *iova = NULL;
3116 int ret;
3117 struct scatterlist *sg;
3118 unsigned long start_vpfn;
3119 struct intel_iommu *iommu;
3120
3121 BUG_ON(dir == DMA_NONE);
3122 if (iommu_no_mapping(hwdev))
3123 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3124
3125 domain = get_valid_domain_for_dev(pdev);
3126 if (!domain)
3127 return 0;
3128
3129 iommu = domain_get_iommu(domain);
3130
3131 for_each_sg(sglist, sg, nelems, i)
3132 size += aligned_nrpages(sg->offset, sg->length);
3133
3134 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3135 pdev->dma_mask);
3136 if (!iova) {
3137 sglist->dma_length = 0;
3138 return 0;
3139 }
3140
3141
3142
3143
3144
3145 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3146 !cap_zlr(iommu->cap))
3147 prot |= DMA_PTE_READ;
3148 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3149 prot |= DMA_PTE_WRITE;
3150
3151 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3152
3153 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3154 if (unlikely(ret)) {
3155
3156 dma_pte_clear_range(domain, start_vpfn,
3157 start_vpfn + size - 1);
3158
3159 dma_pte_free_pagetable(domain, start_vpfn,
3160 start_vpfn + size - 1);
3161
3162 __free_iova(&domain->iovad, iova);
3163 return 0;
3164 }
3165
3166
3167 if (cap_caching_mode(iommu->cap))
3168 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3169 else
3170 iommu_flush_write_buffer(iommu);
3171
3172 return nelems;
3173}
3174
3175static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3176{
3177 return !dma_addr;
3178}
3179
3180struct dma_map_ops intel_dma_ops = {
3181 .alloc = intel_alloc_coherent,
3182 .free = intel_free_coherent,
3183 .map_sg = intel_map_sg,
3184 .unmap_sg = intel_unmap_sg,
3185 .map_page = intel_map_page,
3186 .unmap_page = intel_unmap_page,
3187 .mapping_error = intel_mapping_error,
3188};
3189
3190static inline int iommu_domain_cache_init(void)
3191{
3192 int ret = 0;
3193
3194 iommu_domain_cache = kmem_cache_create("iommu_domain",
3195 sizeof(struct dmar_domain),
3196 0,
3197 SLAB_HWCACHE_ALIGN,
3198
3199 NULL);
3200 if (!iommu_domain_cache) {
3201 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3202 ret = -ENOMEM;
3203 }
3204
3205 return ret;
3206}
3207
3208static inline int iommu_devinfo_cache_init(void)
3209{
3210 int ret = 0;
3211
3212 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3213 sizeof(struct device_domain_info),
3214 0,
3215 SLAB_HWCACHE_ALIGN,
3216 NULL);
3217 if (!iommu_devinfo_cache) {
3218 printk(KERN_ERR "Couldn't create devinfo cache\n");
3219 ret = -ENOMEM;
3220 }
3221
3222 return ret;
3223}
3224
3225static inline int iommu_iova_cache_init(void)
3226{
3227 int ret = 0;
3228
3229 iommu_iova_cache = kmem_cache_create("iommu_iova",
3230 sizeof(struct iova),
3231 0,
3232 SLAB_HWCACHE_ALIGN,
3233 NULL);
3234 if (!iommu_iova_cache) {
3235 printk(KERN_ERR "Couldn't create iova cache\n");
3236 ret = -ENOMEM;
3237 }
3238
3239 return ret;
3240}
3241
3242static int __init iommu_init_mempool(void)
3243{
3244 int ret;
3245 ret = iommu_iova_cache_init();
3246 if (ret)
3247 return ret;
3248
3249 ret = iommu_domain_cache_init();
3250 if (ret)
3251 goto domain_error;
3252
3253 ret = iommu_devinfo_cache_init();
3254 if (!ret)
3255 return ret;
3256
3257 kmem_cache_destroy(iommu_domain_cache);
3258domain_error:
3259 kmem_cache_destroy(iommu_iova_cache);
3260
3261 return -ENOMEM;
3262}
3263
3264static void __init iommu_exit_mempool(void)
3265{
3266 kmem_cache_destroy(iommu_devinfo_cache);
3267 kmem_cache_destroy(iommu_domain_cache);
3268 kmem_cache_destroy(iommu_iova_cache);
3269
3270}
3271
3272static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3273{
3274 struct dmar_drhd_unit *drhd;
3275 u32 vtbar;
3276 int rc;
3277
3278
3279
3280
3281
3282
3283 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3284 if (rc) {
3285
3286 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3287 return;
3288 }
3289 vtbar &= 0xffff0000;
3290
3291
3292 drhd = dmar_find_matched_drhd_unit(pdev);
3293 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3294 TAINT_FIRMWARE_WORKAROUND,
3295 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3296 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3297}
3298DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3299
3300static void __init init_no_remapping_devices(void)
3301{
3302 struct dmar_drhd_unit *drhd;
3303
3304 for_each_drhd_unit(drhd) {
3305 if (!drhd->include_all) {
3306 int i;
3307 for (i = 0; i < drhd->devices_cnt; i++)
3308 if (drhd->devices[i] != NULL)
3309 break;
3310
3311 if (i == drhd->devices_cnt)
3312 drhd->ignored = 1;
3313 }
3314 }
3315
3316 for_each_drhd_unit(drhd) {
3317 int i;
3318 if (drhd->ignored || drhd->include_all)
3319 continue;
3320
3321 for (i = 0; i < drhd->devices_cnt; i++)
3322 if (drhd->devices[i] &&
3323 !IS_GFX_DEVICE(drhd->devices[i]))
3324 break;
3325
3326 if (i < drhd->devices_cnt)
3327 continue;
3328
3329
3330
3331 if (dmar_map_gfx) {
3332 intel_iommu_gfx_mapped = 1;
3333 } else {
3334 drhd->ignored = 1;
3335 for (i = 0; i < drhd->devices_cnt; i++) {
3336 if (!drhd->devices[i])
3337 continue;
3338 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3339 }
3340 }
3341 }
3342}
3343
3344#ifdef CONFIG_SUSPEND
3345static int init_iommu_hw(void)
3346{
3347 struct dmar_drhd_unit *drhd;
3348 struct intel_iommu *iommu = NULL;
3349
3350 for_each_active_iommu(iommu, drhd)
3351 if (iommu->qi)
3352 dmar_reenable_qi(iommu);
3353
3354 for_each_iommu(iommu, drhd) {
3355 if (drhd->ignored) {
3356
3357
3358
3359
3360 if (force_on)
3361 iommu_disable_protect_mem_regions(iommu);
3362 continue;
3363 }
3364
3365 iommu_flush_write_buffer(iommu);
3366
3367 iommu_set_root_entry(iommu);
3368
3369 iommu->flush.flush_context(iommu, 0, 0, 0,
3370 DMA_CCMD_GLOBAL_INVL);
3371 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3372 DMA_TLB_GLOBAL_FLUSH);
3373 if (iommu_enable_translation(iommu))
3374 return 1;
3375 iommu_disable_protect_mem_regions(iommu);
3376 }
3377
3378 return 0;
3379}
3380
3381static void iommu_flush_all(void)
3382{
3383 struct dmar_drhd_unit *drhd;
3384 struct intel_iommu *iommu;
3385
3386 for_each_active_iommu(iommu, drhd) {
3387 iommu->flush.flush_context(iommu, 0, 0, 0,
3388 DMA_CCMD_GLOBAL_INVL);
3389 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3390 DMA_TLB_GLOBAL_FLUSH);
3391 }
3392}
3393
3394static int iommu_suspend(void)
3395{
3396 struct dmar_drhd_unit *drhd;
3397 struct intel_iommu *iommu = NULL;
3398 unsigned long flag;
3399
3400 for_each_active_iommu(iommu, drhd) {
3401 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3402 GFP_ATOMIC);
3403 if (!iommu->iommu_state)
3404 goto nomem;
3405 }
3406
3407 iommu_flush_all();
3408
3409 for_each_active_iommu(iommu, drhd) {
3410 iommu_disable_translation(iommu);
3411
3412 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3413
3414 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3415 readl(iommu->reg + DMAR_FECTL_REG);
3416 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3417 readl(iommu->reg + DMAR_FEDATA_REG);
3418 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3419 readl(iommu->reg + DMAR_FEADDR_REG);
3420 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3421 readl(iommu->reg + DMAR_FEUADDR_REG);
3422
3423 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3424 }
3425 return 0;
3426
3427nomem:
3428 for_each_active_iommu(iommu, drhd)
3429 kfree(iommu->iommu_state);
3430
3431 return -ENOMEM;
3432}
3433
3434static void iommu_resume(void)
3435{
3436 struct dmar_drhd_unit *drhd;
3437 struct intel_iommu *iommu = NULL;
3438 unsigned long flag;
3439
3440 if (init_iommu_hw()) {
3441 if (force_on)
3442 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3443 else
3444 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3445 return;
3446 }
3447
3448 for_each_active_iommu(iommu, drhd) {
3449
3450 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3451
3452 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3453 iommu->reg + DMAR_FECTL_REG);
3454 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3455 iommu->reg + DMAR_FEDATA_REG);
3456 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3457 iommu->reg + DMAR_FEADDR_REG);
3458 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3459 iommu->reg + DMAR_FEUADDR_REG);
3460
3461 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3462 }
3463
3464 for_each_active_iommu(iommu, drhd)
3465 kfree(iommu->iommu_state);
3466}
3467
3468static struct syscore_ops iommu_syscore_ops = {
3469 .resume = iommu_resume,
3470 .suspend = iommu_suspend,
3471};
3472
3473static void __init init_iommu_pm_ops(void)
3474{
3475 register_syscore_ops(&iommu_syscore_ops);
3476}
3477
3478#else
/* Without CONFIG_SUSPEND there is nothing to register. */
static inline void init_iommu_pm_ops(void)
{
}
3480#endif
3481
3482LIST_HEAD(dmar_rmrr_units);
3483
3484static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3485{
3486 list_add(&rmrr->list, &dmar_rmrr_units);
3487}
3488
3489
3490int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3491{
3492 struct acpi_dmar_reserved_memory *rmrr;
3493 struct dmar_rmrr_unit *rmrru;
3494
3495 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3496 if (!rmrru)
3497 return -ENOMEM;
3498
3499 rmrru->hdr = header;
3500 rmrr = (struct acpi_dmar_reserved_memory *)header;
3501 rmrru->base_address = rmrr->base_address;
3502 rmrru->end_address = rmrr->end_address;
3503
3504 dmar_register_rmrr_unit(rmrru);
3505 return 0;
3506}
3507
3508static int __init
3509rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3510{
3511 struct acpi_dmar_reserved_memory *rmrr;
3512 int ret;
3513
3514 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3515 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3516 ((void *)rmrr) + rmrr->header.length,
3517 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3518
3519 if (ret || (rmrru->devices_cnt == 0)) {
3520 list_del(&rmrru->list);
3521 kfree(rmrru);
3522 }
3523 return ret;
3524}
3525
3526static LIST_HEAD(dmar_atsr_units);
3527
3528int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3529{
3530 struct acpi_dmar_atsr *atsr;
3531 struct dmar_atsr_unit *atsru;
3532
3533 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3534 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3535 if (!atsru)
3536 return -ENOMEM;
3537
3538 atsru->hdr = hdr;
3539 atsru->include_all = atsr->flags & 0x1;
3540
3541 list_add(&atsru->list, &dmar_atsr_units);
3542
3543 return 0;
3544}
3545
3546static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3547{
3548 int rc;
3549 struct acpi_dmar_atsr *atsr;
3550
3551 if (atsru->include_all)
3552 return 0;
3553
3554 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3555 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3556 (void *)atsr + atsr->header.length,
3557 &atsru->devices_cnt, &atsru->devices,
3558 atsr->segment);
3559 if (rc || !atsru->devices_cnt) {
3560 list_del(&atsru->list);
3561 kfree(atsru);
3562 }
3563
3564 return rc;
3565}
3566
3567int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3568{
3569 int i;
3570 struct pci_bus *bus;
3571 struct acpi_dmar_atsr *atsr;
3572 struct dmar_atsr_unit *atsru;
3573
3574 dev = pci_physfn(dev);
3575
3576 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3577 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3578 if (atsr->segment == pci_domain_nr(dev->bus))
3579 goto found;
3580 }
3581
3582 return 0;
3583
3584found:
3585 for (bus = dev->bus; bus; bus = bus->parent) {
3586 struct pci_dev *bridge = bus->self;
3587
3588 if (!bridge || !pci_is_pcie(bridge) ||
3589 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3590 return 0;
3591
3592 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
3593 for (i = 0; i < atsru->devices_cnt; i++)
3594 if (atsru->devices[i] == bridge)
3595 return 1;
3596 break;
3597 }
3598 }
3599
3600 if (atsru->include_all)
3601 return 1;
3602
3603 return 0;
3604}
3605
3606int __init dmar_parse_rmrr_atsr_dev(void)
3607{
3608 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3609 struct dmar_atsr_unit *atsr, *atsr_n;
3610 int ret = 0;
3611
3612 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3613 ret = rmrr_parse_dev(rmrr);
3614 if (ret)
3615 return ret;
3616 }
3617
3618 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3619 ret = atsr_parse_dev(atsr);
3620 if (ret)
3621 return ret;
3622 }
3623
3624 return ret;
3625}
3626
3627
3628
3629
3630
3631
3632
3633static int device_notifier(struct notifier_block *nb,
3634 unsigned long action, void *data)
3635{
3636 struct device *dev = data;
3637 struct pci_dev *pdev = to_pci_dev(dev);
3638 struct dmar_domain *domain;
3639
3640 if (iommu_no_mapping(dev))
3641 return 0;
3642
3643 domain = find_domain(pdev);
3644 if (!domain)
3645 return 0;
3646
3647 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3648 domain_remove_one_dev_info(domain, pdev);
3649
3650 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3651 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3652 list_empty(&domain->devices))
3653 domain_exit(domain);
3654 }
3655
3656 return 0;
3657}
3658
3659static struct notifier_block device_nb = {
3660 .notifier_call = device_notifier,
3661};
3662
3663int __init intel_iommu_init(void)
3664{
3665 int ret = 0;
3666
3667
3668 force_on = tboot_force_iommu();
3669
3670 if (dmar_table_init()) {
3671 if (force_on)
3672 panic("tboot: Failed to initialize DMAR table\n");
3673 return -ENODEV;
3674 }
3675
3676 if (dmar_dev_scope_init() < 0) {
3677 if (force_on)
3678 panic("tboot: Failed to initialize DMAR device scope\n");
3679 return -ENODEV;
3680 }
3681
3682 if (no_iommu || dmar_disabled)
3683 return -ENODEV;
3684
3685 if (iommu_init_mempool()) {
3686 if (force_on)
3687 panic("tboot: Failed to initialize iommu memory\n");
3688 return -ENODEV;
3689 }
3690
3691 if (list_empty(&dmar_rmrr_units))
3692 printk(KERN_INFO "DMAR: No RMRR found\n");
3693
3694 if (list_empty(&dmar_atsr_units))
3695 printk(KERN_INFO "DMAR: No ATSR found\n");
3696
3697 if (dmar_init_reserved_ranges()) {
3698 if (force_on)
3699 panic("tboot: Failed to reserve iommu ranges\n");
3700 return -ENODEV;
3701 }
3702
3703 init_no_remapping_devices();
3704
3705 ret = init_dmars();
3706 if (ret) {
3707 if (force_on)
3708 panic("tboot: Failed to initialize DMARs\n");
3709 printk(KERN_ERR "IOMMU: dmar init failed\n");
3710 put_iova_domain(&reserved_iova_list);
3711 iommu_exit_mempool();
3712 return ret;
3713 }
3714 printk(KERN_INFO
3715 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3716
3717 init_timer(&unmap_timer);
3718#ifdef CONFIG_SWIOTLB
3719 swiotlb = 0;
3720#endif
3721 dma_ops = &intel_dma_ops;
3722
3723 init_iommu_pm_ops();
3724
3725 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3726
3727 bus_register_notifier(&pci_bus_type, &device_nb);
3728
3729 intel_iommu_enabled = 1;
3730
3731 return 0;
3732}
3733
3734static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3735 struct pci_dev *pdev)
3736{
3737 struct pci_dev *tmp, *parent;
3738
3739 if (!iommu || !pdev)
3740 return;
3741
3742
3743 tmp = pci_find_upstream_pcie_bridge(pdev);
3744
3745 if (tmp) {
3746 parent = pdev->bus->self;
3747 while (parent != tmp) {
3748 iommu_detach_dev(iommu, parent->bus->number,
3749 parent->devfn);
3750 parent = parent->bus->self;
3751 }
3752 if (pci_is_pcie(tmp))
3753 iommu_detach_dev(iommu,
3754 tmp->subordinate->number, 0);
3755 else
3756 iommu_detach_dev(iommu, tmp->bus->number,
3757 tmp->devfn);
3758 }
3759}
3760
3761static void domain_remove_one_dev_info(struct dmar_domain *domain,
3762 struct pci_dev *pdev)
3763{
3764 struct device_domain_info *info;
3765 struct intel_iommu *iommu;
3766 unsigned long flags;
3767 int found = 0;
3768 struct list_head *entry, *tmp;
3769
3770 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3771 pdev->devfn);
3772 if (!iommu)
3773 return;
3774
3775 spin_lock_irqsave(&device_domain_lock, flags);
3776 list_for_each_safe(entry, tmp, &domain->devices) {
3777 info = list_entry(entry, struct device_domain_info, link);
3778 if (info->segment == pci_domain_nr(pdev->bus) &&
3779 info->bus == pdev->bus->number &&
3780 info->devfn == pdev->devfn) {
3781 unlink_domain_info(info);
3782 spin_unlock_irqrestore(&device_domain_lock, flags);
3783
3784 iommu_disable_dev_iotlb(info);
3785 iommu_detach_dev(iommu, info->bus, info->devfn);
3786 iommu_detach_dependent_devices(iommu, pdev);
3787 free_devinfo_mem(info);
3788
3789 spin_lock_irqsave(&device_domain_lock, flags);
3790
3791 if (found)
3792 break;
3793 else
3794 continue;
3795 }
3796
3797
3798
3799
3800
3801 if (iommu == device_to_iommu(info->segment, info->bus,
3802 info->devfn))
3803 found = 1;
3804 }
3805
3806 spin_unlock_irqrestore(&device_domain_lock, flags);
3807
3808 if (found == 0) {
3809 unsigned long tmp_flags;
3810 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3811 clear_bit(iommu->seq_id, domain->iommu_bmp);
3812 domain->iommu_count--;
3813 domain_update_iommu_cap(domain);
3814 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3815
3816 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3817 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3818 spin_lock_irqsave(&iommu->lock, tmp_flags);
3819 clear_bit(domain->id, iommu->domain_ids);
3820 iommu->domains[domain->id] = NULL;
3821 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3822 }
3823 }
3824}
3825
3826static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3827{
3828 struct device_domain_info *info;
3829 struct intel_iommu *iommu;
3830 unsigned long flags1, flags2;
3831
3832 spin_lock_irqsave(&device_domain_lock, flags1);
3833 while (!list_empty(&domain->devices)) {
3834 info = list_entry(domain->devices.next,
3835 struct device_domain_info, link);
3836 unlink_domain_info(info);
3837 spin_unlock_irqrestore(&device_domain_lock, flags1);
3838
3839 iommu_disable_dev_iotlb(info);
3840 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3841 iommu_detach_dev(iommu, info->bus, info->devfn);
3842 iommu_detach_dependent_devices(iommu, info->dev);
3843
3844
3845
3846
3847 spin_lock_irqsave(&domain->iommu_lock, flags2);
3848 if (test_and_clear_bit(iommu->seq_id,
3849 domain->iommu_bmp)) {
3850 domain->iommu_count--;
3851 domain_update_iommu_cap(domain);
3852 }
3853 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3854
3855 free_devinfo_mem(info);
3856 spin_lock_irqsave(&device_domain_lock, flags1);
3857 }
3858 spin_unlock_irqrestore(&device_domain_lock, flags1);
3859}
3860
3861
3862static unsigned long vm_domid;
3863
3864static struct dmar_domain *iommu_alloc_vm_domain(void)
3865{
3866 struct dmar_domain *domain;
3867
3868 domain = alloc_domain_mem();
3869 if (!domain)
3870 return NULL;
3871
3872 domain->id = vm_domid++;
3873 domain->nid = -1;
3874 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3875 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3876
3877 return domain;
3878}
3879
3880static int md_domain_init(struct dmar_domain *domain, int guest_width)
3881{
3882 int adjust_width;
3883
3884 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3885 spin_lock_init(&domain->iommu_lock);
3886
3887 domain_reserve_special_ranges(domain);
3888
3889
3890 domain->gaw = guest_width;
3891 adjust_width = guestwidth_to_adjustwidth(guest_width);
3892 domain->agaw = width_to_agaw(adjust_width);
3893
3894 INIT_LIST_HEAD(&domain->devices);
3895
3896 domain->iommu_count = 0;
3897 domain->iommu_coherency = 0;
3898 domain->iommu_snooping = 0;
3899 domain->iommu_superpage = 0;
3900 domain->max_addr = 0;
3901 domain->nid = -1;
3902
3903
3904 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3905 if (!domain->pgd)
3906 return -ENOMEM;
3907 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3908 return 0;
3909}
3910
3911static void iommu_free_vm_domain(struct dmar_domain *domain)
3912{
3913 unsigned long flags;
3914 struct dmar_drhd_unit *drhd;
3915 struct intel_iommu *iommu;
3916 unsigned long i;
3917 unsigned long ndomains;
3918
3919 for_each_drhd_unit(drhd) {
3920 if (drhd->ignored)
3921 continue;
3922 iommu = drhd->iommu;
3923
3924 ndomains = cap_ndoms(iommu->cap);
3925 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3926 if (iommu->domains[i] == domain) {
3927 spin_lock_irqsave(&iommu->lock, flags);
3928 clear_bit(i, iommu->domain_ids);
3929 iommu->domains[i] = NULL;
3930 spin_unlock_irqrestore(&iommu->lock, flags);
3931 break;
3932 }
3933 }
3934 }
3935}
3936
3937static void vm_domain_exit(struct dmar_domain *domain)
3938{
3939
3940 if (!domain)
3941 return;
3942
3943 vm_domain_remove_all_dev_info(domain);
3944
3945 put_iova_domain(&domain->iovad);
3946
3947
3948 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3949
3950
3951 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3952
3953 iommu_free_vm_domain(domain);
3954 free_domain_mem(domain);
3955}
3956
3957static int intel_iommu_domain_init(struct iommu_domain *domain)
3958{
3959 struct dmar_domain *dmar_domain;
3960
3961 dmar_domain = iommu_alloc_vm_domain();
3962 if (!dmar_domain) {
3963 printk(KERN_ERR
3964 "intel_iommu_domain_init: dmar_domain == NULL\n");
3965 return -ENOMEM;
3966 }
3967 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3968 printk(KERN_ERR
3969 "intel_iommu_domain_init() failed\n");
3970 vm_domain_exit(dmar_domain);
3971 return -ENOMEM;
3972 }
3973 domain_update_iommu_cap(dmar_domain);
3974 domain->priv = dmar_domain;
3975
3976 domain->geometry.aperture_start = 0;
3977 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3978 domain->geometry.force_aperture = true;
3979
3980 return 0;
3981}
3982
3983static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3984{
3985 struct dmar_domain *dmar_domain = domain->priv;
3986
3987 domain->priv = NULL;
3988 vm_domain_exit(dmar_domain);
3989}
3990
3991static int intel_iommu_attach_device(struct iommu_domain *domain,
3992 struct device *dev)
3993{
3994 struct dmar_domain *dmar_domain = domain->priv;
3995 struct pci_dev *pdev = to_pci_dev(dev);
3996 struct intel_iommu *iommu;
3997 int addr_width;
3998
3999
4000 if (unlikely(domain_context_mapped(pdev))) {
4001 struct dmar_domain *old_domain;
4002
4003 old_domain = find_domain(pdev);
4004 if (old_domain) {
4005 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4006 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4007 domain_remove_one_dev_info(old_domain, pdev);
4008 else
4009 domain_remove_dev_info(old_domain);
4010 }
4011 }
4012
4013 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4014 pdev->devfn);
4015 if (!iommu)
4016 return -ENODEV;
4017
4018
4019 addr_width = agaw_to_width(iommu->agaw);
4020 if (addr_width > cap_mgaw(iommu->cap))
4021 addr_width = cap_mgaw(iommu->cap);
4022
4023 if (dmar_domain->max_addr > (1LL << addr_width)) {
4024 printk(KERN_ERR "%s: iommu width (%d) is not "
4025 "sufficient for the mapped address (%llx)\n",
4026 __func__, addr_width, dmar_domain->max_addr);
4027 return -EFAULT;
4028 }
4029 dmar_domain->gaw = addr_width;
4030
4031
4032
4033
4034 while (iommu->agaw < dmar_domain->agaw) {
4035 struct dma_pte *pte;
4036
4037 pte = dmar_domain->pgd;
4038 if (dma_pte_present(pte)) {
4039 dmar_domain->pgd = (struct dma_pte *)
4040 phys_to_virt(dma_pte_addr(pte));
4041 free_pgtable_page(pte);
4042 }
4043 dmar_domain->agaw--;
4044 }
4045
4046 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4047}
4048
4049static void intel_iommu_detach_device(struct iommu_domain *domain,
4050 struct device *dev)
4051{
4052 struct dmar_domain *dmar_domain = domain->priv;
4053 struct pci_dev *pdev = to_pci_dev(dev);
4054
4055 domain_remove_one_dev_info(dmar_domain, pdev);
4056}
4057
4058static int intel_iommu_map(struct iommu_domain *domain,
4059 unsigned long iova, phys_addr_t hpa,
4060 size_t size, int iommu_prot)
4061{
4062 struct dmar_domain *dmar_domain = domain->priv;
4063 u64 max_addr;
4064 int prot = 0;
4065 int ret;
4066
4067 if (iommu_prot & IOMMU_READ)
4068 prot |= DMA_PTE_READ;
4069 if (iommu_prot & IOMMU_WRITE)
4070 prot |= DMA_PTE_WRITE;
4071 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4072 prot |= DMA_PTE_SNP;
4073
4074 max_addr = iova + size;
4075 if (dmar_domain->max_addr < max_addr) {
4076 u64 end;
4077
4078
4079 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4080 if (end < max_addr) {
4081 printk(KERN_ERR "%s: iommu width (%d) is not "
4082 "sufficient for the mapped address (%llx)\n",
4083 __func__, dmar_domain->gaw, max_addr);
4084 return -EFAULT;
4085 }
4086 dmar_domain->max_addr = max_addr;
4087 }
4088
4089
4090 size = aligned_nrpages(hpa, size);
4091 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4092 hpa >> VTD_PAGE_SHIFT, size, prot);
4093 return ret;
4094}
4095
4096static size_t intel_iommu_unmap(struct iommu_domain *domain,
4097 unsigned long iova, size_t size)
4098{
4099 struct dmar_domain *dmar_domain = domain->priv;
4100 int order;
4101
4102 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4103 (iova + size - 1) >> VTD_PAGE_SHIFT);
4104
4105 if (dmar_domain->max_addr == iova + size)
4106 dmar_domain->max_addr = iova;
4107
4108 return PAGE_SIZE << order;
4109}
4110
4111static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4112 unsigned long iova)
4113{
4114 struct dmar_domain *dmar_domain = domain->priv;
4115 struct dma_pte *pte;
4116 u64 phys = 0;
4117
4118 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4119 if (pte)
4120 phys = dma_pte_addr(pte);
4121
4122 return phys;
4123}
4124
4125static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4126 unsigned long cap)
4127{
4128 struct dmar_domain *dmar_domain = domain->priv;
4129
4130 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4131 return dmar_domain->iommu_snooping;
4132 if (cap == IOMMU_CAP_INTR_REMAP)
4133 return irq_remapping_enabled;
4134
4135 return 0;
4136}
4137
/*
 * Replace the device reference held in *@from with @to.
 *
 * The reference on the previous device is dropped with pci_dev_put(); the
 * caller is expected to already hold whatever reference @to needs, since
 * none is taken here.
 */
static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
{
	struct pci_dev *old = *from;

	*from = to;
	pci_dev_put(old);
}
4143
/* ACS flags that must all be enabled for a device to be treated as isolated. */
#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

/*
 * iommu_ops ->add_device: place @dev into an IOMMU group.
 *
 * The function picks the device (dma_pdev) whose group @dev should share,
 * then joins that device's existing group or allocates a new one when it
 * has none.  A reference on dma_pdev is held throughout the search and
 * dropped once its group has been looked up.
 *
 * Returns -ENODEV when no IOMMU covers the device, the error from
 * iommu_group_alloc() when a new group cannot be allocated, otherwise the
 * result of iommu_group_add_device().
 */
static int intel_iommu_add_device(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev *bridge, *dma_pdev = NULL;
	struct iommu_group *group;
	int ret;

	if (!device_to_iommu(pci_domain_nr(pdev->bus),
			     pdev->bus->number, pdev->devfn))
		return -ENODEV;

	/*
	 * Starting point: with an upstream bridge that is PCIe, try devfn 0
	 * on the bridge's subordinate bus; if that lookup finds nothing (or
	 * the bridge is not PCIe) use the bridge itself.  Without such a
	 * bridge, start from the device itself.
	 */
	bridge = pci_find_upstream_pcie_bridge(pdev);
	if (bridge) {
		if (pci_is_pcie(bridge))
			dma_pdev = pci_get_domain_bus_and_slot(
						pci_domain_nr(pdev->bus),
						bridge->subordinate->number, 0);
		if (!dma_pdev)
			dma_pdev = pci_dev_get(bridge);
	} else
		dma_pdev = pci_dev_get(pdev);

	/*
	 * pci_get_dma_source() may hand back a different device as the DMA
	 * source (presumably for quirked hardware - confirm against the PCI
	 * core); switch our reference over to whatever it returns.
	 */
	swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));

	/*
	 * A multifunction device without the required ACS flags enabled is
	 * grouped with function 0 of the same slot.
	 */
	if (dma_pdev->multifunction &&
	    !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
		swap_pci_ref(&dma_pdev,
			     pci_get_slot(dma_pdev->bus,
					  PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
					  0)));

	/*
	 * Walk upstream until the root bus is reached.  At each step find
	 * the nearest bus that has a bridge device (bus->self), skipping
	 * buses that have none; if the walk reaches the root bus without
	 * finding one, stop.  When the ACS path check from that bridge
	 * succeeds, dma_pdev is final; otherwise the bridge becomes the new
	 * dma_pdev and the walk continues from there.
	 */
	while (!pci_is_root_bus(dma_pdev->bus)) {
		struct pci_bus *bus = dma_pdev->bus;

		while (!bus->self) {
			if (!pci_is_root_bus(bus))
				bus = bus->parent;
			else
				goto root_bus;
		}

		if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
			break;

		swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
	}

root_bus:
	/* Reuse dma_pdev's group when it has one, else allocate a new group. */
	group = iommu_group_get(&dma_pdev->dev);
	pci_dev_put(dma_pdev);
	if (!group) {
		group = iommu_group_alloc();
		if (IS_ERR(group))
			return PTR_ERR(group);
	}

	ret = iommu_group_add_device(group, dev);

	/* Drop the group reference obtained from the get/alloc above. */
	iommu_group_put(group);
	return ret;
}
4217
/*
 * iommu_ops ->remove_device: take @dev out of its IOMMU group.
 */
static void intel_iommu_remove_device(struct device *dev)
{
	iommu_group_remove_device(dev);
}
4222
4223static struct iommu_ops intel_iommu_ops = {
4224 .domain_init = intel_iommu_domain_init,
4225 .domain_destroy = intel_iommu_domain_destroy,
4226 .attach_dev = intel_iommu_attach_device,
4227 .detach_dev = intel_iommu_detach_device,
4228 .map = intel_iommu_map,
4229 .unmap = intel_iommu_unmap,
4230 .iova_to_phys = intel_iommu_iova_to_phys,
4231 .domain_has_cap = intel_iommu_domain_has_cap,
4232 .add_device = intel_iommu_add_device,
4233 .remove_device = intel_iommu_remove_device,
4234 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4235};
4236
4237static void __devinit quirk_iommu_g4x_gfx(struct pci_dev *dev)
4238{
4239
4240 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4241 dmar_map_gfx = 0;
4242}
4243
4244DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4245DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4246DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4247DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4248DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4249DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4250DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4251
4252static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4253{
4254
4255
4256
4257
4258 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4259 rwbf_quirk = 1;
4260}
4261
4262DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4263
4264#define GGC 0x52
4265#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4266#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4267#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4268#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4269#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4270#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4271#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4272#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4273
4274static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4275{
4276 unsigned short ggc;
4277
4278 if (pci_read_config_word(dev, GGC, &ggc))
4279 return;
4280
4281 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4282 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4283 dmar_map_gfx = 0;
4284 } else if (dmar_map_gfx) {
4285
4286 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4287 intel_iommu_strict = 1;
4288 }
4289}
4290DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4291DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4292DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4293DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4294
4295
4296
4297
4298
4299
4300
4301
4302static void __init check_tylersburg_isoch(void)
4303{
4304 struct pci_dev *pdev;
4305 uint32_t vtisochctrl;
4306
4307
4308 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4309 if (!pdev)
4310 return;
4311 pci_dev_put(pdev);
4312
4313
4314
4315
4316 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4317 if (!pdev)
4318 return;
4319
4320 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4321 pci_dev_put(pdev);
4322 return;
4323 }
4324
4325 pci_dev_put(pdev);
4326
4327
4328 if (vtisochctrl & 1)
4329 return;
4330
4331
4332 vtisochctrl &= 0x1c;
4333
4334
4335 if (vtisochctrl == 0x10)
4336 return;
4337
4338
4339 if (!vtisochctrl) {
4340 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4341 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4342 dmi_get_system_info(DMI_BIOS_VENDOR),
4343 dmi_get_system_info(DMI_BIOS_VERSION),
4344 dmi_get_system_info(DMI_PRODUCT_VERSION));
4345 iommu_identity_mapping |= IDENTMAP_AZALIA;
4346 return;
4347 }
4348
4349 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4350 vtisochctrl);
4351}
4352