1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
45#include <asm/cacheflush.h>
46#include <asm/iommu.h>
47
/* The root table and each context table occupy exactly one VT-d page. */
#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

/*
 * PCI class / ID predicates.  The argument is parenthesized everywhere so
 * the macros stay correct when handed an expression instead of a plain
 * identifier (previously only IS_AZALIA did this).
 */
#define IS_BRIDGE_HOST_DEVICE(pdev) \
	(((pdev)->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

/* Address window of the IOAPIC; dmar_init_reserved_ranges() reserves it. */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

/* Guest address width used by iommu_calculate_agaw(). */
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Upper bound on the width used by iommu_calculate_max_sagaw(). */
#define MAX_AGAW_WIDTH 64
64
/*
 * Highest page frame number / byte address representable with a guest
 * address width of @gaw bits.  @gaw is parenthesized so that an expression
 * argument cannot be re-associated by operator precedence.
 */
#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << ((gaw) - VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << (gaw)) - 1)

/*
 * DOMAIN_MAX_PFN() clamps the 64-bit value to what fits in an unsigned long,
 * so PFNs can be carried in 'unsigned long' variables.
 */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

/* Convert a byte address to a PFN in units of PAGE_SHIFT. */
#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* Each page-table level decodes LEVEL_STRIDE bits of the PFN. */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * Bitmask with every bit from bit 12 upwards set (all low 12 bits clear).
 * NOTE(review): presumably a page-size bitmap advertising every
 * power-of-two size >= 4KiB; its user is outside this view - confirm.
 */
#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
99
/* Translate an AGAW index into a page-table depth (always index + 2). */
static inline int agaw_to_level(int agaw)
{
	int depth = agaw;

	depth += 2;
	return depth;
}
104
105static inline int agaw_to_width(int agaw)
106{
107 return 30 + agaw * LEVEL_STRIDE;
108}
109
110static inline int width_to_agaw(int width)
111{
112 return (width - 30) / LEVEL_STRIDE;
113}
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
/* Mask that clears the PFN bits below @level's index field. */
static inline unsigned long level_mask(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return ~0UL << shift;
}
129
/* Number of PFNs covered by a single entry at @level. */
static inline unsigned long level_size(int level)
{
	unsigned int shift = level_to_offset_bits(level);

	return 1UL << shift;
}
134
/* Round @pfn up to the next boundary of an entry at @level. */
static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	unsigned long bumped = pfn + level_size(level) - 1;

	return bumped & level_mask(level);
}
139
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
142 return 1 << ((lvl - 1) * LEVEL_STRIDE);
143}
144
145
146
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
/* VT-d PFN of the memory described by @pg. */
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	unsigned long mm_pfn = page_to_pfn(pg);

	return mm_to_dma_pfn(mm_pfn);
}
/* VT-d PFN of the page backing kernel virtual address @p. */
static inline unsigned long virt_to_dma_pfn(void *p)
{
	struct page *backing = virt_to_page(p);

	return page_to_dma_pfn(backing);
}
164
165
/* Table of IOMMU units, indexed by each unit's seq_id. */
static struct intel_iommu **g_iommus;

/* Defined elsewhere in this file. */
static void __init check_tylersburg_isoch(void);
/*
 * When non-zero, iommu_flush_write_buffer() flushes even if the hardware
 * does not advertise cap_rwbf().
 */
static int rwbf_quirk;

/* Not referenced in the visible part of the file; set/used elsewhere. */
static int force_on = 0;
176
177
178
179
180
181
182
/*
 * One entry of the root table, indexed by PCI bus number.
 * val:   bit 0 is the present flag; the page-aligned bits hold the
 *        physical address of the bus's context table.
 * rsvd1: not touched by the accessors below.
 */
struct root_entry {
 u64 val;
 u64 rsvd1;
};
/* Number of root entries that fit in one VT-d page. */
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
198 root->val |= value & VTD_PAGE_MASK;
199}
200
201static inline struct context_entry *
202get_context_addr_from_root(struct root_entry *root)
203{
204 return (struct context_entry *)
205 (root_present(root)?phys_to_virt(
206 root->val & VTD_PAGE_MASK) :
207 NULL);
208}
209
210
211
212
213
214
215
216
217
218
219
220
/*
 * One context-table entry (indexed by devfn).  As used by the accessors
 * below:
 * lo: bit 0 present, bit 1 cleared by context_set_fault_enable(),
 *     bits 2-3 translation type, page-aligned bits address root.
 * hi: bits 0-2 address width, bits 8-23 domain id.
 */
struct context_entry {
 u64 lo;
 u64 hi;
};
225
226static inline bool context_present(struct context_entry *context)
227{
228 return (context->lo & 1);
229}
230static inline void context_set_present(struct context_entry *context)
231{
232 context->lo |= 1;
233}
234
235static inline void context_set_fault_enable(struct context_entry *context)
236{
237 context->lo &= (((u64)-1) << 2) | 1;
238}
239
240static inline void context_set_translation_type(struct context_entry *context,
241 unsigned long value)
242{
243 context->lo &= (((u64)-1) << 4) | 3;
244 context->lo |= (value & 3) << 2;
245}
246
247static inline void context_set_address_root(struct context_entry *context,
248 unsigned long value)
249{
250 context->lo |= value & VTD_PAGE_MASK;
251}
252
253static inline void context_set_address_width(struct context_entry *context,
254 unsigned long value)
255{
256 context->hi |= value & 7;
257}
258
259static inline void context_set_domain_id(struct context_entry *context,
260 unsigned long value)
261{
262 context->hi |= (value & ((1 << 16) - 1)) << 8;
263}
264
265static inline void context_clear_entry(struct context_entry *context)
266{
267 context->lo = 0;
268 context->hi = 0;
269}
270
271
272
273
274
275
276
277
278
279
/*
 * One page-table entry.  As used by the helpers below: bits 0-1 carry the
 * protection bits (non-zero means present), bit 7 marks a superpage, and
 * the page-aligned bits hold the target address.
 */
struct dma_pte {
 u64 val;
};
283
284static inline void dma_clear_pte(struct dma_pte *pte)
285{
286 pte->val = 0;
287}
288
289static inline void dma_set_pte_readable(struct dma_pte *pte)
290{
291 pte->val |= DMA_PTE_READ;
292}
293
294static inline void dma_set_pte_writable(struct dma_pte *pte)
295{
296 pte->val |= DMA_PTE_WRITE;
297}
298
299static inline void dma_set_pte_snp(struct dma_pte *pte)
300{
301 pte->val |= DMA_PTE_SNP;
302}
303
304static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
305{
306 pte->val = (pte->val & ~3) | (prot & 3);
307}
308
309static inline u64 dma_pte_addr(struct dma_pte *pte)
310{
311#ifdef CONFIG_64BIT
312 return pte->val & VTD_PAGE_MASK;
313#else
314
315 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
316#endif
317}
318
319static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
320{
321 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
322}
323
324static inline bool dma_pte_present(struct dma_pte *pte)
325{
326 return (pte->val & 3) != 0;
327}
328
329static inline bool dma_pte_superpage(struct dma_pte *pte)
330{
331 return (pte->val & (1 << 7));
332}
333
334static inline int first_pte_in_page(struct dma_pte *pte)
335{
336 return !((unsigned long)pte & ~VTD_PAGE_MASK);
337}
338
339
340
341
342
343
344
/* NOTE(review): presumably the static identity domain - set up elsewhere. */
static struct dmar_domain *si_domain;
/* Defaults to 1; consumers are outside the visible part of the file. */
static int hw_pass_through = 1;

/* Bits for dmar_domain.flags. */
/* NOTE(review): name suggests devices behind one P2P bridge share the domain - confirm. */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/*
 * Domain is torn down with vm_domain_exit() instead of domain_exit();
 * domain_get_iommu() must not be called on it.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)

/* domain_get_iommu() must not be called on a domain carrying this flag. */
#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
358
/* Per-domain state. */
struct dmar_domain {
 int id; /* domain id chosen in iommu_attach_domain() */
 int nid; /* node passed to alloc_pgtable_page(); -1 after alloc_domain() */
 unsigned long iommu_bmp; /* bitmap of seq_ids of the IOMMUs serving this domain */

 struct list_head devices; /* device_domain_info entries, linked via ->link */
 struct iova_domain iovad;

 struct dma_pte *pgd; /* top-level page table */
 int gaw; /* used with DOMAIN_MAX_PFN() */

 /* selects page-table depth and width via agaw_to_level()/agaw_to_width() */
 int agaw;

 int flags; /* DOMAIN_FLAG_* */

 int iommu_coherency; /* 0: page-table writes need clflush, see domain_flush_cache() */
 int iommu_snooping; /* 1 only if every IOMMU in iommu_bmp has ecap_sc_support() */
 int iommu_count; /* decremented in free_dmar_iommu(); 0 triggers domain teardown */
 int iommu_superpage; /* fls() of the common superpage capability mask, 0 if disabled */

 /* taken around iommu_count updates in free_dmar_iommu() */
 spinlock_t iommu_lock;
 u64 max_addr;
};
384
385
/* Per-device record attached to a domain. */
struct device_domain_info {
 struct list_head link; /* entry in dmar_domain.devices */
 struct list_head global; /* NOTE(review): presumably entry in device_domain_list - confirm */
 int segment;
 u8 bus; /* with devfn forms the source id (bus << 8 | devfn) */
 u8 devfn;
 struct pci_dev *dev; /* may be NULL; callers check before use */
 struct intel_iommu *iommu; /* filled in by iommu_support_dev_iotlb() */
 struct dmar_domain *domain;
};
396
/* Timer callback; defined elsewhere in this file. */
static void flush_unmaps_timeout(unsigned long data);

/* Timer bound to flush_unmaps_timeout(), with expires and data both 0. */
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

/* Capacity of each deferred_flush_tables array. */
#define HIGH_WATER_MARK 250
/* Batch of (iova, domain) pairs; NOTE(review): 'next' is presumably the next free slot. */
struct deferred_flush_tables {
 int next;
 struct iova *iova[HIGH_WATER_MARK];
 struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* Bound used when scanning g_iommus[] and dmar_domain.iommu_bmp. */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

/* State for the deferred-unmap path; users are outside the visible part of the file. */
static int timer_on;
static long list_size;
418
/* Defined elsewhere in this file. */
static void domain_remove_dev_info(struct dmar_domain *domain);

/*
 * Build-time default; overridden by "intel_iommu=on" / "intel_iommu=off"
 * in intel_iommu_setup().
 */
#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

/* Boot-option state, see intel_iommu_setup(). */
static int dmar_map_gfx = 1; /* cleared by "igfx_off" */
static int dmar_forcedac; /* set by "forcedac" */
static int intel_iommu_strict; /* set by "strict" */
static int intel_iommu_superpage = 1; /* cleared by "sp_off" */

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

/* Sentinel pointer value (-1) that can never be a real device_domain_info. */
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
/* Held while walking dmar_domain.devices. */
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

/* Defined elsewhere in this file. */
static struct iommu_ops intel_iommu_ops;
443
444static int __init intel_iommu_setup(char *str)
445{
446 if (!str)
447 return -EINVAL;
448 while (*str) {
449 if (!strncmp(str, "on", 2)) {
450 dmar_disabled = 0;
451 printk(KERN_INFO "Intel-IOMMU: enabled\n");
452 } else if (!strncmp(str, "off", 3)) {
453 dmar_disabled = 1;
454 printk(KERN_INFO "Intel-IOMMU: disabled\n");
455 } else if (!strncmp(str, "igfx_off", 8)) {
456 dmar_map_gfx = 0;
457 printk(KERN_INFO
458 "Intel-IOMMU: disable GFX device mapping\n");
459 } else if (!strncmp(str, "forcedac", 8)) {
460 printk(KERN_INFO
461 "Intel-IOMMU: Forcing DAC for PCI devices\n");
462 dmar_forcedac = 1;
463 } else if (!strncmp(str, "strict", 6)) {
464 printk(KERN_INFO
465 "Intel-IOMMU: disable batched IOTLB flush\n");
466 intel_iommu_strict = 1;
467 } else if (!strncmp(str, "sp_off", 6)) {
468 printk(KERN_INFO
469 "Intel-IOMMU: disable supported super page\n");
470 intel_iommu_superpage = 0;
471 }
472
473 str += strcspn(str, ",");
474 while (*str == ',')
475 str++;
476 }
477 return 0;
478}
479__setup("intel_iommu=", intel_iommu_setup);
480
/* Slab caches backing the alloc_/free_ helpers below; created elsewhere. */
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
484
485static inline void *alloc_pgtable_page(int node)
486{
487 struct page *page;
488 void *vaddr = NULL;
489
490 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
491 if (page)
492 vaddr = page_address(page);
493 return vaddr;
494}
495
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long addr = (unsigned long)vaddr;

	free_page(addr);
}
500
501static inline void *alloc_domain_mem(void)
502{
503 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
504}
505
506static void free_domain_mem(void *vaddr)
507{
508 kmem_cache_free(iommu_domain_cache, vaddr);
509}
510
511static inline void * alloc_devinfo_mem(void)
512{
513 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
514}
515
516static inline void free_devinfo_mem(void *vaddr)
517{
518 kmem_cache_free(iommu_devinfo_cache, vaddr);
519}
520
521struct iova *alloc_iova_mem(void)
522{
523 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
524}
525
526void free_iova_mem(struct iova *iova)
527{
528 kmem_cache_free(iommu_iova_cache, iova);
529}
530
531
532static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
533{
534 unsigned long sagaw;
535 int agaw = -1;
536
537 sagaw = cap_sagaw(iommu->cap);
538 for (agaw = width_to_agaw(max_gaw);
539 agaw >= 0; agaw--) {
540 if (test_bit(agaw, &sagaw))
541 break;
542 }
543
544 return agaw;
545}
546
547
548
549
550int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
551{
552 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
553}
554
555
556
557
558
559
560int iommu_calculate_agaw(struct intel_iommu *iommu)
561{
562 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
563}
564
565
566static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
567{
568 int iommu_id;
569
570
571 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
572 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
573
574 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
575 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
576 return NULL;
577
578 return g_iommus[iommu_id];
579}
580
581static void domain_update_iommu_coherency(struct dmar_domain *domain)
582{
583 int i;
584
585 domain->iommu_coherency = 1;
586
587 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
588 if (!ecap_coherent(g_iommus[i]->ecap)) {
589 domain->iommu_coherency = 0;
590 break;
591 }
592 }
593}
594
595static void domain_update_iommu_snooping(struct dmar_domain *domain)
596{
597 int i;
598
599 domain->iommu_snooping = 1;
600
601 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
602 if (!ecap_sc_support(g_iommus[i]->ecap)) {
603 domain->iommu_snooping = 0;
604 break;
605 }
606 }
607}
608
609static void domain_update_iommu_superpage(struct dmar_domain *domain)
610{
611 struct dmar_drhd_unit *drhd;
612 struct intel_iommu *iommu = NULL;
613 int mask = 0xf;
614
615 if (!intel_iommu_superpage) {
616 domain->iommu_superpage = 0;
617 return;
618 }
619
620
621 for_each_active_iommu(iommu, drhd) {
622 mask &= cap_super_page_val(iommu->cap);
623 if (!mask) {
624 break;
625 }
626 }
627 domain->iommu_superpage = fls(mask);
628}
629
630
/* Refresh the cached coherency, snooping and superpage state, in that order. */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}
637
638static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
639{
640 struct dmar_drhd_unit *drhd = NULL;
641 int i;
642
643 for_each_drhd_unit(drhd) {
644 if (drhd->ignored)
645 continue;
646 if (segment != drhd->segment)
647 continue;
648
649 for (i = 0; i < drhd->devices_cnt; i++) {
650 if (drhd->devices[i] &&
651 drhd->devices[i]->bus->number == bus &&
652 drhd->devices[i]->devfn == devfn)
653 return drhd->iommu;
654 if (drhd->devices[i] &&
655 drhd->devices[i]->subordinate &&
656 drhd->devices[i]->subordinate->number <= bus &&
657 drhd->devices[i]->subordinate->subordinate >= bus)
658 return drhd->iommu;
659 }
660
661 if (drhd->include_all)
662 return drhd->iommu;
663 }
664
665 return NULL;
666}
667
668static void domain_flush_cache(struct dmar_domain *domain,
669 void *addr, int size)
670{
671 if (!domain->iommu_coherency)
672 clflush_cache_range(addr, size);
673}
674
675
676static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
677 u8 bus, u8 devfn)
678{
679 struct root_entry *root;
680 struct context_entry *context;
681 unsigned long phy_addr;
682 unsigned long flags;
683
684 spin_lock_irqsave(&iommu->lock, flags);
685 root = &iommu->root_entry[bus];
686 context = get_context_addr_from_root(root);
687 if (!context) {
688 context = (struct context_entry *)
689 alloc_pgtable_page(iommu->node);
690 if (!context) {
691 spin_unlock_irqrestore(&iommu->lock, flags);
692 return NULL;
693 }
694 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
695 phy_addr = virt_to_phys((void *)context);
696 set_root_value(root, phy_addr);
697 set_root_present(root);
698 __iommu_flush_cache(iommu, root, sizeof(*root));
699 }
700 spin_unlock_irqrestore(&iommu->lock, flags);
701 return &context[devfn];
702}
703
704static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
705{
706 struct root_entry *root;
707 struct context_entry *context;
708 int ret;
709 unsigned long flags;
710
711 spin_lock_irqsave(&iommu->lock, flags);
712 root = &iommu->root_entry[bus];
713 context = get_context_addr_from_root(root);
714 if (!context) {
715 ret = 0;
716 goto out;
717 }
718 ret = context_present(&context[devfn]);
719out:
720 spin_unlock_irqrestore(&iommu->lock, flags);
721 return ret;
722}
723
724static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
725{
726 struct root_entry *root;
727 struct context_entry *context;
728 unsigned long flags;
729
730 spin_lock_irqsave(&iommu->lock, flags);
731 root = &iommu->root_entry[bus];
732 context = get_context_addr_from_root(root);
733 if (context) {
734 context_clear_entry(&context[devfn]);
735 __iommu_flush_cache(iommu, &context[devfn], \
736 sizeof(*context));
737 }
738 spin_unlock_irqrestore(&iommu->lock, flags);
739}
740
741static void free_context_table(struct intel_iommu *iommu)
742{
743 struct root_entry *root;
744 int i;
745 unsigned long flags;
746 struct context_entry *context;
747
748 spin_lock_irqsave(&iommu->lock, flags);
749 if (!iommu->root_entry) {
750 goto out;
751 }
752 for (i = 0; i < ROOT_ENTRY_NR; i++) {
753 root = &iommu->root_entry[i];
754 context = get_context_addr_from_root(root);
755 if (context)
756 free_pgtable_page(context);
757 }
758 free_pgtable_page(iommu->root_entry);
759 iommu->root_entry = NULL;
760out:
761 spin_unlock_irqrestore(&iommu->lock, flags);
762}
763
/*
 * Walk @domain's page table from the top level towards @pfn.
 *
 * @target_level == 0: pure lookup - stop at the first superpage or
 *   non-present entry and return it; nothing is allocated.
 * @target_level > 0: descend to that level, allocating any missing
 *   intermediate table on the way, and return the entry at that level.
 *
 * Returns NULL only when an intermediate table cannot be allocated.
 */
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 unsigned long pfn, int target_level)
{
 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 struct dma_pte *parent, *pte = NULL;
 int level = agaw_to_level(domain->agaw);
 int offset;

 BUG_ON(!domain->pgd);
 /* pfn must fit in the domain's address width */
 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 parent = domain->pgd;

 while (level > 0) {
 void *tmp_page;

 offset = pfn_level_offset(pfn, level);
 pte = &parent[offset];
 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
 break;
 if (level == target_level)
 break;

 if (!dma_pte_present(pte)) {
 uint64_t pteval;

 tmp_page = alloc_pgtable_page(domain->nid);

 if (!tmp_page)
 return NULL;

 /* make the zeroed table visible before linking it in */
 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 /*
  * Install only if the slot is still zero.  A non-zero return
  * means the slot was populated concurrently: drop our page and
  * use the one already there.
  */
 if (cmpxchg64(&pte->val, 0ULL, pteval)) {

 free_pgtable_page(tmp_page);
 } else {
 /* result unused; NOTE(review): looks vestigial - confirm before removing */
 dma_pte_addr(pte);
 domain_flush_cache(domain, pte, sizeof(*pte));
 }
 }
 /* step down into the next-level table */
 parent = phys_to_virt(dma_pte_addr(pte));
 level--;
 }

 return pte;
}
810
811
812
813static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
814 unsigned long pfn,
815 int level, int *large_page)
816{
817 struct dma_pte *parent, *pte = NULL;
818 int total = agaw_to_level(domain->agaw);
819 int offset;
820
821 parent = domain->pgd;
822 while (level <= total) {
823 offset = pfn_level_offset(pfn, total);
824 pte = &parent[offset];
825 if (level == total)
826 return pte;
827
828 if (!dma_pte_present(pte)) {
829 *large_page = total;
830 break;
831 }
832
833 if (pte->val & DMA_PTE_LARGE_PAGE) {
834 *large_page = total;
835 return pte;
836 }
837
838 parent = phys_to_virt(dma_pte_addr(pte));
839 total--;
840 }
841 return NULL;
842}
843
844
/*
 * Clear the leaf entries mapping PFNs @start_pfn..@last_pfn (inclusive).
 *
 * Returns (large_page - 1) * 9, computed from the level recorded during the
 * final loop iteration.
 */
static int dma_pte_clear_range(struct dmar_domain *domain,
 unsigned long start_pfn,
 unsigned long last_pfn)
{
 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 unsigned int large_page = 1;
 struct dma_pte *first_pte, *pte;
 int order;

 /* both bounds must fit in the domain's address width and be ordered */
 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 BUG_ON(start_pfn > last_pfn);

 /* each outer iteration handles the entries within one page-table page */
 do {
 large_page = 1;
 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 if (!pte) {
 /* nothing mapped here: skip to the next boundary one level up */
 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 continue;
 }
 do {
 dma_clear_pte(pte);
 /* a large-page entry covers lvl_to_nr_pages() PFNs */
 start_pfn += lvl_to_nr_pages(large_page);
 pte++;
 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));

 /* flush the run of entries just cleared */
 domain_flush_cache(domain, first_pte,
 (void *)pte - (void *)first_pte);

 /* start_pfn == 0 means the PFN counter wrapped around */
 } while (start_pfn && start_pfn <= last_pfn);

 order = (large_page - 1) * 9;
 return order;
}
880
881
/*
 * Free the page-table pages that are fully covered by PFNs
 * @start_pfn..@last_pfn, working from level 2 up to the top level.  When
 * the range spans the whole domain (0..DOMAIN_MAX_PFN), the top-level table
 * is freed too and domain->pgd is set to NULL.
 */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
 unsigned long start_pfn,
 unsigned long last_pfn)
{
 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 struct dma_pte *first_pte, *pte;
 int total = agaw_to_level(domain->agaw);
 int level;
 unsigned long tmp;
 int large_page = 2;

 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 BUG_ON(start_pfn > last_pfn);

 /* level 1 entries point at data pages, so start one level above */
 level = 2;
 while (level <= total) {
 tmp = align_to_level(start_pfn, level);

 /* range does not cover one whole entry at this level: nothing to free */
 if (tmp + level_size(level) - 1 > last_pfn)
 return;

 do {
 large_page = level;
 first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
 /* lookup stopped above the requested level: continue from there */
 if (large_page > level)
 level = large_page + 1;
 if (!pte) {
 tmp = align_to_level(tmp + 1, level + 1);
 continue;
 }
 do {
 if (dma_pte_present(pte)) {
 /* free the lower-level table this entry points to */
 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
 dma_clear_pte(pte);
 }
 pte++;
 tmp += level_size(level);
 } while (!first_pte_in_page(pte) &&
 tmp + level_size(level) - 1 <= last_pfn);

 /* flush the run of entries just cleared */
 domain_flush_cache(domain, first_pte,
 (void *)pte - (void *)first_pte);

 /* tmp == 0 means the PFN counter wrapped around */
 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
 level++;
 }

 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 free_pgtable_page(domain->pgd);
 domain->pgd = NULL;
 }
}
937
938
939static int iommu_alloc_root_entry(struct intel_iommu *iommu)
940{
941 struct root_entry *root;
942 unsigned long flags;
943
944 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
945 if (!root)
946 return -ENOMEM;
947
948 __iommu_flush_cache(iommu, root, ROOT_SIZE);
949
950 spin_lock_irqsave(&iommu->lock, flags);
951 iommu->root_entry = root;
952 spin_unlock_irqrestore(&iommu->lock, flags);
953
954 return 0;
955}
956
957static void iommu_set_root_entry(struct intel_iommu *iommu)
958{
959 void *addr;
960 u32 sts;
961 unsigned long flag;
962
963 addr = iommu->root_entry;
964
965 raw_spin_lock_irqsave(&iommu->register_lock, flag);
966 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
967
968 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
969
970
971 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
972 readl, (sts & DMA_GSTS_RTPS), sts);
973
974 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
975}
976
977static void iommu_flush_write_buffer(struct intel_iommu *iommu)
978{
979 u32 val;
980 unsigned long flag;
981
982 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
983 return;
984
985 raw_spin_lock_irqsave(&iommu->register_lock, flag);
986 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
987
988
989 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
990 readl, (!(val & DMA_GSTS_WBFS)), val);
991
992 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
993}
994
995
996static void __iommu_flush_context(struct intel_iommu *iommu,
997 u16 did, u16 source_id, u8 function_mask,
998 u64 type)
999{
1000 u64 val = 0;
1001 unsigned long flag;
1002
1003 switch (type) {
1004 case DMA_CCMD_GLOBAL_INVL:
1005 val = DMA_CCMD_GLOBAL_INVL;
1006 break;
1007 case DMA_CCMD_DOMAIN_INVL:
1008 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1009 break;
1010 case DMA_CCMD_DEVICE_INVL:
1011 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1012 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1013 break;
1014 default:
1015 BUG();
1016 }
1017 val |= DMA_CCMD_ICC;
1018
1019 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1020 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1021
1022
1023 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1024 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1025
1026 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1027}
1028
1029
1030static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1031 u64 addr, unsigned int size_order, u64 type)
1032{
1033 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1034 u64 val = 0, val_iva = 0;
1035 unsigned long flag;
1036
1037 switch (type) {
1038 case DMA_TLB_GLOBAL_FLUSH:
1039
1040 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1041 break;
1042 case DMA_TLB_DSI_FLUSH:
1043 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1044 break;
1045 case DMA_TLB_PSI_FLUSH:
1046 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1047
1048 val_iva = size_order | addr;
1049 break;
1050 default:
1051 BUG();
1052 }
1053
1054#if 0
1055
1056
1057
1058
1059 if (cap_read_drain(iommu->cap))
1060 val |= DMA_TLB_READ_DRAIN;
1061#endif
1062 if (cap_write_drain(iommu->cap))
1063 val |= DMA_TLB_WRITE_DRAIN;
1064
1065 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1066
1067 if (val_iva)
1068 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1069 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1070
1071
1072 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1073 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1074
1075 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1076
1077
1078 if (DMA_TLB_IAIG(val) == 0)
1079 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1080 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1081 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1082 (unsigned long long)DMA_TLB_IIRG(type),
1083 (unsigned long long)DMA_TLB_IAIG(val));
1084}
1085
1086static struct device_domain_info *iommu_support_dev_iotlb(
1087 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1088{
1089 int found = 0;
1090 unsigned long flags;
1091 struct device_domain_info *info;
1092 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1093
1094 if (!ecap_dev_iotlb_support(iommu->ecap))
1095 return NULL;
1096
1097 if (!iommu->qi)
1098 return NULL;
1099
1100 spin_lock_irqsave(&device_domain_lock, flags);
1101 list_for_each_entry(info, &domain->devices, link)
1102 if (info->bus == bus && info->devfn == devfn) {
1103 found = 1;
1104 break;
1105 }
1106 spin_unlock_irqrestore(&device_domain_lock, flags);
1107
1108 if (!found || !info->dev)
1109 return NULL;
1110
1111 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1112 return NULL;
1113
1114 if (!dmar_find_matched_atsr_unit(info->dev))
1115 return NULL;
1116
1117 info->iommu = iommu;
1118
1119 return info;
1120}
1121
1122static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1123{
1124 if (!info)
1125 return;
1126
1127 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1128}
1129
1130static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1131{
1132 if (!info->dev || !pci_ats_enabled(info->dev))
1133 return;
1134
1135 pci_disable_ats(info->dev);
1136}
1137
1138static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1139 u64 addr, unsigned mask)
1140{
1141 u16 sid, qdep;
1142 unsigned long flags;
1143 struct device_domain_info *info;
1144
1145 spin_lock_irqsave(&device_domain_lock, flags);
1146 list_for_each_entry(info, &domain->devices, link) {
1147 if (!info->dev || !pci_ats_enabled(info->dev))
1148 continue;
1149
1150 sid = info->bus << 8 | info->devfn;
1151 qdep = pci_ats_queue_depth(info->dev);
1152 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1153 }
1154 spin_unlock_irqrestore(&device_domain_lock, flags);
1155}
1156
1157static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1158 unsigned long pfn, unsigned int pages, int map)
1159{
1160 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1161 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1162
1163 BUG_ON(pages == 0);
1164
1165
1166
1167
1168
1169
1170
1171 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1172 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1173 DMA_TLB_DSI_FLUSH);
1174 else
1175 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1176 DMA_TLB_PSI_FLUSH);
1177
1178
1179
1180
1181
1182 if (!cap_caching_mode(iommu->cap) || !map)
1183 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1184}
1185
1186static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1187{
1188 u32 pmen;
1189 unsigned long flags;
1190
1191 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1192 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1193 pmen &= ~DMA_PMEN_EPM;
1194 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1195
1196
1197 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1198 readl, !(pmen & DMA_PMEN_PRS), pmen);
1199
1200 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1201}
1202
1203static int iommu_enable_translation(struct intel_iommu *iommu)
1204{
1205 u32 sts;
1206 unsigned long flags;
1207
1208 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1209 iommu->gcmd |= DMA_GCMD_TE;
1210 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1211
1212
1213 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1214 readl, (sts & DMA_GSTS_TES), sts);
1215
1216 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1217 return 0;
1218}
1219
1220static int iommu_disable_translation(struct intel_iommu *iommu)
1221{
1222 u32 sts;
1223 unsigned long flag;
1224
1225 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1226 iommu->gcmd &= ~DMA_GCMD_TE;
1227 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1228
1229
1230 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1231 readl, (!(sts & DMA_GSTS_TES)), sts);
1232
1233 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1234 return 0;
1235}
1236
1237
1238static int iommu_init_domains(struct intel_iommu *iommu)
1239{
1240 unsigned long ndomains;
1241 unsigned long nlongs;
1242
1243 ndomains = cap_ndoms(iommu->cap);
1244 pr_debug("IOMMU %d: Number of Domains supportd <%ld>\n", iommu->seq_id,
1245 ndomains);
1246 nlongs = BITS_TO_LONGS(ndomains);
1247
1248 spin_lock_init(&iommu->lock);
1249
1250
1251
1252
1253 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1254 if (!iommu->domain_ids) {
1255 printk(KERN_ERR "Allocating domain id array failed\n");
1256 return -ENOMEM;
1257 }
1258 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1259 GFP_KERNEL);
1260 if (!iommu->domains) {
1261 printk(KERN_ERR "Allocating domain array failed\n");
1262 return -ENOMEM;
1263 }
1264
1265
1266
1267
1268
1269 if (cap_caching_mode(iommu->cap))
1270 set_bit(0, iommu->domain_ids);
1271 return 0;
1272}
1273
1274
/* Domain teardown routines used by free_dmar_iommu(); defined elsewhere in this file. */
static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);
1277
1278void free_dmar_iommu(struct intel_iommu *iommu)
1279{
1280 struct dmar_domain *domain;
1281 int i;
1282 unsigned long flags;
1283
1284 if ((iommu->domains) && (iommu->domain_ids)) {
1285 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1286 domain = iommu->domains[i];
1287 clear_bit(i, iommu->domain_ids);
1288
1289 spin_lock_irqsave(&domain->iommu_lock, flags);
1290 if (--domain->iommu_count == 0) {
1291 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1292 vm_domain_exit(domain);
1293 else
1294 domain_exit(domain);
1295 }
1296 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1297 }
1298 }
1299
1300 if (iommu->gcmd & DMA_GCMD_TE)
1301 iommu_disable_translation(iommu);
1302
1303 if (iommu->irq) {
1304 irq_set_handler_data(iommu->irq, NULL);
1305
1306 free_irq(iommu->irq, iommu);
1307 destroy_irq(iommu->irq);
1308 }
1309
1310 kfree(iommu->domains);
1311 kfree(iommu->domain_ids);
1312
1313 g_iommus[iommu->seq_id] = NULL;
1314
1315
1316 for (i = 0; i < g_num_of_iommus; i++) {
1317 if (g_iommus[i])
1318 break;
1319 }
1320
1321 if (i == g_num_of_iommus)
1322 kfree(g_iommus);
1323
1324
1325 free_context_table(iommu);
1326}
1327
1328static struct dmar_domain *alloc_domain(void)
1329{
1330 struct dmar_domain *domain;
1331
1332 domain = alloc_domain_mem();
1333 if (!domain)
1334 return NULL;
1335
1336 domain->nid = -1;
1337 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1338 domain->flags = 0;
1339
1340 return domain;
1341}
1342
1343static int iommu_attach_domain(struct dmar_domain *domain,
1344 struct intel_iommu *iommu)
1345{
1346 int num;
1347 unsigned long ndomains;
1348 unsigned long flags;
1349
1350 ndomains = cap_ndoms(iommu->cap);
1351
1352 spin_lock_irqsave(&iommu->lock, flags);
1353
1354 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1355 if (num >= ndomains) {
1356 spin_unlock_irqrestore(&iommu->lock, flags);
1357 printk(KERN_ERR "IOMMU: no free domain ids\n");
1358 return -ENOMEM;
1359 }
1360
1361 domain->id = num;
1362 set_bit(num, iommu->domain_ids);
1363 set_bit(iommu->seq_id, &domain->iommu_bmp);
1364 iommu->domains[num] = domain;
1365 spin_unlock_irqrestore(&iommu->lock, flags);
1366
1367 return 0;
1368}
1369
1370static void iommu_detach_domain(struct dmar_domain *domain,
1371 struct intel_iommu *iommu)
1372{
1373 unsigned long flags;
1374 int num, ndomains;
1375 int found = 0;
1376
1377 spin_lock_irqsave(&iommu->lock, flags);
1378 ndomains = cap_ndoms(iommu->cap);
1379 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1380 if (iommu->domains[num] == domain) {
1381 found = 1;
1382 break;
1383 }
1384 }
1385
1386 if (found) {
1387 clear_bit(num, iommu->domain_ids);
1388 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1389 iommu->domains[num] = NULL;
1390 }
1391 spin_unlock_irqrestore(&iommu->lock, flags);
1392}
1393
1394static struct iova_domain reserved_iova_list;
1395static struct lock_class_key reserved_rbtree_key;
1396
1397static int dmar_init_reserved_ranges(void)
1398{
1399 struct pci_dev *pdev = NULL;
1400 struct iova *iova;
1401 int i;
1402
1403 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1404
1405 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1406 &reserved_rbtree_key);
1407
1408
1409 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1410 IOVA_PFN(IOAPIC_RANGE_END));
1411 if (!iova) {
1412 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1413 return -ENODEV;
1414 }
1415
1416
1417 for_each_pci_dev(pdev) {
1418 struct resource *r;
1419
1420 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1421 r = &pdev->resource[i];
1422 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1423 continue;
1424 iova = reserve_iova(&reserved_iova_list,
1425 IOVA_PFN(r->start),
1426 IOVA_PFN(r->end));
1427 if (!iova) {
1428 printk(KERN_ERR "Reserve iova failed\n");
1429 return -ENODEV;
1430 }
1431 }
1432 }
1433 return 0;
1434}
1435
1436static void domain_reserve_special_ranges(struct dmar_domain *domain)
1437{
1438 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1439}
1440
/*
 * Round a guest address width up to the next value of the form 12 + 9*k
 * (a 12-bit page offset plus whole 9-bit table levels), capped at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1454
1455static int domain_init(struct dmar_domain *domain, int guest_width)
1456{
1457 struct intel_iommu *iommu;
1458 int adjust_width, agaw;
1459 unsigned long sagaw;
1460
1461 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1462 spin_lock_init(&domain->iommu_lock);
1463
1464 domain_reserve_special_ranges(domain);
1465
1466
1467 iommu = domain_get_iommu(domain);
1468 if (guest_width > cap_mgaw(iommu->cap))
1469 guest_width = cap_mgaw(iommu->cap);
1470 domain->gaw = guest_width;
1471 adjust_width = guestwidth_to_adjustwidth(guest_width);
1472 agaw = width_to_agaw(adjust_width);
1473 sagaw = cap_sagaw(iommu->cap);
1474 if (!test_bit(agaw, &sagaw)) {
1475
1476 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1477 agaw = find_next_bit(&sagaw, 5, agaw);
1478 if (agaw >= 5)
1479 return -ENODEV;
1480 }
1481 domain->agaw = agaw;
1482 INIT_LIST_HEAD(&domain->devices);
1483
1484 if (ecap_coherent(iommu->ecap))
1485 domain->iommu_coherency = 1;
1486 else
1487 domain->iommu_coherency = 0;
1488
1489 if (ecap_sc_support(iommu->ecap))
1490 domain->iommu_snooping = 1;
1491 else
1492 domain->iommu_snooping = 0;
1493
1494 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1495 domain->iommu_count = 1;
1496 domain->nid = iommu->node;
1497
1498
1499 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1500 if (!domain->pgd)
1501 return -ENOMEM;
1502 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1503 return 0;
1504}
1505
1506static void domain_exit(struct dmar_domain *domain)
1507{
1508 struct dmar_drhd_unit *drhd;
1509 struct intel_iommu *iommu;
1510
1511
1512 if (!domain)
1513 return;
1514
1515
1516 if (!intel_iommu_strict)
1517 flush_unmaps_timeout(0);
1518
1519 domain_remove_dev_info(domain);
1520
1521 put_iova_domain(&domain->iovad);
1522
1523
1524 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1525
1526
1527 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1528
1529 for_each_active_iommu(iommu, drhd)
1530 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1531 iommu_detach_domain(domain, iommu);
1532
1533 free_domain_mem(domain);
1534}
1535
1536static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1537 u8 bus, u8 devfn, int translation)
1538{
1539 struct context_entry *context;
1540 unsigned long flags;
1541 struct intel_iommu *iommu;
1542 struct dma_pte *pgd;
1543 unsigned long num;
1544 unsigned long ndomains;
1545 int id;
1546 int agaw;
1547 struct device_domain_info *info = NULL;
1548
1549 pr_debug("Set context mapping for %02x:%02x.%d\n",
1550 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1551
1552 BUG_ON(!domain->pgd);
1553 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1554 translation != CONTEXT_TT_MULTI_LEVEL);
1555
1556 iommu = device_to_iommu(segment, bus, devfn);
1557 if (!iommu)
1558 return -ENODEV;
1559
1560 context = device_to_context_entry(iommu, bus, devfn);
1561 if (!context)
1562 return -ENOMEM;
1563 spin_lock_irqsave(&iommu->lock, flags);
1564 if (context_present(context)) {
1565 spin_unlock_irqrestore(&iommu->lock, flags);
1566 return 0;
1567 }
1568
1569 id = domain->id;
1570 pgd = domain->pgd;
1571
1572 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1573 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1574 int found = 0;
1575
1576
1577 ndomains = cap_ndoms(iommu->cap);
1578 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1579 if (iommu->domains[num] == domain) {
1580 id = num;
1581 found = 1;
1582 break;
1583 }
1584 }
1585
1586 if (found == 0) {
1587 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1588 if (num >= ndomains) {
1589 spin_unlock_irqrestore(&iommu->lock, flags);
1590 printk(KERN_ERR "IOMMU: no free domain ids\n");
1591 return -EFAULT;
1592 }
1593
1594 set_bit(num, iommu->domain_ids);
1595 iommu->domains[num] = domain;
1596 id = num;
1597 }
1598
1599
1600
1601
1602
1603 if (translation != CONTEXT_TT_PASS_THROUGH) {
1604 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1605 pgd = phys_to_virt(dma_pte_addr(pgd));
1606 if (!dma_pte_present(pgd)) {
1607 spin_unlock_irqrestore(&iommu->lock, flags);
1608 return -ENOMEM;
1609 }
1610 }
1611 }
1612 }
1613
1614 context_set_domain_id(context, id);
1615
1616 if (translation != CONTEXT_TT_PASS_THROUGH) {
1617 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1618 translation = info ? CONTEXT_TT_DEV_IOTLB :
1619 CONTEXT_TT_MULTI_LEVEL;
1620 }
1621
1622
1623
1624
1625 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1626 context_set_address_width(context, iommu->msagaw);
1627 else {
1628 context_set_address_root(context, virt_to_phys(pgd));
1629 context_set_address_width(context, iommu->agaw);
1630 }
1631
1632 context_set_translation_type(context, translation);
1633 context_set_fault_enable(context);
1634 context_set_present(context);
1635 domain_flush_cache(domain, context, sizeof(*context));
1636
1637
1638
1639
1640
1641
1642
1643 if (cap_caching_mode(iommu->cap)) {
1644 iommu->flush.flush_context(iommu, 0,
1645 (((u16)bus) << 8) | devfn,
1646 DMA_CCMD_MASK_NOBIT,
1647 DMA_CCMD_DEVICE_INVL);
1648 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1649 } else {
1650 iommu_flush_write_buffer(iommu);
1651 }
1652 iommu_enable_dev_iotlb(info);
1653 spin_unlock_irqrestore(&iommu->lock, flags);
1654
1655 spin_lock_irqsave(&domain->iommu_lock, flags);
1656 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1657 domain->iommu_count++;
1658 if (domain->iommu_count == 1)
1659 domain->nid = iommu->node;
1660 domain_update_iommu_cap(domain);
1661 }
1662 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1663 return 0;
1664}
1665
1666static int
1667domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1668 int translation)
1669{
1670 int ret;
1671 struct pci_dev *tmp, *parent;
1672
1673 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1674 pdev->bus->number, pdev->devfn,
1675 translation);
1676 if (ret)
1677 return ret;
1678
1679
1680 tmp = pci_find_upstream_pcie_bridge(pdev);
1681 if (!tmp)
1682 return 0;
1683
1684 parent = pdev->bus->self;
1685 while (parent != tmp) {
1686 ret = domain_context_mapping_one(domain,
1687 pci_domain_nr(parent->bus),
1688 parent->bus->number,
1689 parent->devfn, translation);
1690 if (ret)
1691 return ret;
1692 parent = parent->bus->self;
1693 }
1694 if (pci_is_pcie(tmp))
1695 return domain_context_mapping_one(domain,
1696 pci_domain_nr(tmp->subordinate),
1697 tmp->subordinate->number, 0,
1698 translation);
1699 else
1700 return domain_context_mapping_one(domain,
1701 pci_domain_nr(tmp->bus),
1702 tmp->bus->number,
1703 tmp->devfn,
1704 translation);
1705}
1706
1707static int domain_context_mapped(struct pci_dev *pdev)
1708{
1709 int ret;
1710 struct pci_dev *tmp, *parent;
1711 struct intel_iommu *iommu;
1712
1713 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1714 pdev->devfn);
1715 if (!iommu)
1716 return -ENODEV;
1717
1718 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1719 if (!ret)
1720 return ret;
1721
1722 tmp = pci_find_upstream_pcie_bridge(pdev);
1723 if (!tmp)
1724 return ret;
1725
1726 parent = pdev->bus->self;
1727 while (parent != tmp) {
1728 ret = device_context_mapped(iommu, parent->bus->number,
1729 parent->devfn);
1730 if (!ret)
1731 return ret;
1732 parent = parent->bus->self;
1733 }
1734 if (pci_is_pcie(tmp))
1735 return device_context_mapped(iommu, tmp->subordinate->number,
1736 0);
1737 else
1738 return device_context_mapped(iommu, tmp->bus->number,
1739 tmp->devfn);
1740}
1741
1742
1743static inline unsigned long aligned_nrpages(unsigned long host_addr,
1744 size_t size)
1745{
1746 host_addr &= ~PAGE_MASK;
1747 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1748}
1749
1750
1751static inline int hardware_largepage_caps(struct dmar_domain *domain,
1752 unsigned long iov_pfn,
1753 unsigned long phy_pfn,
1754 unsigned long pages)
1755{
1756 int support, level = 1;
1757 unsigned long pfnmerge;
1758
1759 support = domain->iommu_superpage;
1760
1761
1762
1763
1764
1765 pfnmerge = iov_pfn | phy_pfn;
1766
1767 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1768 pages >>= VTD_STRIDE_SHIFT;
1769 if (!pages)
1770 break;
1771 pfnmerge >>= VTD_STRIDE_SHIFT;
1772 level++;
1773 support--;
1774 }
1775 return level;
1776}
1777
1778static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1779 struct scatterlist *sg, unsigned long phys_pfn,
1780 unsigned long nr_pages, int prot)
1781{
1782 struct dma_pte *first_pte = NULL, *pte = NULL;
1783 phys_addr_t uninitialized_var(pteval);
1784 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1785 unsigned long sg_res;
1786 unsigned int largepage_lvl = 0;
1787 unsigned long lvl_pages = 0;
1788
1789 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1790
1791 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1792 return -EINVAL;
1793
1794 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1795
1796 if (sg)
1797 sg_res = 0;
1798 else {
1799 sg_res = nr_pages + 1;
1800 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1801 }
1802
1803 while (nr_pages > 0) {
1804 uint64_t tmp;
1805
1806 if (!sg_res) {
1807 sg_res = aligned_nrpages(sg->offset, sg->length);
1808 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1809 sg->dma_length = sg->length;
1810 pteval = page_to_phys(sg_page(sg)) | prot;
1811 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1812 }
1813
1814 if (!pte) {
1815 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1816
1817 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1818 if (!pte)
1819 return -ENOMEM;
1820
1821 if (largepage_lvl > 1)
1822 pteval |= DMA_PTE_LARGE_PAGE;
1823 else
1824 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1825
1826 }
1827
1828
1829
1830 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1831 if (tmp) {
1832 static int dumps = 5;
1833 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1834 iov_pfn, tmp, (unsigned long long)pteval);
1835 if (dumps) {
1836 dumps--;
1837 debug_dma_dump_mappings(NULL);
1838 }
1839 WARN_ON(1);
1840 }
1841
1842 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1843
1844 BUG_ON(nr_pages < lvl_pages);
1845 BUG_ON(sg_res < lvl_pages);
1846
1847 nr_pages -= lvl_pages;
1848 iov_pfn += lvl_pages;
1849 phys_pfn += lvl_pages;
1850 pteval += lvl_pages * VTD_PAGE_SIZE;
1851 sg_res -= lvl_pages;
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864 pte++;
1865 if (!nr_pages || first_pte_in_page(pte) ||
1866 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1867 domain_flush_cache(domain, first_pte,
1868 (void *)pte - (void *)first_pte);
1869 pte = NULL;
1870 }
1871
1872 if (!sg_res && nr_pages)
1873 sg = sg_next(sg);
1874 }
1875 return 0;
1876}
1877
/* Map a scatterlist at @iov_pfn; the physical pages come from @sg. */
static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	/* The explicit physical pfn is unused when a scatterlist is given. */
	const unsigned long unused_phys_pfn = 0;

	return __domain_mapping(domain, iov_pfn, sg, unused_phys_pfn,
				nr_pages, prot);
}
1884
1885static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1886 unsigned long phys_pfn, unsigned long nr_pages,
1887 int prot)
1888{
1889 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1890}
1891
1892static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1893{
1894 if (!iommu)
1895 return;
1896
1897 clear_context_table(iommu, bus, devfn);
1898 iommu->flush.flush_context(iommu, 0, 0, 0,
1899 DMA_CCMD_GLOBAL_INVL);
1900 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1901}
1902
1903static void domain_remove_dev_info(struct dmar_domain *domain)
1904{
1905 struct device_domain_info *info;
1906 unsigned long flags;
1907 struct intel_iommu *iommu;
1908
1909 spin_lock_irqsave(&device_domain_lock, flags);
1910 while (!list_empty(&domain->devices)) {
1911 info = list_entry(domain->devices.next,
1912 struct device_domain_info, link);
1913 list_del(&info->link);
1914 list_del(&info->global);
1915 if (info->dev)
1916 info->dev->dev.archdata.iommu = NULL;
1917 spin_unlock_irqrestore(&device_domain_lock, flags);
1918
1919 iommu_disable_dev_iotlb(info);
1920 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1921 iommu_detach_dev(iommu, info->bus, info->devfn);
1922 free_devinfo_mem(info);
1923
1924 spin_lock_irqsave(&device_domain_lock, flags);
1925 }
1926 spin_unlock_irqrestore(&device_domain_lock, flags);
1927}
1928
1929
1930
1931
1932
1933static struct dmar_domain *
1934find_domain(struct pci_dev *pdev)
1935{
1936 struct device_domain_info *info;
1937
1938
1939 info = pdev->dev.archdata.iommu;
1940 if (info)
1941 return info->domain;
1942 return NULL;
1943}
1944
1945
1946static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1947{
1948 struct dmar_domain *domain, *found = NULL;
1949 struct intel_iommu *iommu;
1950 struct dmar_drhd_unit *drhd;
1951 struct device_domain_info *info, *tmp;
1952 struct pci_dev *dev_tmp;
1953 unsigned long flags;
1954 int bus = 0, devfn = 0;
1955 int segment;
1956 int ret;
1957
1958 domain = find_domain(pdev);
1959 if (domain)
1960 return domain;
1961
1962 segment = pci_domain_nr(pdev->bus);
1963
1964 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1965 if (dev_tmp) {
1966 if (pci_is_pcie(dev_tmp)) {
1967 bus = dev_tmp->subordinate->number;
1968 devfn = 0;
1969 } else {
1970 bus = dev_tmp->bus->number;
1971 devfn = dev_tmp->devfn;
1972 }
1973 spin_lock_irqsave(&device_domain_lock, flags);
1974 list_for_each_entry(info, &device_domain_list, global) {
1975 if (info->segment == segment &&
1976 info->bus == bus && info->devfn == devfn) {
1977 found = info->domain;
1978 break;
1979 }
1980 }
1981 spin_unlock_irqrestore(&device_domain_lock, flags);
1982
1983 if (found) {
1984 domain = found;
1985 goto found_domain;
1986 }
1987 }
1988
1989 domain = alloc_domain();
1990 if (!domain)
1991 goto error;
1992
1993
1994 drhd = dmar_find_matched_drhd_unit(pdev);
1995 if (!drhd) {
1996 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1997 pci_name(pdev));
1998 return NULL;
1999 }
2000 iommu = drhd->iommu;
2001
2002 ret = iommu_attach_domain(domain, iommu);
2003 if (ret) {
2004 free_domain_mem(domain);
2005 goto error;
2006 }
2007
2008 if (domain_init(domain, gaw)) {
2009 domain_exit(domain);
2010 goto error;
2011 }
2012
2013
2014 if (dev_tmp) {
2015 info = alloc_devinfo_mem();
2016 if (!info) {
2017 domain_exit(domain);
2018 goto error;
2019 }
2020 info->segment = segment;
2021 info->bus = bus;
2022 info->devfn = devfn;
2023 info->dev = NULL;
2024 info->domain = domain;
2025
2026 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2027
2028
2029 found = NULL;
2030 spin_lock_irqsave(&device_domain_lock, flags);
2031 list_for_each_entry(tmp, &device_domain_list, global) {
2032 if (tmp->segment == segment &&
2033 tmp->bus == bus && tmp->devfn == devfn) {
2034 found = tmp->domain;
2035 break;
2036 }
2037 }
2038 if (found) {
2039 spin_unlock_irqrestore(&device_domain_lock, flags);
2040 free_devinfo_mem(info);
2041 domain_exit(domain);
2042 domain = found;
2043 } else {
2044 list_add(&info->link, &domain->devices);
2045 list_add(&info->global, &device_domain_list);
2046 spin_unlock_irqrestore(&device_domain_lock, flags);
2047 }
2048 }
2049
2050found_domain:
2051 info = alloc_devinfo_mem();
2052 if (!info)
2053 goto error;
2054 info->segment = segment;
2055 info->bus = pdev->bus->number;
2056 info->devfn = pdev->devfn;
2057 info->dev = pdev;
2058 info->domain = domain;
2059 spin_lock_irqsave(&device_domain_lock, flags);
2060
2061 found = find_domain(pdev);
2062 if (found != NULL) {
2063 spin_unlock_irqrestore(&device_domain_lock, flags);
2064 if (found != domain) {
2065 domain_exit(domain);
2066 domain = found;
2067 }
2068 free_devinfo_mem(info);
2069 return domain;
2070 }
2071 list_add(&info->link, &domain->devices);
2072 list_add(&info->global, &device_domain_list);
2073 pdev->dev.archdata.iommu = info;
2074 spin_unlock_irqrestore(&device_domain_lock, flags);
2075 return domain;
2076error:
2077
2078 return find_domain(pdev);
2079}
2080
2081static int iommu_identity_mapping;
2082#define IDENTMAP_ALL 1
2083#define IDENTMAP_GFX 2
2084#define IDENTMAP_AZALIA 4
2085
2086static int iommu_domain_identity_map(struct dmar_domain *domain,
2087 unsigned long long start,
2088 unsigned long long end)
2089{
2090 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2091 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2092
2093 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2094 dma_to_mm_pfn(last_vpfn))) {
2095 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2096 return -ENOMEM;
2097 }
2098
2099 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2100 start, end, domain->id);
2101
2102
2103
2104
2105 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2106
2107 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2108 last_vpfn - first_vpfn + 1,
2109 DMA_PTE_READ|DMA_PTE_WRITE);
2110}
2111
2112static int iommu_prepare_identity_map(struct pci_dev *pdev,
2113 unsigned long long start,
2114 unsigned long long end)
2115{
2116 struct dmar_domain *domain;
2117 int ret;
2118
2119 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2120 if (!domain)
2121 return -ENOMEM;
2122
2123
2124
2125
2126
2127 if (domain == si_domain && hw_pass_through) {
2128 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2129 pci_name(pdev), start, end);
2130 return 0;
2131 }
2132
2133 printk(KERN_INFO
2134 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2135 pci_name(pdev), start, end);
2136
2137 if (end < start) {
2138 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2139 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2140 dmi_get_system_info(DMI_BIOS_VENDOR),
2141 dmi_get_system_info(DMI_BIOS_VERSION),
2142 dmi_get_system_info(DMI_PRODUCT_VERSION));
2143 ret = -EIO;
2144 goto error;
2145 }
2146
2147 if (end >> agaw_to_width(domain->agaw)) {
2148 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2149 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2150 agaw_to_width(domain->agaw),
2151 dmi_get_system_info(DMI_BIOS_VENDOR),
2152 dmi_get_system_info(DMI_BIOS_VERSION),
2153 dmi_get_system_info(DMI_PRODUCT_VERSION));
2154 ret = -EIO;
2155 goto error;
2156 }
2157
2158 ret = iommu_domain_identity_map(domain, start, end);
2159 if (ret)
2160 goto error;
2161
2162
2163 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2164 if (ret)
2165 goto error;
2166
2167 return 0;
2168
2169 error:
2170 domain_exit(domain);
2171 return ret;
2172}
2173
2174static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2175 struct pci_dev *pdev)
2176{
2177 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2178 return 0;
2179 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2180 rmrr->end_address);
2181}
2182
#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
/*
 * Floppy workaround: give the first ISA bridge found an identity mapping
 * of the low 16MiB.  Failure is only reported, not propagated.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

	/* pci_get_class() returned the device with a reference held. */
	pci_dev_put(pdev);
}
#else
/* Without the floppy workaround there is nothing to prepare. */
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2207
2208static int md_domain_init(struct dmar_domain *domain, int guest_width);
2209
2210static int __init si_domain_init(int hw)
2211{
2212 struct dmar_drhd_unit *drhd;
2213 struct intel_iommu *iommu;
2214 int nid, ret = 0;
2215
2216 si_domain = alloc_domain();
2217 if (!si_domain)
2218 return -EFAULT;
2219
2220 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2221
2222 for_each_active_iommu(iommu, drhd) {
2223 ret = iommu_attach_domain(si_domain, iommu);
2224 if (ret) {
2225 domain_exit(si_domain);
2226 return -EFAULT;
2227 }
2228 }
2229
2230 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2231 domain_exit(si_domain);
2232 return -EFAULT;
2233 }
2234
2235 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2236
2237 if (hw)
2238 return 0;
2239
2240 for_each_online_node(nid) {
2241 unsigned long start_pfn, end_pfn;
2242 int i;
2243
2244 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2245 ret = iommu_domain_identity_map(si_domain,
2246 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2247 if (ret)
2248 return ret;
2249 }
2250 }
2251
2252 return 0;
2253}
2254
2255static void domain_remove_one_dev_info(struct dmar_domain *domain,
2256 struct pci_dev *pdev);
2257static int identity_mapping(struct pci_dev *pdev)
2258{
2259 struct device_domain_info *info;
2260
2261 if (likely(!iommu_identity_mapping))
2262 return 0;
2263
2264 info = pdev->dev.archdata.iommu;
2265 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2266 return (info->domain == si_domain);
2267
2268 return 0;
2269}
2270
2271static int domain_add_dev_info(struct dmar_domain *domain,
2272 struct pci_dev *pdev,
2273 int translation)
2274{
2275 struct device_domain_info *info;
2276 unsigned long flags;
2277 int ret;
2278
2279 info = alloc_devinfo_mem();
2280 if (!info)
2281 return -ENOMEM;
2282
2283 ret = domain_context_mapping(domain, pdev, translation);
2284 if (ret) {
2285 free_devinfo_mem(info);
2286 return ret;
2287 }
2288
2289 info->segment = pci_domain_nr(pdev->bus);
2290 info->bus = pdev->bus->number;
2291 info->devfn = pdev->devfn;
2292 info->dev = pdev;
2293 info->domain = domain;
2294
2295 spin_lock_irqsave(&device_domain_lock, flags);
2296 list_add(&info->link, &domain->devices);
2297 list_add(&info->global, &device_domain_list);
2298 pdev->dev.archdata.iommu = info;
2299 spin_unlock_irqrestore(&device_domain_lock, flags);
2300
2301 return 0;
2302}
2303
2304static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2305{
2306 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2307 return 1;
2308
2309 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2310 return 1;
2311
2312 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2313 return 0;
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332 if (!pci_is_pcie(pdev)) {
2333 if (!pci_is_root_bus(pdev->bus))
2334 return 0;
2335 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2336 return 0;
2337 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2338 return 0;
2339
2340
2341
2342
2343
2344
2345 if (!startup) {
2346
2347
2348
2349
2350 u64 dma_mask = pdev->dma_mask;
2351
2352 if (pdev->dev.coherent_dma_mask &&
2353 pdev->dev.coherent_dma_mask < dma_mask)
2354 dma_mask = pdev->dev.coherent_dma_mask;
2355
2356 return dma_mask >= dma_get_required_mask(&pdev->dev);
2357 }
2358
2359 return 1;
2360}
2361
2362static int __init iommu_prepare_static_identity_mapping(int hw)
2363{
2364 struct pci_dev *pdev = NULL;
2365 int ret;
2366
2367 ret = si_domain_init(hw);
2368 if (ret)
2369 return -EFAULT;
2370
2371 for_each_pci_dev(pdev) {
2372
2373 if (IS_BRIDGE_HOST_DEVICE(pdev))
2374 continue;
2375 if (iommu_should_identity_map(pdev, 1)) {
2376 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2377 hw ? "hardware" : "software", pci_name(pdev));
2378
2379 ret = domain_add_dev_info(si_domain, pdev,
2380 hw ? CONTEXT_TT_PASS_THROUGH :
2381 CONTEXT_TT_MULTI_LEVEL);
2382 if (ret)
2383 return ret;
2384 }
2385 }
2386
2387 return 0;
2388}
2389
2390static int __init init_dmars(void)
2391{
2392 struct dmar_drhd_unit *drhd;
2393 struct dmar_rmrr_unit *rmrr;
2394 struct pci_dev *pdev;
2395 struct intel_iommu *iommu;
2396 int i, ret;
2397
2398
2399
2400
2401
2402
2403
2404 for_each_drhd_unit(drhd) {
2405 g_num_of_iommus++;
2406
2407
2408
2409
2410
2411 }
2412
2413 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2414 GFP_KERNEL);
2415 if (!g_iommus) {
2416 printk(KERN_ERR "Allocating global iommu array failed\n");
2417 ret = -ENOMEM;
2418 goto error;
2419 }
2420
2421 deferred_flush = kzalloc(g_num_of_iommus *
2422 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2423 if (!deferred_flush) {
2424 ret = -ENOMEM;
2425 goto error;
2426 }
2427
2428 for_each_drhd_unit(drhd) {
2429 if (drhd->ignored)
2430 continue;
2431
2432 iommu = drhd->iommu;
2433 g_iommus[iommu->seq_id] = iommu;
2434
2435 ret = iommu_init_domains(iommu);
2436 if (ret)
2437 goto error;
2438
2439
2440
2441
2442
2443
2444 ret = iommu_alloc_root_entry(iommu);
2445 if (ret) {
2446 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2447 goto error;
2448 }
2449 if (!ecap_pass_through(iommu->ecap))
2450 hw_pass_through = 0;
2451 }
2452
2453
2454
2455
2456 for_each_drhd_unit(drhd) {
2457 if (drhd->ignored)
2458 continue;
2459
2460 iommu = drhd->iommu;
2461
2462
2463
2464
2465
2466
2467 if (iommu->qi)
2468 continue;
2469
2470
2471
2472
2473 dmar_fault(-1, iommu);
2474
2475
2476
2477
2478 dmar_disable_qi(iommu);
2479 }
2480
2481 for_each_drhd_unit(drhd) {
2482 if (drhd->ignored)
2483 continue;
2484
2485 iommu = drhd->iommu;
2486
2487 if (dmar_enable_qi(iommu)) {
2488
2489
2490
2491
2492 iommu->flush.flush_context = __iommu_flush_context;
2493 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2494 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2495 "invalidation\n",
2496 iommu->seq_id,
2497 (unsigned long long)drhd->reg_base_addr);
2498 } else {
2499 iommu->flush.flush_context = qi_flush_context;
2500 iommu->flush.flush_iotlb = qi_flush_iotlb;
2501 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2502 "invalidation\n",
2503 iommu->seq_id,
2504 (unsigned long long)drhd->reg_base_addr);
2505 }
2506 }
2507
2508 if (iommu_pass_through)
2509 iommu_identity_mapping |= IDENTMAP_ALL;
2510
2511#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2512 iommu_identity_mapping |= IDENTMAP_GFX;
2513#endif
2514
2515 check_tylersburg_isoch();
2516
2517
2518
2519
2520
2521
2522 if (iommu_identity_mapping) {
2523 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2524 if (ret) {
2525 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2526 goto error;
2527 }
2528 }
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2544 for_each_rmrr_units(rmrr) {
2545 for (i = 0; i < rmrr->devices_cnt; i++) {
2546 pdev = rmrr->devices[i];
2547
2548
2549
2550
2551 if (!pdev)
2552 continue;
2553 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2554 if (ret)
2555 printk(KERN_ERR
2556 "IOMMU: mapping reserved region failed\n");
2557 }
2558 }
2559
2560 iommu_prepare_isa();
2561
2562
2563
2564
2565
2566
2567
2568
2569 for_each_drhd_unit(drhd) {
2570 if (drhd->ignored) {
2571
2572
2573
2574
2575 if (force_on)
2576 iommu_disable_protect_mem_regions(drhd->iommu);
2577 continue;
2578 }
2579 iommu = drhd->iommu;
2580
2581 iommu_flush_write_buffer(iommu);
2582
2583 ret = dmar_set_interrupt(iommu);
2584 if (ret)
2585 goto error;
2586
2587 iommu_set_root_entry(iommu);
2588
2589 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2590 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2591
2592 ret = iommu_enable_translation(iommu);
2593 if (ret)
2594 goto error;
2595
2596 iommu_disable_protect_mem_regions(iommu);
2597 }
2598
2599 return 0;
2600error:
2601 for_each_drhd_unit(drhd) {
2602 if (drhd->ignored)
2603 continue;
2604 iommu = drhd->iommu;
2605 free_iommu(iommu);
2606 }
2607 kfree(g_iommus);
2608 return ret;
2609}
2610
2611
2612static struct iova *intel_alloc_iova(struct device *dev,
2613 struct dmar_domain *domain,
2614 unsigned long nrpages, uint64_t dma_mask)
2615{
2616 struct pci_dev *pdev = to_pci_dev(dev);
2617 struct iova *iova = NULL;
2618
2619
2620 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2621
2622 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2623
2624
2625
2626
2627
2628 iova = alloc_iova(&domain->iovad, nrpages,
2629 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2630 if (iova)
2631 return iova;
2632 }
2633 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2634 if (unlikely(!iova)) {
2635 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2636 nrpages, pci_name(pdev));
2637 return NULL;
2638 }
2639
2640 return iova;
2641}
2642
2643static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2644{
2645 struct dmar_domain *domain;
2646 int ret;
2647
2648 domain = get_domain_for_dev(pdev,
2649 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2650 if (!domain) {
2651 printk(KERN_ERR
2652 "Allocating domain for %s failed", pci_name(pdev));
2653 return NULL;
2654 }
2655
2656
2657 if (unlikely(!domain_context_mapped(pdev))) {
2658 ret = domain_context_mapping(domain, pdev,
2659 CONTEXT_TT_MULTI_LEVEL);
2660 if (ret) {
2661 printk(KERN_ERR
2662 "Domain context map for %s failed",
2663 pci_name(pdev));
2664 return NULL;
2665 }
2666 }
2667
2668 return domain;
2669}
2670
2671static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2672{
2673 struct device_domain_info *info;
2674
2675
2676 info = dev->dev.archdata.iommu;
2677 if (likely(info))
2678 return info->domain;
2679
2680 return __get_valid_domain_for_dev(dev);
2681}
2682
2683static int iommu_dummy(struct pci_dev *pdev)
2684{
2685 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2686}
2687
2688
2689static int iommu_no_mapping(struct device *dev)
2690{
2691 struct pci_dev *pdev;
2692 int found;
2693
2694 if (unlikely(dev->bus != &pci_bus_type))
2695 return 1;
2696
2697 pdev = to_pci_dev(dev);
2698 if (iommu_dummy(pdev))
2699 return 1;
2700
2701 if (!iommu_identity_mapping)
2702 return 0;
2703
2704 found = identity_mapping(pdev);
2705 if (found) {
2706 if (iommu_should_identity_map(pdev, 0))
2707 return 1;
2708 else {
2709
2710
2711
2712
2713 domain_remove_one_dev_info(si_domain, pdev);
2714 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2715 pci_name(pdev));
2716 return 0;
2717 }
2718 } else {
2719
2720
2721
2722
2723 if (iommu_should_identity_map(pdev, 0)) {
2724 int ret;
2725 ret = domain_add_dev_info(si_domain, pdev,
2726 hw_pass_through ?
2727 CONTEXT_TT_PASS_THROUGH :
2728 CONTEXT_TT_MULTI_LEVEL);
2729 if (!ret) {
2730 printk(KERN_INFO "64bit %s uses identity mapping\n",
2731 pci_name(pdev));
2732 return 1;
2733 }
2734 }
2735 }
2736
2737 return 0;
2738}
2739
2740static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2741 size_t size, int dir, u64 dma_mask)
2742{
2743 struct pci_dev *pdev = to_pci_dev(hwdev);
2744 struct dmar_domain *domain;
2745 phys_addr_t start_paddr;
2746 struct iova *iova;
2747 int prot = 0;
2748 int ret;
2749 struct intel_iommu *iommu;
2750 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2751
2752 BUG_ON(dir == DMA_NONE);
2753
2754 if (iommu_no_mapping(hwdev))
2755 return paddr;
2756
2757 domain = get_valid_domain_for_dev(pdev);
2758 if (!domain)
2759 return 0;
2760
2761 iommu = domain_get_iommu(domain);
2762 size = aligned_nrpages(paddr, size);
2763
2764 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2765 if (!iova)
2766 goto error;
2767
2768
2769
2770
2771
2772 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2773 !cap_zlr(iommu->cap))
2774 prot |= DMA_PTE_READ;
2775 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2776 prot |= DMA_PTE_WRITE;
2777
2778
2779
2780
2781
2782
2783 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2784 mm_to_dma_pfn(paddr_pfn), size, prot);
2785 if (ret)
2786 goto error;
2787
2788
2789 if (cap_caching_mode(iommu->cap))
2790 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2791 else
2792 iommu_flush_write_buffer(iommu);
2793
2794 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2795 start_paddr += paddr & ~PAGE_MASK;
2796 return start_paddr;
2797
2798error:
2799 if (iova)
2800 __free_iova(&domain->iovad, iova);
2801 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2802 pci_name(pdev), size, (unsigned long long)paddr, dir);
2803 return 0;
2804}
2805
2806static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2807 unsigned long offset, size_t size,
2808 enum dma_data_direction dir,
2809 struct dma_attrs *attrs)
2810{
2811 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2812 dir, to_pci_dev(dev)->dma_mask);
2813}
2814
2815static void flush_unmaps(void)
2816{
2817 int i, j;
2818
2819 timer_on = 0;
2820
2821
2822 for (i = 0; i < g_num_of_iommus; i++) {
2823 struct intel_iommu *iommu = g_iommus[i];
2824 if (!iommu)
2825 continue;
2826
2827 if (!deferred_flush[i].next)
2828 continue;
2829
2830
2831 if (!cap_caching_mode(iommu->cap))
2832 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2833 DMA_TLB_GLOBAL_FLUSH);
2834 for (j = 0; j < deferred_flush[i].next; j++) {
2835 unsigned long mask;
2836 struct iova *iova = deferred_flush[i].iova[j];
2837 struct dmar_domain *domain = deferred_flush[i].domain[j];
2838
2839
2840 if (cap_caching_mode(iommu->cap))
2841 iommu_flush_iotlb_psi(iommu, domain->id,
2842 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2843 else {
2844 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2845 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2846 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2847 }
2848 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2849 }
2850 deferred_flush[i].next = 0;
2851 }
2852
2853 list_size = 0;
2854}
2855
2856static void flush_unmaps_timeout(unsigned long data)
2857{
2858 unsigned long flags;
2859
2860 spin_lock_irqsave(&async_umap_flush_lock, flags);
2861 flush_unmaps();
2862 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2863}
2864
2865static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2866{
2867 unsigned long flags;
2868 int next, iommu_id;
2869 struct intel_iommu *iommu;
2870
2871 spin_lock_irqsave(&async_umap_flush_lock, flags);
2872 if (list_size == HIGH_WATER_MARK)
2873 flush_unmaps();
2874
2875 iommu = domain_get_iommu(dom);
2876 iommu_id = iommu->seq_id;
2877
2878 next = deferred_flush[iommu_id].next;
2879 deferred_flush[iommu_id].domain[next] = dom;
2880 deferred_flush[iommu_id].iova[next] = iova;
2881 deferred_flush[iommu_id].next++;
2882
2883 if (!timer_on) {
2884 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2885 timer_on = 1;
2886 }
2887 list_size++;
2888 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2889}
2890
2891static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2892 size_t size, enum dma_data_direction dir,
2893 struct dma_attrs *attrs)
2894{
2895 struct pci_dev *pdev = to_pci_dev(dev);
2896 struct dmar_domain *domain;
2897 unsigned long start_pfn, last_pfn;
2898 struct iova *iova;
2899 struct intel_iommu *iommu;
2900
2901 if (iommu_no_mapping(dev))
2902 return;
2903
2904 domain = find_domain(pdev);
2905 BUG_ON(!domain);
2906
2907 iommu = domain_get_iommu(domain);
2908
2909 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2910 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2911 (unsigned long long)dev_addr))
2912 return;
2913
2914 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2915 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2916
2917 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2918 pci_name(pdev), start_pfn, last_pfn);
2919
2920
2921 dma_pte_clear_range(domain, start_pfn, last_pfn);
2922
2923
2924 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2925
2926 if (intel_iommu_strict) {
2927 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2928 last_pfn - start_pfn + 1, 0);
2929
2930 __free_iova(&domain->iovad, iova);
2931 } else {
2932 add_unmap(domain, iova);
2933
2934
2935
2936
2937 }
2938}
2939
2940static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2941 dma_addr_t *dma_handle, gfp_t flags)
2942{
2943 void *vaddr;
2944 int order;
2945
2946 size = PAGE_ALIGN(size);
2947 order = get_order(size);
2948
2949 if (!iommu_no_mapping(hwdev))
2950 flags &= ~(GFP_DMA | GFP_DMA32);
2951 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2952 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2953 flags |= GFP_DMA;
2954 else
2955 flags |= GFP_DMA32;
2956 }
2957
2958 vaddr = (void *)__get_free_pages(flags, order);
2959 if (!vaddr)
2960 return NULL;
2961 memset(vaddr, 0, size);
2962
2963 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2964 DMA_BIDIRECTIONAL,
2965 hwdev->coherent_dma_mask);
2966 if (*dma_handle)
2967 return vaddr;
2968 free_pages((unsigned long)vaddr, order);
2969 return NULL;
2970}
2971
2972static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2973 dma_addr_t dma_handle)
2974{
2975 int order;
2976
2977 size = PAGE_ALIGN(size);
2978 order = get_order(size);
2979
2980 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2981 free_pages((unsigned long)vaddr, order);
2982}
2983
2984static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2985 int nelems, enum dma_data_direction dir,
2986 struct dma_attrs *attrs)
2987{
2988 struct pci_dev *pdev = to_pci_dev(hwdev);
2989 struct dmar_domain *domain;
2990 unsigned long start_pfn, last_pfn;
2991 struct iova *iova;
2992 struct intel_iommu *iommu;
2993
2994 if (iommu_no_mapping(hwdev))
2995 return;
2996
2997 domain = find_domain(pdev);
2998 BUG_ON(!domain);
2999
3000 iommu = domain_get_iommu(domain);
3001
3002 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3003 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3004 (unsigned long long)sglist[0].dma_address))
3005 return;
3006
3007 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3008 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3009
3010
3011 dma_pte_clear_range(domain, start_pfn, last_pfn);
3012
3013
3014 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3015
3016 if (intel_iommu_strict) {
3017 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3018 last_pfn - start_pfn + 1, 0);
3019
3020 __free_iova(&domain->iovad, iova);
3021 } else {
3022 add_unmap(domain, iova);
3023
3024
3025
3026
3027 }
3028}
3029
3030static int intel_nontranslate_map_sg(struct device *hddev,
3031 struct scatterlist *sglist, int nelems, int dir)
3032{
3033 int i;
3034 struct scatterlist *sg;
3035
3036 for_each_sg(sglist, sg, nelems, i) {
3037 BUG_ON(!sg_page(sg));
3038 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3039 sg->dma_length = sg->length;
3040 }
3041 return nelems;
3042}
3043
3044static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3045 enum dma_data_direction dir, struct dma_attrs *attrs)
3046{
3047 int i;
3048 struct pci_dev *pdev = to_pci_dev(hwdev);
3049 struct dmar_domain *domain;
3050 size_t size = 0;
3051 int prot = 0;
3052 struct iova *iova = NULL;
3053 int ret;
3054 struct scatterlist *sg;
3055 unsigned long start_vpfn;
3056 struct intel_iommu *iommu;
3057
3058 BUG_ON(dir == DMA_NONE);
3059 if (iommu_no_mapping(hwdev))
3060 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3061
3062 domain = get_valid_domain_for_dev(pdev);
3063 if (!domain)
3064 return 0;
3065
3066 iommu = domain_get_iommu(domain);
3067
3068 for_each_sg(sglist, sg, nelems, i)
3069 size += aligned_nrpages(sg->offset, sg->length);
3070
3071 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3072 pdev->dma_mask);
3073 if (!iova) {
3074 sglist->dma_length = 0;
3075 return 0;
3076 }
3077
3078
3079
3080
3081
3082 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3083 !cap_zlr(iommu->cap))
3084 prot |= DMA_PTE_READ;
3085 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3086 prot |= DMA_PTE_WRITE;
3087
3088 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3089
3090 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3091 if (unlikely(ret)) {
3092
3093 dma_pte_clear_range(domain, start_vpfn,
3094 start_vpfn + size - 1);
3095
3096 dma_pte_free_pagetable(domain, start_vpfn,
3097 start_vpfn + size - 1);
3098
3099 __free_iova(&domain->iovad, iova);
3100 return 0;
3101 }
3102
3103
3104 if (cap_caching_mode(iommu->cap))
3105 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3106 else
3107 iommu_flush_write_buffer(iommu);
3108
3109 return nelems;
3110}
3111
3112static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3113{
3114 return !dma_addr;
3115}
3116
3117struct dma_map_ops intel_dma_ops = {
3118 .alloc_coherent = intel_alloc_coherent,
3119 .free_coherent = intel_free_coherent,
3120 .map_sg = intel_map_sg,
3121 .unmap_sg = intel_unmap_sg,
3122 .map_page = intel_map_page,
3123 .unmap_page = intel_unmap_page,
3124 .mapping_error = intel_mapping_error,
3125};
3126
3127static inline int iommu_domain_cache_init(void)
3128{
3129 int ret = 0;
3130
3131 iommu_domain_cache = kmem_cache_create("iommu_domain",
3132 sizeof(struct dmar_domain),
3133 0,
3134 SLAB_HWCACHE_ALIGN,
3135
3136 NULL);
3137 if (!iommu_domain_cache) {
3138 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3139 ret = -ENOMEM;
3140 }
3141
3142 return ret;
3143}
3144
3145static inline int iommu_devinfo_cache_init(void)
3146{
3147 int ret = 0;
3148
3149 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3150 sizeof(struct device_domain_info),
3151 0,
3152 SLAB_HWCACHE_ALIGN,
3153 NULL);
3154 if (!iommu_devinfo_cache) {
3155 printk(KERN_ERR "Couldn't create devinfo cache\n");
3156 ret = -ENOMEM;
3157 }
3158
3159 return ret;
3160}
3161
3162static inline int iommu_iova_cache_init(void)
3163{
3164 int ret = 0;
3165
3166 iommu_iova_cache = kmem_cache_create("iommu_iova",
3167 sizeof(struct iova),
3168 0,
3169 SLAB_HWCACHE_ALIGN,
3170 NULL);
3171 if (!iommu_iova_cache) {
3172 printk(KERN_ERR "Couldn't create iova cache\n");
3173 ret = -ENOMEM;
3174 }
3175
3176 return ret;
3177}
3178
3179static int __init iommu_init_mempool(void)
3180{
3181 int ret;
3182 ret = iommu_iova_cache_init();
3183 if (ret)
3184 return ret;
3185
3186 ret = iommu_domain_cache_init();
3187 if (ret)
3188 goto domain_error;
3189
3190 ret = iommu_devinfo_cache_init();
3191 if (!ret)
3192 return ret;
3193
3194 kmem_cache_destroy(iommu_domain_cache);
3195domain_error:
3196 kmem_cache_destroy(iommu_iova_cache);
3197
3198 return -ENOMEM;
3199}
3200
3201static void __init iommu_exit_mempool(void)
3202{
3203 kmem_cache_destroy(iommu_devinfo_cache);
3204 kmem_cache_destroy(iommu_domain_cache);
3205 kmem_cache_destroy(iommu_iova_cache);
3206
3207}
3208
3209static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3210{
3211 struct dmar_drhd_unit *drhd;
3212 u32 vtbar;
3213 int rc;
3214
3215
3216
3217
3218
3219
3220 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3221 if (rc) {
3222
3223 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3224 return;
3225 }
3226 vtbar &= 0xffff0000;
3227
3228
3229 drhd = dmar_find_matched_drhd_unit(pdev);
3230 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3231 TAINT_FIRMWARE_WORKAROUND,
3232 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3233 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3234}
3235DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3236
3237static void __init init_no_remapping_devices(void)
3238{
3239 struct dmar_drhd_unit *drhd;
3240
3241 for_each_drhd_unit(drhd) {
3242 if (!drhd->include_all) {
3243 int i;
3244 for (i = 0; i < drhd->devices_cnt; i++)
3245 if (drhd->devices[i] != NULL)
3246 break;
3247
3248 if (i == drhd->devices_cnt)
3249 drhd->ignored = 1;
3250 }
3251 }
3252
3253 for_each_drhd_unit(drhd) {
3254 int i;
3255 if (drhd->ignored || drhd->include_all)
3256 continue;
3257
3258 for (i = 0; i < drhd->devices_cnt; i++)
3259 if (drhd->devices[i] &&
3260 !IS_GFX_DEVICE(drhd->devices[i]))
3261 break;
3262
3263 if (i < drhd->devices_cnt)
3264 continue;
3265
3266
3267
3268 if (dmar_map_gfx) {
3269 intel_iommu_gfx_mapped = 1;
3270 } else {
3271 drhd->ignored = 1;
3272 for (i = 0; i < drhd->devices_cnt; i++) {
3273 if (!drhd->devices[i])
3274 continue;
3275 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3276 }
3277 }
3278 }
3279}
3280
3281#ifdef CONFIG_SUSPEND
3282static int init_iommu_hw(void)
3283{
3284 struct dmar_drhd_unit *drhd;
3285 struct intel_iommu *iommu = NULL;
3286
3287 for_each_active_iommu(iommu, drhd)
3288 if (iommu->qi)
3289 dmar_reenable_qi(iommu);
3290
3291 for_each_iommu(iommu, drhd) {
3292 if (drhd->ignored) {
3293
3294
3295
3296
3297 if (force_on)
3298 iommu_disable_protect_mem_regions(iommu);
3299 continue;
3300 }
3301
3302 iommu_flush_write_buffer(iommu);
3303
3304 iommu_set_root_entry(iommu);
3305
3306 iommu->flush.flush_context(iommu, 0, 0, 0,
3307 DMA_CCMD_GLOBAL_INVL);
3308 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3309 DMA_TLB_GLOBAL_FLUSH);
3310 if (iommu_enable_translation(iommu))
3311 return 1;
3312 iommu_disable_protect_mem_regions(iommu);
3313 }
3314
3315 return 0;
3316}
3317
3318static void iommu_flush_all(void)
3319{
3320 struct dmar_drhd_unit *drhd;
3321 struct intel_iommu *iommu;
3322
3323 for_each_active_iommu(iommu, drhd) {
3324 iommu->flush.flush_context(iommu, 0, 0, 0,
3325 DMA_CCMD_GLOBAL_INVL);
3326 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3327 DMA_TLB_GLOBAL_FLUSH);
3328 }
3329}
3330
3331static int iommu_suspend(void)
3332{
3333 struct dmar_drhd_unit *drhd;
3334 struct intel_iommu *iommu = NULL;
3335 unsigned long flag;
3336
3337 for_each_active_iommu(iommu, drhd) {
3338 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3339 GFP_ATOMIC);
3340 if (!iommu->iommu_state)
3341 goto nomem;
3342 }
3343
3344 iommu_flush_all();
3345
3346 for_each_active_iommu(iommu, drhd) {
3347 iommu_disable_translation(iommu);
3348
3349 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3350
3351 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3352 readl(iommu->reg + DMAR_FECTL_REG);
3353 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3354 readl(iommu->reg + DMAR_FEDATA_REG);
3355 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3356 readl(iommu->reg + DMAR_FEADDR_REG);
3357 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3358 readl(iommu->reg + DMAR_FEUADDR_REG);
3359
3360 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3361 }
3362 return 0;
3363
3364nomem:
3365 for_each_active_iommu(iommu, drhd)
3366 kfree(iommu->iommu_state);
3367
3368 return -ENOMEM;
3369}
3370
3371static void iommu_resume(void)
3372{
3373 struct dmar_drhd_unit *drhd;
3374 struct intel_iommu *iommu = NULL;
3375 unsigned long flag;
3376
3377 if (init_iommu_hw()) {
3378 if (force_on)
3379 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3380 else
3381 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3382 return;
3383 }
3384
3385 for_each_active_iommu(iommu, drhd) {
3386
3387 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3388
3389 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3390 iommu->reg + DMAR_FECTL_REG);
3391 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3392 iommu->reg + DMAR_FEDATA_REG);
3393 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3394 iommu->reg + DMAR_FEADDR_REG);
3395 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3396 iommu->reg + DMAR_FEUADDR_REG);
3397
3398 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3399 }
3400
3401 for_each_active_iommu(iommu, drhd)
3402 kfree(iommu->iommu_state);
3403}
3404
3405static struct syscore_ops iommu_syscore_ops = {
3406 .resume = iommu_resume,
3407 .suspend = iommu_suspend,
3408};
3409
3410static void __init init_iommu_pm_ops(void)
3411{
3412 register_syscore_ops(&iommu_syscore_ops);
3413}
3414
3415#else
/* Without CONFIG_SUSPEND there are no power-management hooks to register. */
static inline void init_iommu_pm_ops(void)
{
}
3417#endif
3418
3419LIST_HEAD(dmar_rmrr_units);
3420
3421static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3422{
3423 list_add(&rmrr->list, &dmar_rmrr_units);
3424}
3425
3426
3427int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3428{
3429 struct acpi_dmar_reserved_memory *rmrr;
3430 struct dmar_rmrr_unit *rmrru;
3431
3432 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3433 if (!rmrru)
3434 return -ENOMEM;
3435
3436 rmrru->hdr = header;
3437 rmrr = (struct acpi_dmar_reserved_memory *)header;
3438 rmrru->base_address = rmrr->base_address;
3439 rmrru->end_address = rmrr->end_address;
3440
3441 dmar_register_rmrr_unit(rmrru);
3442 return 0;
3443}
3444
3445static int __init
3446rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3447{
3448 struct acpi_dmar_reserved_memory *rmrr;
3449 int ret;
3450
3451 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3452 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3453 ((void *)rmrr) + rmrr->header.length,
3454 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3455
3456 if (ret || (rmrru->devices_cnt == 0)) {
3457 list_del(&rmrru->list);
3458 kfree(rmrru);
3459 }
3460 return ret;
3461}
3462
3463static LIST_HEAD(dmar_atsr_units);
3464
3465int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3466{
3467 struct acpi_dmar_atsr *atsr;
3468 struct dmar_atsr_unit *atsru;
3469
3470 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3471 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3472 if (!atsru)
3473 return -ENOMEM;
3474
3475 atsru->hdr = hdr;
3476 atsru->include_all = atsr->flags & 0x1;
3477
3478 list_add(&atsru->list, &dmar_atsr_units);
3479
3480 return 0;
3481}
3482
3483static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3484{
3485 int rc;
3486 struct acpi_dmar_atsr *atsr;
3487
3488 if (atsru->include_all)
3489 return 0;
3490
3491 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3492 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3493 (void *)atsr + atsr->header.length,
3494 &atsru->devices_cnt, &atsru->devices,
3495 atsr->segment);
3496 if (rc || !atsru->devices_cnt) {
3497 list_del(&atsru->list);
3498 kfree(atsru);
3499 }
3500
3501 return rc;
3502}
3503
3504int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3505{
3506 int i;
3507 struct pci_bus *bus;
3508 struct acpi_dmar_atsr *atsr;
3509 struct dmar_atsr_unit *atsru;
3510
3511 dev = pci_physfn(dev);
3512
3513 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3514 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3515 if (atsr->segment == pci_domain_nr(dev->bus))
3516 goto found;
3517 }
3518
3519 return 0;
3520
3521found:
3522 for (bus = dev->bus; bus; bus = bus->parent) {
3523 struct pci_dev *bridge = bus->self;
3524
3525 if (!bridge || !pci_is_pcie(bridge) ||
3526 bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
3527 return 0;
3528
3529 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
3530 for (i = 0; i < atsru->devices_cnt; i++)
3531 if (atsru->devices[i] == bridge)
3532 return 1;
3533 break;
3534 }
3535 }
3536
3537 if (atsru->include_all)
3538 return 1;
3539
3540 return 0;
3541}
3542
3543int __init dmar_parse_rmrr_atsr_dev(void)
3544{
3545 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3546 struct dmar_atsr_unit *atsr, *atsr_n;
3547 int ret = 0;
3548
3549 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3550 ret = rmrr_parse_dev(rmrr);
3551 if (ret)
3552 return ret;
3553 }
3554
3555 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3556 ret = atsr_parse_dev(atsr);
3557 if (ret)
3558 return ret;
3559 }
3560
3561 return ret;
3562}
3563
3564
3565
3566
3567
3568
3569
3570static int device_notifier(struct notifier_block *nb,
3571 unsigned long action, void *data)
3572{
3573 struct device *dev = data;
3574 struct pci_dev *pdev = to_pci_dev(dev);
3575 struct dmar_domain *domain;
3576
3577 if (iommu_no_mapping(dev))
3578 return 0;
3579
3580 domain = find_domain(pdev);
3581 if (!domain)
3582 return 0;
3583
3584 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3585 domain_remove_one_dev_info(domain, pdev);
3586
3587 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3588 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3589 list_empty(&domain->devices))
3590 domain_exit(domain);
3591 }
3592
3593 return 0;
3594}
3595
3596static struct notifier_block device_nb = {
3597 .notifier_call = device_notifier,
3598};
3599
3600int __init intel_iommu_init(void)
3601{
3602 int ret = 0;
3603
3604
3605 force_on = tboot_force_iommu();
3606
3607 if (dmar_table_init()) {
3608 if (force_on)
3609 panic("tboot: Failed to initialize DMAR table\n");
3610 return -ENODEV;
3611 }
3612
3613 if (dmar_dev_scope_init() < 0) {
3614 if (force_on)
3615 panic("tboot: Failed to initialize DMAR device scope\n");
3616 return -ENODEV;
3617 }
3618
3619 if (no_iommu || dmar_disabled)
3620 return -ENODEV;
3621
3622 if (iommu_init_mempool()) {
3623 if (force_on)
3624 panic("tboot: Failed to initialize iommu memory\n");
3625 return -ENODEV;
3626 }
3627
3628 if (list_empty(&dmar_rmrr_units))
3629 printk(KERN_INFO "DMAR: No RMRR found\n");
3630
3631 if (list_empty(&dmar_atsr_units))
3632 printk(KERN_INFO "DMAR: No ATSR found\n");
3633
3634 if (dmar_init_reserved_ranges()) {
3635 if (force_on)
3636 panic("tboot: Failed to reserve iommu ranges\n");
3637 return -ENODEV;
3638 }
3639
3640 init_no_remapping_devices();
3641
3642 ret = init_dmars();
3643 if (ret) {
3644 if (force_on)
3645 panic("tboot: Failed to initialize DMARs\n");
3646 printk(KERN_ERR "IOMMU: dmar init failed\n");
3647 put_iova_domain(&reserved_iova_list);
3648 iommu_exit_mempool();
3649 return ret;
3650 }
3651 printk(KERN_INFO
3652 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3653
3654 init_timer(&unmap_timer);
3655#ifdef CONFIG_SWIOTLB
3656 swiotlb = 0;
3657#endif
3658 dma_ops = &intel_dma_ops;
3659
3660 init_iommu_pm_ops();
3661
3662 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3663
3664 bus_register_notifier(&pci_bus_type, &device_nb);
3665
3666 intel_iommu_enabled = 1;
3667
3668 return 0;
3669}
3670
3671static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3672 struct pci_dev *pdev)
3673{
3674 struct pci_dev *tmp, *parent;
3675
3676 if (!iommu || !pdev)
3677 return;
3678
3679
3680 tmp = pci_find_upstream_pcie_bridge(pdev);
3681
3682 if (tmp) {
3683 parent = pdev->bus->self;
3684 while (parent != tmp) {
3685 iommu_detach_dev(iommu, parent->bus->number,
3686 parent->devfn);
3687 parent = parent->bus->self;
3688 }
3689 if (pci_is_pcie(tmp))
3690 iommu_detach_dev(iommu,
3691 tmp->subordinate->number, 0);
3692 else
3693 iommu_detach_dev(iommu, tmp->bus->number,
3694 tmp->devfn);
3695 }
3696}
3697
3698static void domain_remove_one_dev_info(struct dmar_domain *domain,
3699 struct pci_dev *pdev)
3700{
3701 struct device_domain_info *info;
3702 struct intel_iommu *iommu;
3703 unsigned long flags;
3704 int found = 0;
3705 struct list_head *entry, *tmp;
3706
3707 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3708 pdev->devfn);
3709 if (!iommu)
3710 return;
3711
3712 spin_lock_irqsave(&device_domain_lock, flags);
3713 list_for_each_safe(entry, tmp, &domain->devices) {
3714 info = list_entry(entry, struct device_domain_info, link);
3715 if (info->segment == pci_domain_nr(pdev->bus) &&
3716 info->bus == pdev->bus->number &&
3717 info->devfn == pdev->devfn) {
3718 list_del(&info->link);
3719 list_del(&info->global);
3720 if (info->dev)
3721 info->dev->dev.archdata.iommu = NULL;
3722 spin_unlock_irqrestore(&device_domain_lock, flags);
3723
3724 iommu_disable_dev_iotlb(info);
3725 iommu_detach_dev(iommu, info->bus, info->devfn);
3726 iommu_detach_dependent_devices(iommu, pdev);
3727 free_devinfo_mem(info);
3728
3729 spin_lock_irqsave(&device_domain_lock, flags);
3730
3731 if (found)
3732 break;
3733 else
3734 continue;
3735 }
3736
3737
3738
3739
3740
3741 if (iommu == device_to_iommu(info->segment, info->bus,
3742 info->devfn))
3743 found = 1;
3744 }
3745
3746 spin_unlock_irqrestore(&device_domain_lock, flags);
3747
3748 if (found == 0) {
3749 unsigned long tmp_flags;
3750 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3751 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3752 domain->iommu_count--;
3753 domain_update_iommu_cap(domain);
3754 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3755
3756 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3757 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3758 spin_lock_irqsave(&iommu->lock, tmp_flags);
3759 clear_bit(domain->id, iommu->domain_ids);
3760 iommu->domains[domain->id] = NULL;
3761 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3762 }
3763 }
3764}
3765
3766static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3767{
3768 struct device_domain_info *info;
3769 struct intel_iommu *iommu;
3770 unsigned long flags1, flags2;
3771
3772 spin_lock_irqsave(&device_domain_lock, flags1);
3773 while (!list_empty(&domain->devices)) {
3774 info = list_entry(domain->devices.next,
3775 struct device_domain_info, link);
3776 list_del(&info->link);
3777 list_del(&info->global);
3778 if (info->dev)
3779 info->dev->dev.archdata.iommu = NULL;
3780
3781 spin_unlock_irqrestore(&device_domain_lock, flags1);
3782
3783 iommu_disable_dev_iotlb(info);
3784 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3785 iommu_detach_dev(iommu, info->bus, info->devfn);
3786 iommu_detach_dependent_devices(iommu, info->dev);
3787
3788
3789
3790
3791 spin_lock_irqsave(&domain->iommu_lock, flags2);
3792 if (test_and_clear_bit(iommu->seq_id,
3793 &domain->iommu_bmp)) {
3794 domain->iommu_count--;
3795 domain_update_iommu_cap(domain);
3796 }
3797 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3798
3799 free_devinfo_mem(info);
3800 spin_lock_irqsave(&device_domain_lock, flags1);
3801 }
3802 spin_unlock_irqrestore(&device_domain_lock, flags1);
3803}
3804
3805
3806static unsigned long vm_domid;
3807
3808static struct dmar_domain *iommu_alloc_vm_domain(void)
3809{
3810 struct dmar_domain *domain;
3811
3812 domain = alloc_domain_mem();
3813 if (!domain)
3814 return NULL;
3815
3816 domain->id = vm_domid++;
3817 domain->nid = -1;
3818 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3819 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3820
3821 return domain;
3822}
3823
3824static int md_domain_init(struct dmar_domain *domain, int guest_width)
3825{
3826 int adjust_width;
3827
3828 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3829 spin_lock_init(&domain->iommu_lock);
3830
3831 domain_reserve_special_ranges(domain);
3832
3833
3834 domain->gaw = guest_width;
3835 adjust_width = guestwidth_to_adjustwidth(guest_width);
3836 domain->agaw = width_to_agaw(adjust_width);
3837
3838 INIT_LIST_HEAD(&domain->devices);
3839
3840 domain->iommu_count = 0;
3841 domain->iommu_coherency = 0;
3842 domain->iommu_snooping = 0;
3843 domain->iommu_superpage = 0;
3844 domain->max_addr = 0;
3845 domain->nid = -1;
3846
3847
3848 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3849 if (!domain->pgd)
3850 return -ENOMEM;
3851 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3852 return 0;
3853}
3854
3855static void iommu_free_vm_domain(struct dmar_domain *domain)
3856{
3857 unsigned long flags;
3858 struct dmar_drhd_unit *drhd;
3859 struct intel_iommu *iommu;
3860 unsigned long i;
3861 unsigned long ndomains;
3862
3863 for_each_drhd_unit(drhd) {
3864 if (drhd->ignored)
3865 continue;
3866 iommu = drhd->iommu;
3867
3868 ndomains = cap_ndoms(iommu->cap);
3869 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3870 if (iommu->domains[i] == domain) {
3871 spin_lock_irqsave(&iommu->lock, flags);
3872 clear_bit(i, iommu->domain_ids);
3873 iommu->domains[i] = NULL;
3874 spin_unlock_irqrestore(&iommu->lock, flags);
3875 break;
3876 }
3877 }
3878 }
3879}
3880
3881static void vm_domain_exit(struct dmar_domain *domain)
3882{
3883
3884 if (!domain)
3885 return;
3886
3887 vm_domain_remove_all_dev_info(domain);
3888
3889 put_iova_domain(&domain->iovad);
3890
3891
3892 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3893
3894
3895 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3896
3897 iommu_free_vm_domain(domain);
3898 free_domain_mem(domain);
3899}
3900
3901static int intel_iommu_domain_init(struct iommu_domain *domain)
3902{
3903 struct dmar_domain *dmar_domain;
3904
3905 dmar_domain = iommu_alloc_vm_domain();
3906 if (!dmar_domain) {
3907 printk(KERN_ERR
3908 "intel_iommu_domain_init: dmar_domain == NULL\n");
3909 return -ENOMEM;
3910 }
3911 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3912 printk(KERN_ERR
3913 "intel_iommu_domain_init() failed\n");
3914 vm_domain_exit(dmar_domain);
3915 return -ENOMEM;
3916 }
3917 domain_update_iommu_cap(dmar_domain);
3918 domain->priv = dmar_domain;
3919
3920 return 0;
3921}
3922
3923static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3924{
3925 struct dmar_domain *dmar_domain = domain->priv;
3926
3927 domain->priv = NULL;
3928 vm_domain_exit(dmar_domain);
3929}
3930
3931static int intel_iommu_attach_device(struct iommu_domain *domain,
3932 struct device *dev)
3933{
3934 struct dmar_domain *dmar_domain = domain->priv;
3935 struct pci_dev *pdev = to_pci_dev(dev);
3936 struct intel_iommu *iommu;
3937 int addr_width;
3938
3939
3940 if (unlikely(domain_context_mapped(pdev))) {
3941 struct dmar_domain *old_domain;
3942
3943 old_domain = find_domain(pdev);
3944 if (old_domain) {
3945 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3946 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3947 domain_remove_one_dev_info(old_domain, pdev);
3948 else
3949 domain_remove_dev_info(old_domain);
3950 }
3951 }
3952
3953 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3954 pdev->devfn);
3955 if (!iommu)
3956 return -ENODEV;
3957
3958
3959 addr_width = agaw_to_width(iommu->agaw);
3960 if (addr_width > cap_mgaw(iommu->cap))
3961 addr_width = cap_mgaw(iommu->cap);
3962
3963 if (dmar_domain->max_addr > (1LL << addr_width)) {
3964 printk(KERN_ERR "%s: iommu width (%d) is not "
3965 "sufficient for the mapped address (%llx)\n",
3966 __func__, addr_width, dmar_domain->max_addr);
3967 return -EFAULT;
3968 }
3969 dmar_domain->gaw = addr_width;
3970
3971
3972
3973
3974 while (iommu->agaw < dmar_domain->agaw) {
3975 struct dma_pte *pte;
3976
3977 pte = dmar_domain->pgd;
3978 if (dma_pte_present(pte)) {
3979 dmar_domain->pgd = (struct dma_pte *)
3980 phys_to_virt(dma_pte_addr(pte));
3981 free_pgtable_page(pte);
3982 }
3983 dmar_domain->agaw--;
3984 }
3985
3986 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3987}
3988
3989static void intel_iommu_detach_device(struct iommu_domain *domain,
3990 struct device *dev)
3991{
3992 struct dmar_domain *dmar_domain = domain->priv;
3993 struct pci_dev *pdev = to_pci_dev(dev);
3994
3995 domain_remove_one_dev_info(dmar_domain, pdev);
3996}
3997
3998static int intel_iommu_map(struct iommu_domain *domain,
3999 unsigned long iova, phys_addr_t hpa,
4000 size_t size, int iommu_prot)
4001{
4002 struct dmar_domain *dmar_domain = domain->priv;
4003 u64 max_addr;
4004 int prot = 0;
4005 int ret;
4006
4007 if (iommu_prot & IOMMU_READ)
4008 prot |= DMA_PTE_READ;
4009 if (iommu_prot & IOMMU_WRITE)
4010 prot |= DMA_PTE_WRITE;
4011 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4012 prot |= DMA_PTE_SNP;
4013
4014 max_addr = iova + size;
4015 if (dmar_domain->max_addr < max_addr) {
4016 u64 end;
4017
4018
4019 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4020 if (end < max_addr) {
4021 printk(KERN_ERR "%s: iommu width (%d) is not "
4022 "sufficient for the mapped address (%llx)\n",
4023 __func__, dmar_domain->gaw, max_addr);
4024 return -EFAULT;
4025 }
4026 dmar_domain->max_addr = max_addr;
4027 }
4028
4029
4030 size = aligned_nrpages(hpa, size);
4031 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4032 hpa >> VTD_PAGE_SHIFT, size, prot);
4033 return ret;
4034}
4035
4036static size_t intel_iommu_unmap(struct iommu_domain *domain,
4037 unsigned long iova, size_t size)
4038{
4039 struct dmar_domain *dmar_domain = domain->priv;
4040 int order;
4041
4042 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4043 (iova + size - 1) >> VTD_PAGE_SHIFT);
4044
4045 if (dmar_domain->max_addr == iova + size)
4046 dmar_domain->max_addr = iova;
4047
4048 return PAGE_SIZE << order;
4049}
4050
4051static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4052 unsigned long iova)
4053{
4054 struct dmar_domain *dmar_domain = domain->priv;
4055 struct dma_pte *pte;
4056 u64 phys = 0;
4057
4058 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4059 if (pte)
4060 phys = dma_pte_addr(pte);
4061
4062 return phys;
4063}
4064
4065static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4066 unsigned long cap)
4067{
4068 struct dmar_domain *dmar_domain = domain->priv;
4069
4070 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4071 return dmar_domain->iommu_snooping;
4072 if (cap == IOMMU_CAP_INTR_REMAP)
4073 return intr_remapping_enabled;
4074
4075 return 0;
4076}
4077
4078
4079
4080
4081
4082
4083
4084static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
4085{
4086 struct pci_dev *pdev = to_pci_dev(dev);
4087 struct pci_dev *bridge;
4088 union {
4089 struct {
4090 u8 devfn;
4091 u8 bus;
4092 u16 segment;
4093 } pci;
4094 u32 group;
4095 } id;
4096
4097 if (iommu_no_mapping(dev))
4098 return -ENODEV;
4099
4100 id.pci.segment = pci_domain_nr(pdev->bus);
4101 id.pci.bus = pdev->bus->number;
4102 id.pci.devfn = pdev->devfn;
4103
4104 if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
4105 return -ENODEV;
4106
4107 bridge = pci_find_upstream_pcie_bridge(pdev);
4108 if (bridge) {
4109 if (pci_is_pcie(bridge)) {
4110 id.pci.bus = bridge->subordinate->number;
4111 id.pci.devfn = 0;
4112 } else {
4113 id.pci.bus = bridge->bus->number;
4114 id.pci.devfn = bridge->devfn;
4115 }
4116 }
4117
4118 if (!pdev->is_virtfn && iommu_group_mf)
4119 id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
4120
4121 *groupid = id.group;
4122
4123 return 0;
4124}
4125
4126static struct iommu_ops intel_iommu_ops = {
4127 .domain_init = intel_iommu_domain_init,
4128 .domain_destroy = intel_iommu_domain_destroy,
4129 .attach_dev = intel_iommu_attach_device,
4130 .detach_dev = intel_iommu_detach_device,
4131 .map = intel_iommu_map,
4132 .unmap = intel_iommu_unmap,
4133 .iova_to_phys = intel_iommu_iova_to_phys,
4134 .domain_has_cap = intel_iommu_domain_has_cap,
4135 .device_group = intel_iommu_device_group,
4136 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4137};
4138
4139static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4140{
4141
4142
4143
4144
4145 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4146 rwbf_quirk = 1;
4147
4148
4149 if (dev->revision == 0x07) {
4150 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4151 dmar_map_gfx = 0;
4152 }
4153}
4154
4155DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4156
4157#define GGC 0x52
4158#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4159#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4160#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4161#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4162#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4163#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4164#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4165#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4166
4167static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4168{
4169 unsigned short ggc;
4170
4171 if (pci_read_config_word(dev, GGC, &ggc))
4172 return;
4173
4174 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4175 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4176 dmar_map_gfx = 0;
4177 } else if (dmar_map_gfx) {
4178
4179 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4180 intel_iommu_strict = 1;
4181 }
4182}
4183DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4184DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4185DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4186DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4187
4188
4189
4190
4191
4192
4193
4194
4195static void __init check_tylersburg_isoch(void)
4196{
4197 struct pci_dev *pdev;
4198 uint32_t vtisochctrl;
4199
4200
4201 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4202 if (!pdev)
4203 return;
4204 pci_dev_put(pdev);
4205
4206
4207
4208
4209 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4210 if (!pdev)
4211 return;
4212
4213 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4214 pci_dev_put(pdev);
4215 return;
4216 }
4217
4218 pci_dev_put(pdev);
4219
4220
4221 if (vtisochctrl & 1)
4222 return;
4223
4224
4225 vtisochctrl &= 0x1c;
4226
4227
4228 if (vtisochctrl == 0x10)
4229 return;
4230
4231
4232 if (!vtisochctrl) {
4233 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4234 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4235 dmi_get_system_info(DMI_BIOS_VENDOR),
4236 dmi_get_system_info(DMI_BIOS_VERSION),
4237 dmi_get_system_info(DMI_PRODUCT_VERSION));
4238 iommu_identity_mapping |= IDENTMAP_AZALIA;
4239 return;
4240 }
4241
4242 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4243 vtisochctrl);
4244}
4245