1
2
3
4
5
6
7
8
9
10
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/spinlock.h>
18#include <linux/interrupt.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/debugobjects.h>
22#include <linux/kallsyms.h>
23#include <linux/list.h>
24#include <linux/rbtree.h>
25#include <linux/radix-tree.h>
26#include <linux/rcupdate.h>
27#include <linux/pfn.h>
28#include <linux/kmemleak.h>
29#include <linux/atomic.h>
30#include <asm/uaccess.h>
31#include <asm/tlbflush.h>
32#include <asm/shmparam.h>
33
34
35
36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
37{
38 pte_t *pte;
39
40 pte = pte_offset_kernel(pmd, addr);
41 do {
42 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
43 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
44 } while (pte++, addr += PAGE_SIZE, addr != end);
45}
46
47static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
48{
49 pmd_t *pmd;
50 unsigned long next;
51
52 pmd = pmd_offset(pud, addr);
53 do {
54 next = pmd_addr_end(addr, end);
55 if (pmd_none_or_clear_bad(pmd))
56 continue;
57 vunmap_pte_range(pmd, addr, next);
58 } while (pmd++, addr = next, addr != end);
59}
60
61static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
62{
63 pud_t *pud;
64 unsigned long next;
65
66 pud = pud_offset(pgd, addr);
67 do {
68 next = pud_addr_end(addr, end);
69 if (pud_none_or_clear_bad(pud))
70 continue;
71 vunmap_pmd_range(pud, addr, next);
72 } while (pud++, addr = next, addr != end);
73}
74
75static void vunmap_page_range(unsigned long addr, unsigned long end)
76{
77 pgd_t *pgd;
78 unsigned long next;
79
80 BUG_ON(addr >= end);
81 pgd = pgd_offset_k(addr);
82 do {
83 next = pgd_addr_end(addr, end);
84 if (pgd_none_or_clear_bad(pgd))
85 continue;
86 vunmap_pud_range(pgd, addr, next);
87 } while (pgd++, addr = next, addr != end);
88}
89
90static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
91 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
92{
93 pte_t *pte;
94
95
96
97
98
99
100 pte = pte_alloc_kernel(pmd, addr);
101 if (!pte)
102 return -ENOMEM;
103 do {
104 struct page *page = pages[*nr];
105
106 if (WARN_ON(!pte_none(*pte)))
107 return -EBUSY;
108 if (WARN_ON(!page))
109 return -ENOMEM;
110 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
111 (*nr)++;
112 } while (pte++, addr += PAGE_SIZE, addr != end);
113 return 0;
114}
115
116static int vmap_pmd_range(pud_t *pud, unsigned long addr,
117 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
118{
119 pmd_t *pmd;
120 unsigned long next;
121
122 pmd = pmd_alloc(&init_mm, pud, addr);
123 if (!pmd)
124 return -ENOMEM;
125 do {
126 next = pmd_addr_end(addr, end);
127 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
128 return -ENOMEM;
129 } while (pmd++, addr = next, addr != end);
130 return 0;
131}
132
133static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
134 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
135{
136 pud_t *pud;
137 unsigned long next;
138
139 pud = pud_alloc(&init_mm, pgd, addr);
140 if (!pud)
141 return -ENOMEM;
142 do {
143 next = pud_addr_end(addr, end);
144 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
145 return -ENOMEM;
146 } while (pud++, addr = next, addr != end);
147 return 0;
148}
149
150
151
152
153
154
155
156static int vmap_page_range_noflush(unsigned long start, unsigned long end,
157 pgprot_t prot, struct page **pages)
158{
159 pgd_t *pgd;
160 unsigned long next;
161 unsigned long addr = start;
162 int err = 0;
163 int nr = 0;
164
165 BUG_ON(addr >= end);
166 pgd = pgd_offset_k(addr);
167 do {
168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err)
171 return err;
172 } while (pgd++, addr = next, addr != end);
173
174 return nr;
175}
176
177static int vmap_page_range(unsigned long start, unsigned long end,
178 pgprot_t prot, struct page **pages)
179{
180 int ret;
181
182 ret = vmap_page_range_noflush(start, end, prot, pages);
183 flush_cache_vmap(start, end);
184 return ret;
185}
186
/*
 * Return non-zero if @x lies in the module address range (only checked when
 * both CONFIG_MODULES and MODULES_VADDR are defined) or if
 * is_vmalloc_addr() accepts it.
 */
int is_vmalloc_or_module_addr(const void *x)
{
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	const unsigned long va = (unsigned long)x;

	if (va >= MODULES_VADDR && va < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}
201
202
203
204
205struct page *vmalloc_to_page(const void *vmalloc_addr)
206{
207 unsigned long addr = (unsigned long) vmalloc_addr;
208 struct page *page = NULL;
209 pgd_t *pgd = pgd_offset_k(addr);
210
211
212
213
214
215 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
216
217 if (!pgd_none(*pgd)) {
218 pud_t *pud = pud_offset(pgd, addr);
219 if (!pud_none(*pud)) {
220 pmd_t *pmd = pmd_offset(pud, addr);
221 if (!pmd_none(*pmd)) {
222 pte_t *ptep, pte;
223
224 ptep = pte_offset_map(pmd, addr);
225 pte = *ptep;
226 if (pte_present(pte))
227 page = pte_page(pte);
228 pte_unmap(ptep);
229 }
230 }
231 }
232 return page;
233}
234EXPORT_SYMBOL(vmalloc_to_page);
235
236
237
238
/*
 * Return the page frame number backing @vmalloc_addr.  The result of
 * vmalloc_to_page() is converted without a NULL check.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}
EXPORT_SYMBOL(vmalloc_to_pfn);
244
245
246
247
/* Bits kept in vmap_area->flags. */
#define VM_LAZY_FREE 0x01 /* set by free_vmap_area_noflush(): waiting to be purged */
#define VM_LAZY_FREEING 0x02 /* set by __purge_vmap_area_lazy() once picked up for freeing */
#define VM_VM_AREA 0x04 /* ->private points to a struct vm_struct */

/* One reserved range of kernel virtual address space. */
struct vmap_area {
 unsigned long va_start; /* first address of the range */
 unsigned long va_end; /* first address past the range */
 unsigned long flags; /* VM_LAZY_FREE, VM_LAZY_FREEING, VM_VM_AREA */
 struct rb_node rb_node; /* link in vmap_area_root */
 struct list_head list; /* link in vmap_area_list */
 struct list_head purge_list; /* link on the temporary list built while purging */
 void *private; /* struct vm_struct when VM_VM_AREA is set */
 struct rcu_head rcu_head; /* used by kfree_rcu() in __free_vmap_area() */
};
262
/*
 * vmap_area_lock is held by alloc_vmap_area(), free_vmap_area(),
 * find_vmap_area() and the purge path while they use the tree, the list
 * and the search cache below.
 */
static DEFINE_SPINLOCK(vmap_area_lock);
/* All areas; __insert_vmap_area() links each one after its rb-tree predecessor. */
static LIST_HEAD(vmap_area_list);
/* The same areas in an rb-tree. */
static struct rb_root vmap_area_root = RB_ROOT;

/* Search cache used by alloc_vmap_area() and adjusted by __free_vmap_area(). */
static struct rb_node *free_vmap_cache; /* node of the most recently allocated area, or NULL */
static unsigned long cached_hole_size; /* largest hole skipped during cached searches */
static unsigned long cached_vstart; /* vstart of the most recent allocation request */
static unsigned long cached_align; /* align of the most recent allocation request */

/*
 * Initialised to VMALLOC_END by vmalloc_init() and raised by
 * __free_vmap_area(); its consumer is not in this part of the file.
 */
static unsigned long vmap_area_pcpu_hole;
274
275static struct vmap_area *__find_vmap_area(unsigned long addr)
276{
277 struct rb_node *n = vmap_area_root.rb_node;
278
279 while (n) {
280 struct vmap_area *va;
281
282 va = rb_entry(n, struct vmap_area, rb_node);
283 if (addr < va->va_start)
284 n = n->rb_left;
285 else if (addr > va->va_start)
286 n = n->rb_right;
287 else
288 return va;
289 }
290
291 return NULL;
292}
293
294static void __insert_vmap_area(struct vmap_area *va)
295{
296 struct rb_node **p = &vmap_area_root.rb_node;
297 struct rb_node *parent = NULL;
298 struct rb_node *tmp;
299
300 while (*p) {
301 struct vmap_area *tmp_va;
302
303 parent = *p;
304 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
305 if (va->va_start < tmp_va->va_end)
306 p = &(*p)->rb_left;
307 else if (va->va_end > tmp_va->va_start)
308 p = &(*p)->rb_right;
309 else
310 BUG();
311 }
312
313 rb_link_node(&va->rb_node, parent, p);
314 rb_insert_color(&va->rb_node, &vmap_area_root);
315
316
317 tmp = rb_prev(&va->rb_node);
318 if (tmp) {
319 struct vmap_area *prev;
320 prev = rb_entry(tmp, struct vmap_area, rb_node);
321 list_add_rcu(&va->list, &prev->list);
322 } else
323 list_add_rcu(&va->list, &vmap_area_list);
324}
325
/* Defined further down; alloc_vmap_area() calls it once before retrying a failed search. */
static void purge_vmap_area_lazy(void);
327
328
329
330
331
/*
 * Allocate a vmap_area descriptor and reserve @size bytes of address space,
 * aligned to @align, between @vstart and @vend.
 *
 * @size must be a non-zero multiple of the page size and @align a power of
 * two (BUG otherwise).  The descriptor is allocated on @node with the
 * reclaim-related bits of @gfp_mask.
 *
 * Returns the inserted area, ERR_PTR(-ENOMEM) if the descriptor cannot be
 * allocated, or ERR_PTR(-EBUSY) if no suitable range is found even after
 * one purge_vmap_area_lazy() and retry.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
 unsigned long align,
 unsigned long vstart, unsigned long vend,
 int node, gfp_t gfp_mask)
{
 struct vmap_area *va;
 struct rb_node *n;
 unsigned long addr;
 int purged = 0;
 struct vmap_area *first;

 BUG_ON(!size);
 BUG_ON(size & ~PAGE_MASK);
 BUG_ON(!is_power_of_2(align));

 va = kmalloc_node(sizeof(struct vmap_area),
 gfp_mask & GFP_RECLAIM_MASK, node);
 if (unlikely(!va))
 return ERR_PTR(-ENOMEM);

retry:
 spin_lock(&vmap_area_lock);
 /*
 * Drop the search cache when there is none, when this request is
 * smaller than a hole already skipped, or when vstart/align are lower
 * than those of the previous request.  The nocache label is also
 * entered from below when the cached position lies before vstart.
 */
 if (!free_vmap_cache ||
 size < cached_hole_size ||
 vstart < cached_vstart ||
 align < cached_align) {
nocache:
 cached_hole_size = 0;
 free_vmap_cache = NULL;
 }
 /* remember the parameters of this request for the next cache check */
 cached_vstart = vstart;
 cached_align = align;

 /* pick the starting point of the search */
 if (free_vmap_cache) {
 first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
 addr = ALIGN(first->va_end, align);
 if (addr < vstart)
 goto nocache;
 if (addr + size - 1 < addr)
 goto overflow;

 } else {
 addr = ALIGN(vstart, align);
 if (addr + size - 1 < addr)
 goto overflow;

 n = vmap_area_root.rb_node;
 first = NULL;

 /* find the lowest area whose end is at or above addr */
 while (n) {
 struct vmap_area *tmp;
 tmp = rb_entry(n, struct vmap_area, rb_node);
 if (tmp->va_end >= addr) {
 first = tmp;
 if (tmp->va_start <= addr)
 break;
 n = n->rb_left;
 } else
 n = n->rb_right;
 }

 if (!first)
 goto found;
 }

 /* step over existing areas until the request fits before the next one */
 while (addr + size > first->va_start && addr + size <= vend) {
 if (addr + cached_hole_size < first->va_start)
 cached_hole_size = first->va_start - addr;
 addr = ALIGN(first->va_end, align);
 if (addr + size - 1 < addr)
 goto overflow;

 n = rb_next(&first->rb_node);
 if (n)
 first = rb_entry(n, struct vmap_area, rb_node);
 else
 goto found;
 }

found:
 if (addr + size > vend)
 goto overflow;

 va->va_start = addr;
 va->va_end = addr + size;
 va->flags = 0;
 __insert_vmap_area(va);
 /* the next search starts after this area */
 free_vmap_cache = &va->rb_node;
 spin_unlock(&vmap_area_lock);

 BUG_ON(va->va_start & (align-1));
 BUG_ON(va->va_start < vstart);
 BUG_ON(va->va_end > vend);

 return va;

overflow:
 spin_unlock(&vmap_area_lock);
 /* purge lazily freed areas once, then search again */
 if (!purged) {
 purge_vmap_area_lazy();
 purged = 1;
 goto retry;
 }
 if (printk_ratelimit())
 printk(KERN_WARNING
 "vmap allocation for size %lu failed: "
 "use vmalloc=<size> to increase size.\n", size);
 kfree(va);
 return ERR_PTR(-EBUSY);
}
454
455static void __free_vmap_area(struct vmap_area *va)
456{
457 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
458
459 if (free_vmap_cache) {
460 if (va->va_end < cached_vstart) {
461 free_vmap_cache = NULL;
462 } else {
463 struct vmap_area *cache;
464 cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
465 if (va->va_start <= cache->va_start) {
466 free_vmap_cache = rb_prev(&va->rb_node);
467
468
469
470
471 }
472 }
473 }
474 rb_erase(&va->rb_node, &vmap_area_root);
475 RB_CLEAR_NODE(&va->rb_node);
476 list_del_rcu(&va->list);
477
478
479
480
481
482
483
484 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
485 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
486
487 kfree_rcu(va, rcu_head);
488}
489
490
491
492
/*
 * Free @va immediately: take vmap_area_lock and hand the area to
 * __free_vmap_area().  Nothing is unmapped or flushed here.
 */
static void free_vmap_area(struct vmap_area *va)
{
 spin_lock(&vmap_area_lock);
 __free_vmap_area(va);
 spin_unlock(&vmap_area_lock);
}
499
500
501
502
503static void unmap_vmap_area(struct vmap_area *va)
504{
505 vunmap_page_range(va->va_start, va->va_end);
506}
507
/*
 * Debug helper called before a range is freed.
 *
 * With CONFIG_DEBUG_PAGEALLOC the range [start, end) is unmapped and its
 * TLB entries are flushed right away; without it the function does nothing.
 */
static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
 vunmap_page_range(start, end);
 flush_tlb_kernel_range(start, end);
#endif
}
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545static unsigned long lazy_max_pages(void)
546{
547 unsigned int log;
548
549 log = fls(num_online_cpus());
550
551 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
552}
553
/*
 * Number of pages in areas marked VM_LAZY_FREE: increased by
 * free_vmap_area_noflush() and decreased by __purge_vmap_area_lazy().
 */
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* Defined further down; called by __purge_vmap_area_lazy() for sync purges. */
static void purge_fragmented_blocks_allcpus(void);
558
559
560
561
562
563void set_iounmap_nonlazy(void)
564{
565 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
566}
567
568
569
570
571
572
573
574
575
576
577
/*
 * Free every area marked VM_LAZY_FREE and flush the TLB for the address
 * range they span.
 *
 * @start, @end: in/out bounds of the range to flush; they are widened to
 *               cover each purged area.
 * @sync:        non-zero to wait for purge_lock and to purge fragmented
 *               vmap blocks on all CPUs first.
 * @force_flush: non-zero to flush [*start, *end) even when nothing was
 *               purged; it also makes the function wait for purge_lock.
 *
 * With both @sync and @force_flush zero the function returns at once if
 * another purge is already running.
 */
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 int sync, int force_flush)
{
 static DEFINE_SPINLOCK(purge_lock);
 LIST_HEAD(valist);
 struct vmap_area *va;
 struct vmap_area *n_va;
 int nr = 0;

 /* opportunistic callers give up when the lock is contended */
 if (!sync && !force_flush) {
 if (!spin_trylock(&purge_lock))
 return;
 } else
 spin_lock(&purge_lock);

 if (sync)
 purge_fragmented_blocks_allcpus();

 /* collect the lazily freed areas and work out the range they span */
 rcu_read_lock();
 list_for_each_entry_rcu(va, &vmap_area_list, list) {
 if (va->flags & VM_LAZY_FREE) {
 if (va->va_start < *start)
 *start = va->va_start;
 if (va->va_end > *end)
 *end = va->va_end;
 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
 list_add_tail(&va->purge_list, &valist);
 va->flags |= VM_LAZY_FREEING;
 va->flags &= ~VM_LAZY_FREE;
 }
 }
 rcu_read_unlock();

 if (nr)
 atomic_sub(nr, &vmap_lazy_nr);

 /* flush before the address ranges are released for reuse below */
 if (nr || force_flush)
 flush_tlb_kernel_range(*start, *end);

 if (nr) {
 spin_lock(&vmap_area_lock);
 list_for_each_entry_safe(va, n_va, &valist, purge_list)
 __free_vmap_area(va);
 spin_unlock(&vmap_area_lock);
 }
 spin_unlock(&purge_lock);
}
630
631
632
633
634
635static void try_purge_vmap_area_lazy(void)
636{
637 unsigned long start = ULONG_MAX, end = 0;
638
639 __purge_vmap_area_lazy(&start, &end, 0, 0);
640}
641
642
643
644
645static void purge_vmap_area_lazy(void)
646{
647 unsigned long start = ULONG_MAX, end = 0;
648
649 __purge_vmap_area_lazy(&start, &end, 1, 0);
650}
651
652
653
654
655
656
657static void free_vmap_area_noflush(struct vmap_area *va)
658{
659 va->flags |= VM_LAZY_FREE;
660 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
661 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
662 try_purge_vmap_area_lazy();
663}
664
665
666
667
668
669static void free_unmap_vmap_area_noflush(struct vmap_area *va)
670{
671 unmap_vmap_area(va);
672 free_vmap_area_noflush(va);
673}
674
675
676
677
678static void free_unmap_vmap_area(struct vmap_area *va)
679{
680 flush_cache_vunmap(va->va_start, va->va_end);
681 free_unmap_vmap_area_noflush(va);
682}
683
684static struct vmap_area *find_vmap_area(unsigned long addr)
685{
686 struct vmap_area *va;
687
688 spin_lock(&vmap_area_lock);
689 va = __find_vmap_area(addr);
690 spin_unlock(&vmap_area_lock);
691
692 return va;
693}
694
/*
 * Find the area starting at @addr, then unmap and lazily free it.
 * BUGs if no area starts at that address.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va = find_vmap_area(addr);

	BUG_ON(!va);
	free_unmap_vmap_area(va);
}
703
704
705
706
707
708
709
710
711
712
713
714
715
/*
 * Nominal amount of vmalloc space, chosen by word size.  In this part of
 * the file it is only used to derive VMAP_BBMAP_BITS below.
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE (128UL*1024*1024)
#else
#define VMALLOC_SPACE (128UL*1024*1024*1024)
#endif

/* VMALLOC_SPACE expressed in pages. */
#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
/* Largest request, in pages, that vm_map_ram() serves from vmap blocks. */
#define VMAP_MAX_ALLOC BITS_PER_LONG
/* Upper and lower bounds for the number of pages per vmap block. */
#define VMAP_BBMAP_BITS_MAX 1024
#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
/* NOTE: both macros evaluate their arguments more than once. */
#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
/*
 * Pages per vmap block: VMALLOC_PAGES divided by the CPU count (rounded up
 * to a power of two) and by 16, clamped to the bounds above.
 */
#define VMAP_BBMAP_BITS \
 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

/* Size in bytes of one vmap block. */
#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)

/* Set at the end of vmalloc_init(); vm_unmap_aliases() is a no-op before that. */
static bool vmap_initialized __read_mostly = false;
736
/* Per-CPU queue of vmap blocks that can still serve allocations. */
struct vmap_block_queue {
 spinlock_t lock; /* held around additions to and removals from ->free */
 struct list_head free; /* blocks, linked through vmap_block->free_list */
};

/* A VMAP_BLOCK_SIZE region handed out in power-of-two runs of pages. */
struct vmap_block {
 spinlock_t lock; /* held while the counters and bitmaps are updated */
 struct vmap_area *va; /* address range backing this block */
 struct vmap_block_queue *vbq; /* queue the block was added to at creation */
 unsigned long free, dirty; /* pages still allocatable / pages already freed */
 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); /* pages handed out by vb_alloc() */
 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); /* pages released by vb_free() */
 struct list_head free_list; /* link in vmap_block_queue->free */
 struct rcu_head rcu_head; /* used by kfree_rcu() in free_vmap_block() */
 struct list_head purge; /* link on the local list in purge_fragmented_blocks() */
};

/* One block queue per CPU; initialised in vmalloc_init(). */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of vmap blocks, indexed by addr_to_vb_idx() of the block's
 * start address.  vmap_block_tree_lock is held for insertion and deletion;
 * vb_free() looks blocks up under rcu_read_lock().
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
764
765
766
767
768
769
770
771
772static unsigned long addr_to_vb_idx(unsigned long addr)
773{
774 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
775 addr /= VMAP_BLOCK_SIZE;
776 return addr;
777}
778
/*
 * Create a new, completely free vmap block, insert it into the radix tree
 * and add it to the current CPU's block queue.
 *
 * Returns the block, or an ERR_PTR() when the descriptor, the address range
 * or the radix tree preload cannot be obtained.
 */
static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
{
 struct vmap_block_queue *vbq;
 struct vmap_block *vb;
 struct vmap_area *va;
 unsigned long vb_idx;
 int node, err;

 node = numa_node_id();

 vb = kmalloc_node(sizeof(struct vmap_block),
 gfp_mask & GFP_RECLAIM_MASK, node);
 if (unlikely(!vb))
 return ERR_PTR(-ENOMEM);

 /* block-sized and block-aligned, so addr_to_vb_idx() identifies it */
 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
 VMALLOC_START, VMALLOC_END,
 node, gfp_mask);
 if (IS_ERR(va)) {
 kfree(vb);
 return ERR_CAST(va);
 }

 err = radix_tree_preload(gfp_mask);
 if (unlikely(err)) {
 kfree(vb);
 free_vmap_area(va);
 return ERR_PTR(err);
 }

 /* every page free, none dirty */
 spin_lock_init(&vb->lock);
 vb->va = va;
 vb->free = VMAP_BBMAP_BITS;
 vb->dirty = 0;
 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
 INIT_LIST_HEAD(&vb->free_list);

 vb_idx = addr_to_vb_idx(va->va_start);
 spin_lock(&vmap_block_tree_lock);
 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
 spin_unlock(&vmap_block_tree_lock);
 /* insertion after a successful preload is treated as infallible */
 BUG_ON(err);
 radix_tree_preload_end();

 /* publish the block on this CPU's queue */
 vbq = &get_cpu_var(vmap_block_queue);
 vb->vbq = vbq;
 spin_lock(&vbq->lock);
 list_add_rcu(&vb->free_list, &vbq->free);
 spin_unlock(&vbq->lock);
 put_cpu_var(vmap_block_queue);

 return vb;
}
833
834static void free_vmap_block(struct vmap_block *vb)
835{
836 struct vmap_block *tmp;
837 unsigned long vb_idx;
838
839 vb_idx = addr_to_vb_idx(vb->va->va_start);
840 spin_lock(&vmap_block_tree_lock);
841 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
842 spin_unlock(&vmap_block_tree_lock);
843 BUG_ON(tmp != vb);
844
845 free_vmap_area_noflush(vb->va);
846 kfree_rcu(vb, rcu_head);
847}
848
/*
 * Retire the blocks on @cpu's queue that have no pages in use
 * (free + dirty covers the whole block) but are not yet entirely dirty.
 *
 * Such a block is marked fully allocated and fully dirty, taken off the
 * free list and, once the RCU read section has ended, freed with
 * free_vmap_block().
 */
static void purge_fragmented_blocks(int cpu)
{
 LIST_HEAD(purge);
 struct vmap_block *vb;
 struct vmap_block *n_vb;
 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);

 rcu_read_lock();
 list_for_each_entry_rcu(vb, &vbq->free, free_list) {

 /* unlocked pre-check; repeated below with vb->lock held */
 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
 continue;

 spin_lock(&vb->lock);
 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
 /* leave nothing allocatable in this block */
 vb->free = 0;
 vb->dirty = VMAP_BBMAP_BITS;
 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
 spin_lock(&vbq->lock);
 list_del_rcu(&vb->free_list);
 spin_unlock(&vbq->lock);
 spin_unlock(&vb->lock);
 list_add_tail(&vb->purge, &purge);
 } else
 spin_unlock(&vb->lock);
 }
 rcu_read_unlock();

 /* free the collected blocks outside the RCU read section */
 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
 list_del(&vb->purge);
 free_vmap_block(vb);
 }
}
883
/*
 * Purge fragmented vmap blocks on the block queue of the CPU this code is
 * currently running on.
 */
static void purge_fragmented_blocks_thiscpu(void)
{
	int this_cpu = smp_processor_id();

	purge_fragmented_blocks(this_cpu);
}
888
889static void purge_fragmented_blocks_allcpus(void)
890{
891 int cpu;
892
893 for_each_possible_cpu(cpu)
894 purge_fragmented_blocks(cpu);
895}
896
897static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
898{
899 struct vmap_block_queue *vbq;
900 struct vmap_block *vb;
901 unsigned long addr = 0;
902 unsigned int order;
903 int purge = 0;
904
905 BUG_ON(size & ~PAGE_MASK);
906 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
907 order = get_order(size);
908
909again:
910 rcu_read_lock();
911 vbq = &get_cpu_var(vmap_block_queue);
912 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
913 int i;
914
915 spin_lock(&vb->lock);
916 if (vb->free < 1UL << order)
917 goto next;
918
919 i = bitmap_find_free_region(vb->alloc_map,
920 VMAP_BBMAP_BITS, order);
921
922 if (i < 0) {
923 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
924
925 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
926 purge = 1;
927 }
928 goto next;
929 }
930 addr = vb->va->va_start + (i << PAGE_SHIFT);
931 BUG_ON(addr_to_vb_idx(addr) !=
932 addr_to_vb_idx(vb->va->va_start));
933 vb->free -= 1UL << order;
934 if (vb->free == 0) {
935 spin_lock(&vbq->lock);
936 list_del_rcu(&vb->free_list);
937 spin_unlock(&vbq->lock);
938 }
939 spin_unlock(&vb->lock);
940 break;
941next:
942 spin_unlock(&vb->lock);
943 }
944
945 if (purge)
946 purge_fragmented_blocks_thiscpu();
947
948 put_cpu_var(vmap_block_queue);
949 rcu_read_unlock();
950
951 if (!addr) {
952 vb = new_vmap_block(gfp_mask);
953 if (IS_ERR(vb))
954 return vb;
955 goto again;
956 }
957
958 return (void *)addr;
959}
960
961static void vb_free(const void *addr, unsigned long size)
962{
963 unsigned long offset;
964 unsigned long vb_idx;
965 unsigned int order;
966 struct vmap_block *vb;
967
968 BUG_ON(size & ~PAGE_MASK);
969 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
970
971 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
972
973 order = get_order(size);
974
975 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
976
977 vb_idx = addr_to_vb_idx((unsigned long)addr);
978 rcu_read_lock();
979 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
980 rcu_read_unlock();
981 BUG_ON(!vb);
982
983 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
984
985 spin_lock(&vb->lock);
986 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
987
988 vb->dirty += 1UL << order;
989 if (vb->dirty == VMAP_BBMAP_BITS) {
990 BUG_ON(vb->free);
991 spin_unlock(&vb->lock);
992 free_vmap_block(vb);
993 } else
994 spin_unlock(&vb->lock);
995}
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
/*
 * Flush the TLB for all address ranges that have been released but not yet
 * flushed.
 *
 * The dirty ranges of every vmap block on every possible CPU's queue are
 * folded into one [start, end) range, which is then passed to a synchronous
 * __purge_vmap_area_lazy().  Does nothing before vmalloc_init() has run.
 */
void vm_unmap_aliases(void)
{
 unsigned long start = ULONG_MAX, end = 0;
 int cpu;
 int flush = 0;

 if (unlikely(!vmap_initialized))
 return;

 for_each_possible_cpu(cpu) {
 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
 struct vmap_block *vb;

 rcu_read_lock();
 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
 int i;

 spin_lock(&vb->lock);
 /* visit each run of consecutive dirty pages: [i, j) */
 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
 while (i < VMAP_BBMAP_BITS) {
 unsigned long s, e;
 int j;
 j = find_next_zero_bit(vb->dirty_map,
 VMAP_BBMAP_BITS, i);

 s = vb->va->va_start + (i << PAGE_SHIFT);
 e = vb->va->va_start + (j << PAGE_SHIFT);
 flush = 1;

 if (s < start)
 start = s;
 if (e > end)
 end = e;

 i = j;
 i = find_next_bit(vb->dirty_map,
 VMAP_BBMAP_BITS, i);
 }
 spin_unlock(&vb->lock);
 }
 rcu_read_unlock();
 }

 /* flush is forced whenever a dirty block range was found */
 __purge_vmap_area_lazy(&start, &end, 1, flush);
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1056
1057
1058
1059
1060
1061
1062void vm_unmap_ram(const void *mem, unsigned int count)
1063{
1064 unsigned long size = count << PAGE_SHIFT;
1065 unsigned long addr = (unsigned long)mem;
1066
1067 BUG_ON(!addr);
1068 BUG_ON(addr < VMALLOC_START);
1069 BUG_ON(addr > VMALLOC_END);
1070 BUG_ON(addr & (PAGE_SIZE-1));
1071
1072 debug_check_no_locks_freed(mem, size);
1073 vmap_debug_free_range(addr, addr+size);
1074
1075 if (likely(count <= VMAP_MAX_ALLOC))
1076 vb_free(mem, size);
1077 else
1078 free_unmap_vmap_area_addr(addr);
1079}
1080EXPORT_SYMBOL(vm_unmap_ram);
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1092{
1093 unsigned long size = count << PAGE_SHIFT;
1094 unsigned long addr;
1095 void *mem;
1096
1097 if (likely(count <= VMAP_MAX_ALLOC)) {
1098 mem = vb_alloc(size, GFP_KERNEL);
1099 if (IS_ERR(mem))
1100 return NULL;
1101 addr = (unsigned long)mem;
1102 } else {
1103 struct vmap_area *va;
1104 va = alloc_vmap_area(size, PAGE_SIZE,
1105 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1106 if (IS_ERR(va))
1107 return NULL;
1108
1109 addr = va->va_start;
1110 mem = (void *)addr;
1111 }
1112 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1113 vm_unmap_ram(mem, count);
1114 return NULL;
1115 }
1116 return mem;
1117}
1118EXPORT_SYMBOL(vm_map_ram);
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1133{
1134 static size_t vm_init_off __initdata;
1135 unsigned long addr;
1136
1137 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1138 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1139
1140 vm->addr = (void *)addr;
1141
1142 vm->next = vmlist;
1143 vmlist = vm;
1144}
1145
1146void __init vmalloc_init(void)
1147{
1148 struct vmap_area *va;
1149 struct vm_struct *tmp;
1150 int i;
1151
1152 for_each_possible_cpu(i) {
1153 struct vmap_block_queue *vbq;
1154
1155 vbq = &per_cpu(vmap_block_queue, i);
1156 spin_lock_init(&vbq->lock);
1157 INIT_LIST_HEAD(&vbq->free);
1158 }
1159
1160
1161 for (tmp = vmlist; tmp; tmp = tmp->next) {
1162 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1163 va->flags = tmp->flags | VM_VM_AREA;
1164 va->va_start = (unsigned long)tmp->addr;
1165 va->va_end = va->va_start + tmp->size;
1166 __insert_vmap_area(va);
1167 }
1168
1169 vmap_area_pcpu_hole = VMALLOC_END;
1170
1171 vmap_initialized = true;
1172}
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1194 pgprot_t prot, struct page **pages)
1195{
1196 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1197}
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
/*
 * Remove the kernel page-table mappings for [addr, addr + size) without
 * flushing caches or TLBs.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	unsigned long stop = addr + size;

	vunmap_page_range(addr, stop);
}
EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1218
1219
1220
1221
1222
1223
1224
1225
1226
/*
 * Unmap [addr, addr + size) from the kernel page tables: cache flush first,
 * then the unmap, then a TLB flush for the range.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	const unsigned long stop = addr + size;

	flush_cache_vunmap(addr, stop);
	vunmap_page_range(addr, stop);
	flush_tlb_kernel_range(addr, stop);
}
1235
1236int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1237{
1238 unsigned long addr = (unsigned long)area->addr;
1239 unsigned long end = addr + area->size - PAGE_SIZE;
1240 int err;
1241
1242 err = vmap_page_range(addr, end, prot, *pages);
1243 if (err > 0) {
1244 *pages += err;
1245 err = 0;
1246 }
1247
1248 return err;
1249}
1250EXPORT_SYMBOL_GPL(map_vm_area);
1251
1252
/*
 * vmlist is a singly linked list of vm_structs, linked through ->next.
 * insert_vmalloc_vmlist() keeps it sorted by address; both it and
 * remove_vm_area() modify the list with vmlist_lock held for writing.
 */
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
1255
1256static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1257 unsigned long flags, void *caller)
1258{
1259 vm->flags = flags;
1260 vm->addr = (void *)va->va_start;
1261 vm->size = va->va_end - va->va_start;
1262 vm->caller = caller;
1263 va->private = vm;
1264 va->flags |= VM_VM_AREA;
1265}
1266
1267static void insert_vmalloc_vmlist(struct vm_struct *vm)
1268{
1269 struct vm_struct *tmp, **p;
1270
1271 vm->flags &= ~VM_UNLIST;
1272 write_lock(&vmlist_lock);
1273 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1274 if (tmp->addr >= vm->addr)
1275 break;
1276 }
1277 vm->next = *p;
1278 *p = vm;
1279 write_unlock(&vmlist_lock);
1280}
1281
/*
 * Initialise @vm from @va (see setup_vmalloc_vm()) and then put it on
 * vmlist.
 */
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	/* The vm_struct must be filled in before it becomes visible on the list. */
	setup_vmalloc_vm(vm, va, flags, caller);

	insert_vmalloc_vmlist(vm);
}
1288
1289static struct vm_struct *__get_vm_area_node(unsigned long size,
1290 unsigned long align, unsigned long flags, unsigned long start,
1291 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1292{
1293 struct vmap_area *va;
1294 struct vm_struct *area;
1295
1296 BUG_ON(in_interrupt());
1297 if (flags & VM_IOREMAP) {
1298 int bit = fls(size);
1299
1300 if (bit > IOREMAP_MAX_ORDER)
1301 bit = IOREMAP_MAX_ORDER;
1302 else if (bit < PAGE_SHIFT)
1303 bit = PAGE_SHIFT;
1304
1305 align = 1ul << bit;
1306 }
1307
1308 size = PAGE_ALIGN(size);
1309 if (unlikely(!size))
1310 return NULL;
1311
1312 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1313 if (unlikely(!area))
1314 return NULL;
1315
1316
1317
1318
1319 size += PAGE_SIZE;
1320
1321 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1322 if (IS_ERR(va)) {
1323 kfree(area);
1324 return NULL;
1325 }
1326
1327
1328
1329
1330
1331
1332
1333
1334 if (flags & VM_UNLIST)
1335 setup_vmalloc_vm(area, va, flags, caller);
1336 else
1337 insert_vmalloc_vm(area, va, flags, caller);
1338
1339 return area;
1340}
1341
1342struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1343 unsigned long start, unsigned long end)
1344{
1345 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1346 __builtin_return_address(0));
1347}
1348EXPORT_SYMBOL_GPL(__get_vm_area);
1349
1350struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1351 unsigned long start, unsigned long end,
1352 void *caller)
1353{
1354 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1355 caller);
1356}
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1368{
1369 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1370 -1, GFP_KERNEL, __builtin_return_address(0));
1371}
1372
1373struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1374 void *caller)
1375{
1376 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1377 -1, GFP_KERNEL, caller);
1378}
1379
1380static struct vm_struct *find_vm_area(const void *addr)
1381{
1382 struct vmap_area *va;
1383
1384 va = find_vmap_area((unsigned long)addr);
1385 if (va && va->flags & VM_VM_AREA)
1386 return va->private;
1387
1388 return NULL;
1389}
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399struct vm_struct *remove_vm_area(const void *addr)
1400{
1401 struct vmap_area *va;
1402
1403 va = find_vmap_area((unsigned long)addr);
1404 if (va && va->flags & VM_VM_AREA) {
1405 struct vm_struct *vm = va->private;
1406
1407 if (!(vm->flags & VM_UNLIST)) {
1408 struct vm_struct *tmp, **p;
1409
1410
1411
1412
1413
1414 write_lock(&vmlist_lock);
1415 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1416 ;
1417 *p = tmp->next;
1418 write_unlock(&vmlist_lock);
1419 }
1420
1421 vmap_debug_free_range(va->va_start, va->va_end);
1422 free_unmap_vmap_area(va);
1423 vm->size -= PAGE_SIZE;
1424
1425 return vm;
1426 }
1427 return NULL;
1428}
1429
1430static void __vunmap(const void *addr, int deallocate_pages)
1431{
1432 struct vm_struct *area;
1433
1434 if (!addr)
1435 return;
1436
1437 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1438 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1439 return;
1440 }
1441
1442 area = remove_vm_area(addr);
1443 if (unlikely(!area)) {
1444 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1445 addr);
1446 return;
1447 }
1448
1449 debug_check_no_locks_freed(addr, area->size);
1450 debug_check_no_obj_freed(addr, area->size);
1451
1452 if (deallocate_pages) {
1453 int i;
1454
1455 for (i = 0; i < area->nr_pages; i++) {
1456 struct page *page = area->pages[i];
1457
1458 BUG_ON(!page);
1459 __free_page(page);
1460 }
1461
1462 if (area->flags & VM_VPAGES)
1463 vfree(area->pages);
1464 else
1465 kfree(area->pages);
1466 }
1467
1468 kfree(area);
1469 return;
1470}
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
/*
 * Unmap the vm area starting at @addr and free its pages.  A NULL @addr is
 * accepted and does nothing beyond the kmemleak notification.
 *
 * Must not be called from interrupt context (BUG).
 */
void vfree(const void *addr)
{
	const int release_pages = 1;

	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, release_pages);
}
EXPORT_SYMBOL(vfree);
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
/*
 * Unmap the vm area starting at @addr without freeing the pages that were
 * mapped there.
 *
 * Must not be called from interrupt context (BUG); may sleep.
 */
void vunmap(const void *addr)
{
	const int release_pages = 0;

	BUG_ON(in_interrupt());
	might_sleep();

	__vunmap(addr, release_pages);
}
EXPORT_SYMBOL(vunmap);
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519void *vmap(struct page **pages, unsigned int count,
1520 unsigned long flags, pgprot_t prot)
1521{
1522 struct vm_struct *area;
1523
1524 might_sleep();
1525
1526 if (count > totalram_pages)
1527 return NULL;
1528
1529 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1530 __builtin_return_address(0));
1531 if (!area)
1532 return NULL;
1533
1534 if (map_vm_area(area, prot, &pages)) {
1535 vunmap(area->addr);
1536 return NULL;
1537 }
1538
1539 return area->addr;
1540}
1541EXPORT_SYMBOL(vmap);
1542
1543static void *__vmalloc_node(unsigned long size, unsigned long align,
1544 gfp_t gfp_mask, pgprot_t prot,
1545 int node, void *caller);
1546static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1547 pgprot_t prot, int node, void *caller)
1548{
1549 const int order = 0;
1550 struct page **pages;
1551 unsigned int nr_pages, array_size, i;
1552 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1553
1554 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1555 array_size = (nr_pages * sizeof(struct page *));
1556
1557 area->nr_pages = nr_pages;
1558
1559 if (array_size > PAGE_SIZE) {
1560 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1561 PAGE_KERNEL, node, caller);
1562 area->flags |= VM_VPAGES;
1563 } else {
1564 pages = kmalloc_node(array_size, nested_gfp, node);
1565 }
1566 area->pages = pages;
1567 area->caller = caller;
1568 if (!area->pages) {
1569 remove_vm_area(area->addr);
1570 kfree(area);
1571 return NULL;
1572 }
1573
1574 for (i = 0; i < area->nr_pages; i++) {
1575 struct page *page;
1576 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1577
1578 if (node < 0)
1579 page = alloc_page(tmp_mask);
1580 else
1581 page = alloc_pages_node(node, tmp_mask, order);
1582
1583 if (unlikely(!page)) {
1584
1585 area->nr_pages = i;
1586 goto fail;
1587 }
1588 area->pages[i] = page;
1589 }
1590
1591 if (map_vm_area(area, prot, &pages))
1592 goto fail;
1593 return area->addr;
1594
1595fail:
1596 warn_alloc_failed(gfp_mask, order,
1597 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1598 (area->nr_pages*PAGE_SIZE), area->size);
1599 vfree(area->addr);
1600 return NULL;
1601}
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618void *__vmalloc_node_range(unsigned long size, unsigned long align,
1619 unsigned long start, unsigned long end, gfp_t gfp_mask,
1620 pgprot_t prot, int node, void *caller)
1621{
1622 struct vm_struct *area;
1623 void *addr;
1624 unsigned long real_size = size;
1625
1626 size = PAGE_ALIGN(size);
1627 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1628 goto fail;
1629
1630 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
1631 start, end, node, gfp_mask, caller);
1632 if (!area)
1633 goto fail;
1634
1635 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1636 if (!addr)
1637 return NULL;
1638
1639
1640
1641
1642
1643 insert_vmalloc_vmlist(area);
1644
1645
1646
1647
1648
1649
1650 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1651
1652 return addr;
1653
1654fail:
1655 warn_alloc_failed(gfp_mask, 0,
1656 "vmalloc: allocation failure: %lu bytes\n",
1657 real_size);
1658 return NULL;
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674static void *__vmalloc_node(unsigned long size, unsigned long align,
1675 gfp_t gfp_mask, pgprot_t prot,
1676 int node, void *caller)
1677{
1678 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1679 gfp_mask, prot, node, caller);
1680}
1681
1682void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1683{
1684 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1685 __builtin_return_address(0));
1686}
1687EXPORT_SYMBOL(__vmalloc);
1688
1689static inline void *__vmalloc_node_flags(unsigned long size,
1690 int node, gfp_t flags)
1691{
1692 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1693 node, __builtin_return_address(0));
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705void *vmalloc(unsigned long size)
1706{
1707 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1708}
1709EXPORT_SYMBOL(vmalloc);
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721void *vzalloc(unsigned long size)
1722{
1723 return __vmalloc_node_flags(size, -1,
1724 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1725}
1726EXPORT_SYMBOL(vzalloc);
1727
1728
1729
1730
1731
1732
1733
1734
1735void *vmalloc_user(unsigned long size)
1736{
1737 struct vm_struct *area;
1738 void *ret;
1739
1740 ret = __vmalloc_node(size, SHMLBA,
1741 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1742 PAGE_KERNEL, -1, __builtin_return_address(0));
1743 if (ret) {
1744 area = find_vm_area(ret);
1745 area->flags |= VM_USERMAP;
1746 }
1747 return ret;
1748}
1749EXPORT_SYMBOL(vmalloc_user);
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762void *vmalloc_node(unsigned long size, int node)
1763{
1764 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1765 node, __builtin_return_address(0));
1766}
1767EXPORT_SYMBOL(vmalloc_node);
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781void *vzalloc_node(unsigned long size, int node)
1782{
1783 return __vmalloc_node_flags(size, node,
1784 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1785}
1786EXPORT_SYMBOL(vzalloc_node);
1787
1788#ifndef PAGE_KERNEL_EXEC
1789# define PAGE_KERNEL_EXEC PAGE_KERNEL
1790#endif
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804void *vmalloc_exec(unsigned long size)
1805{
1806 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1807 -1, __builtin_return_address(0));
1808}
1809
/*
 * Allocation flags used by vmalloc_32() and vmalloc_32_user().
 *
 * On 64-bit builds a DMA zone flag is added when such a zone is
 * configured (DMA32 preferred over DMA); otherwise plain GFP_KERNEL is
 * used.  The expansions are parenthesized so the macro behaves as a
 * single operand wherever it is used (e.g. next to '&' or '~').
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif
1817
1818
1819
1820
1821
1822
1823
1824
1825void *vmalloc_32(unsigned long size)
1826{
1827 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1828 -1, __builtin_return_address(0));
1829}
1830EXPORT_SYMBOL(vmalloc_32);
1831
1832
1833
1834
1835
1836
1837
1838
1839void *vmalloc_32_user(unsigned long size)
1840{
1841 struct vm_struct *area;
1842 void *ret;
1843
1844 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1845 -1, __builtin_return_address(0));
1846 if (ret) {
1847 area = find_vm_area(ret);
1848 area->flags |= VM_USERMAP;
1849 }
1850 return ret;
1851}
1852EXPORT_SYMBOL(vmalloc_32_user);
1853
1854
1855
1856
1857
1858
1859static int aligned_vread(char *buf, char *addr, unsigned long count)
1860{
1861 struct page *p;
1862 int copied = 0;
1863
1864 while (count) {
1865 unsigned long offset, length;
1866
1867 offset = (unsigned long)addr & ~PAGE_MASK;
1868 length = PAGE_SIZE - offset;
1869 if (length > count)
1870 length = count;
1871 p = vmalloc_to_page(addr);
1872
1873
1874
1875
1876
1877
1878
1879 if (p) {
1880
1881
1882
1883
1884 void *map = kmap_atomic(p, KM_USER0);
1885 memcpy(buf, map + offset, length);
1886 kunmap_atomic(map, KM_USER0);
1887 } else
1888 memset(buf, 0, length);
1889
1890 addr += length;
1891 buf += length;
1892 copied += length;
1893 count -= length;
1894 }
1895 return copied;
1896}
1897
1898static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1899{
1900 struct page *p;
1901 int copied = 0;
1902
1903 while (count) {
1904 unsigned long offset, length;
1905
1906 offset = (unsigned long)addr & ~PAGE_MASK;
1907 length = PAGE_SIZE - offset;
1908 if (length > count)
1909 length = count;
1910 p = vmalloc_to_page(addr);
1911
1912
1913
1914
1915
1916
1917
1918 if (p) {
1919
1920
1921
1922
1923 void *map = kmap_atomic(p, KM_USER0);
1924 memcpy(map + offset, buf, length);
1925 kunmap_atomic(map, KM_USER0);
1926 }
1927 addr += length;
1928 buf += length;
1929 copied += length;
1930 count -= length;
1931 }
1932 return copied;
1933}
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963long vread(char *buf, char *addr, unsigned long count)
1964{
1965 struct vm_struct *tmp;
1966 char *vaddr, *buf_start = buf;
1967 unsigned long buflen = count;
1968 unsigned long n;
1969
1970
1971 if ((unsigned long) addr + count < count)
1972 count = -(unsigned long) addr;
1973
1974 read_lock(&vmlist_lock);
1975 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1976 vaddr = (char *) tmp->addr;
1977 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1978 continue;
1979 while (addr < vaddr) {
1980 if (count == 0)
1981 goto finished;
1982 *buf = '\0';
1983 buf++;
1984 addr++;
1985 count--;
1986 }
1987 n = vaddr + tmp->size - PAGE_SIZE - addr;
1988 if (n > count)
1989 n = count;
1990 if (!(tmp->flags & VM_IOREMAP))
1991 aligned_vread(buf, addr, n);
1992 else
1993 memset(buf, 0, n);
1994 buf += n;
1995 addr += n;
1996 count -= n;
1997 }
1998finished:
1999 read_unlock(&vmlist_lock);
2000
2001 if (buf == buf_start)
2002 return 0;
2003
2004 if (buf != buf_start + buflen)
2005 memset(buf, 0, buflen - (buf - buf_start));
2006
2007 return buflen;
2008}
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038long vwrite(char *buf, char *addr, unsigned long count)
2039{
2040 struct vm_struct *tmp;
2041 char *vaddr;
2042 unsigned long n, buflen;
2043 int copied = 0;
2044
2045
2046 if ((unsigned long) addr + count < count)
2047 count = -(unsigned long) addr;
2048 buflen = count;
2049
2050 read_lock(&vmlist_lock);
2051 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2052 vaddr = (char *) tmp->addr;
2053 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2054 continue;
2055 while (addr < vaddr) {
2056 if (count == 0)
2057 goto finished;
2058 buf++;
2059 addr++;
2060 count--;
2061 }
2062 n = vaddr + tmp->size - PAGE_SIZE - addr;
2063 if (n > count)
2064 n = count;
2065 if (!(tmp->flags & VM_IOREMAP)) {
2066 aligned_vwrite(buf, addr, n);
2067 copied++;
2068 }
2069 buf += n;
2070 addr += n;
2071 count -= n;
2072 }
2073finished:
2074 read_unlock(&vmlist_lock);
2075 if (!copied)
2076 return 0;
2077 return buflen;
2078}
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2095 unsigned long pgoff)
2096{
2097 struct vm_struct *area;
2098 unsigned long uaddr = vma->vm_start;
2099 unsigned long usize = vma->vm_end - vma->vm_start;
2100
2101 if ((PAGE_SIZE-1) & (unsigned long)addr)
2102 return -EINVAL;
2103
2104 area = find_vm_area(addr);
2105 if (!area)
2106 return -EINVAL;
2107
2108 if (!(area->flags & VM_USERMAP))
2109 return -EINVAL;
2110
2111 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2112 return -EINVAL;
2113
2114 addr += pgoff << PAGE_SHIFT;
2115 do {
2116 struct page *page = vmalloc_to_page(addr);
2117 int ret;
2118
2119 ret = vm_insert_page(vma, uaddr, page);
2120 if (ret)
2121 return ret;
2122
2123 uaddr += PAGE_SIZE;
2124 addr += PAGE_SIZE;
2125 usize -= PAGE_SIZE;
2126 } while (usize > 0);
2127
2128
2129 vma->vm_flags |= VM_RESERVED;
2130
2131 return 0;
2132}
2133EXPORT_SYMBOL(remap_vmalloc_range);
2134
2135
2136
2137
2138
/*
 * Default vmalloc_sync_all(): does nothing.  Declared weak so that a
 * non-weak definition elsewhere takes precedence at link time.
 */
__attribute__((weak)) void vmalloc_sync_all(void)
{
}
2142
2143
2144static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2145{
2146 pte_t ***p = data;
2147
2148 if (p) {
2149 *(*p) = pte;
2150 (*p)++;
2151 }
2152 return 0;
2153}
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2170{
2171 struct vm_struct *area;
2172
2173 area = get_vm_area_caller(size, VM_IOREMAP,
2174 __builtin_return_address(0));
2175 if (area == NULL)
2176 return NULL;
2177
2178
2179
2180
2181
2182 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2183 size, f, ptes ? &ptes : NULL)) {
2184 free_vm_area(area);
2185 return NULL;
2186 }
2187
2188 return area;
2189}
2190EXPORT_SYMBOL_GPL(alloc_vm_area);
2191
2192void free_vm_area(struct vm_struct *area)
2193{
2194 struct vm_struct *ret;
2195 ret = remove_vm_area(area->addr);
2196 BUG_ON(ret != area);
2197 kfree(area);
2198}
2199EXPORT_SYMBOL_GPL(free_vm_area);
2200
2201#ifdef CONFIG_SMP
2202static struct vmap_area *node_to_va(struct rb_node *n)
2203{
2204 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2205}
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219static bool pvm_find_next_prev(unsigned long end,
2220 struct vmap_area **pnext,
2221 struct vmap_area **pprev)
2222{
2223 struct rb_node *n = vmap_area_root.rb_node;
2224 struct vmap_area *va = NULL;
2225
2226 while (n) {
2227 va = rb_entry(n, struct vmap_area, rb_node);
2228 if (end < va->va_end)
2229 n = n->rb_left;
2230 else if (end > va->va_end)
2231 n = n->rb_right;
2232 else
2233 break;
2234 }
2235
2236 if (!va)
2237 return false;
2238
2239 if (va->va_end > end) {
2240 *pnext = va;
2241 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2242 } else {
2243 *pprev = va;
2244 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2245 }
2246 return true;
2247}
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265static unsigned long pvm_determine_end(struct vmap_area **pnext,
2266 struct vmap_area **pprev,
2267 unsigned long align)
2268{
2269 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2270 unsigned long addr;
2271
2272 if (*pnext)
2273 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2274 else
2275 addr = vmalloc_end;
2276
2277 while (*pprev && (*pprev)->va_end > addr) {
2278 *pnext = *pprev;
2279 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2280 }
2281
2282 return addr;
2283}
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2310 const size_t *sizes, int nr_vms,
2311 size_t align)
2312{
2313 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2314 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2315 struct vmap_area **vas, *prev, *next;
2316 struct vm_struct **vms;
2317 int area, area2, last_area, term_area;
2318 unsigned long base, start, end, last_end;
2319 bool purged = false;
2320
2321
2322 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2323 for (last_area = 0, area = 0; area < nr_vms; area++) {
2324 start = offsets[area];
2325 end = start + sizes[area];
2326
2327
2328 BUG_ON(!IS_ALIGNED(offsets[area], align));
2329 BUG_ON(!IS_ALIGNED(sizes[area], align));
2330
2331
2332 if (start > offsets[last_area])
2333 last_area = area;
2334
2335 for (area2 = 0; area2 < nr_vms; area2++) {
2336 unsigned long start2 = offsets[area2];
2337 unsigned long end2 = start2 + sizes[area2];
2338
2339 if (area2 == area)
2340 continue;
2341
2342 BUG_ON(start2 >= start && start2 < end);
2343 BUG_ON(end2 <= end && end2 > start);
2344 }
2345 }
2346 last_end = offsets[last_area] + sizes[last_area];
2347
2348 if (vmalloc_end - vmalloc_start < last_end) {
2349 WARN_ON(true);
2350 return NULL;
2351 }
2352
2353 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
2354 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
2355 if (!vas || !vms)
2356 goto err_free;
2357
2358 for (area = 0; area < nr_vms; area++) {
2359 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2360 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2361 if (!vas[area] || !vms[area])
2362 goto err_free;
2363 }
2364retry:
2365 spin_lock(&vmap_area_lock);
2366
2367
2368 area = term_area = last_area;
2369 start = offsets[area];
2370 end = start + sizes[area];
2371
2372 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2373 base = vmalloc_end - last_end;
2374 goto found;
2375 }
2376 base = pvm_determine_end(&next, &prev, align) - end;
2377
2378 while (true) {
2379 BUG_ON(next && next->va_end <= base + end);
2380 BUG_ON(prev && prev->va_end > base + end);
2381
2382
2383
2384
2385
2386 if (base + last_end < vmalloc_start + last_end) {
2387 spin_unlock(&vmap_area_lock);
2388 if (!purged) {
2389 purge_vmap_area_lazy();
2390 purged = true;
2391 goto retry;
2392 }
2393 goto err_free;
2394 }
2395
2396
2397
2398
2399
2400 if (next && next->va_start < base + end) {
2401 base = pvm_determine_end(&next, &prev, align) - end;
2402 term_area = area;
2403 continue;
2404 }
2405
2406
2407
2408
2409
2410
2411 if (prev && prev->va_end > base + start) {
2412 next = prev;
2413 prev = node_to_va(rb_prev(&next->rb_node));
2414 base = pvm_determine_end(&next, &prev, align) - end;
2415 term_area = area;
2416 continue;
2417 }
2418
2419
2420
2421
2422
2423 area = (area + nr_vms - 1) % nr_vms;
2424 if (area == term_area)
2425 break;
2426 start = offsets[area];
2427 end = start + sizes[area];
2428 pvm_find_next_prev(base + end, &next, &prev);
2429 }
2430found:
2431
2432 for (area = 0; area < nr_vms; area++) {
2433 struct vmap_area *va = vas[area];
2434
2435 va->va_start = base + offsets[area];
2436 va->va_end = va->va_start + sizes[area];
2437 __insert_vmap_area(va);
2438 }
2439
2440 vmap_area_pcpu_hole = base + offsets[last_area];
2441
2442 spin_unlock(&vmap_area_lock);
2443
2444
2445 for (area = 0; area < nr_vms; area++)
2446 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2447 pcpu_get_vm_areas);
2448
2449 kfree(vas);
2450 return vms;
2451
2452err_free:
2453 for (area = 0; area < nr_vms; area++) {
2454 if (vas)
2455 kfree(vas[area]);
2456 if (vms)
2457 kfree(vms[area]);
2458 }
2459 kfree(vas);
2460 kfree(vms);
2461 return NULL;
2462}
2463
2464
2465
2466
2467
2468
2469
2470
/**
 * pcpu_free_vm_areas - release areas obtained from pcpu_get_vm_areas()
 * @vms:    array of vm_struct pointers
 * @nr_vms: number of entries in @vms
 *
 * Frees every area with free_vm_area() and then the array itself.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx;

	for (idx = 0; idx < nr_vms; idx++) {
		free_vm_area(vms[idx]);
	}

	kfree(vms);
}
2479#endif
2480
2481#ifdef CONFIG_PROC_FS
2482static void *s_start(struct seq_file *m, loff_t *pos)
2483 __acquires(&vmlist_lock)
2484{
2485 loff_t n = *pos;
2486 struct vm_struct *v;
2487
2488 read_lock(&vmlist_lock);
2489 v = vmlist;
2490 while (n > 0 && v) {
2491 n--;
2492 v = v->next;
2493 }
2494 if (!n)
2495 return v;
2496
2497 return NULL;
2498
2499}
2500
2501static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2502{
2503 struct vm_struct *v = p;
2504
2505 ++*pos;
2506 return v->next;
2507}
2508
2509static void s_stop(struct seq_file *m, void *p)
2510 __releases(&vmlist_lock)
2511{
2512 read_unlock(&vmlist_lock);
2513}
2514
2515static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2516{
2517 if (NUMA_BUILD) {
2518 unsigned int nr, *counters = m->private;
2519
2520 if (!counters)
2521 return;
2522
2523 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2524
2525 for (nr = 0; nr < v->nr_pages; nr++)
2526 counters[page_to_nid(v->pages[nr])]++;
2527
2528 for_each_node_state(nr, N_HIGH_MEMORY)
2529 if (counters[nr])
2530 seq_printf(m, " N%u=%u", nr, counters[nr]);
2531 }
2532}
2533
2534static int s_show(struct seq_file *m, void *p)
2535{
2536 struct vm_struct *v = p;
2537
2538 seq_printf(m, "0x%p-0x%p %7ld",
2539 v->addr, v->addr + v->size, v->size);
2540
2541 if (v->caller)
2542 seq_printf(m, " %pS", v->caller);
2543
2544 if (v->nr_pages)
2545 seq_printf(m, " pages=%d", v->nr_pages);
2546
2547 if (v->phys_addr)
2548 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2549
2550 if (v->flags & VM_IOREMAP)
2551 seq_printf(m, " ioremap");
2552
2553 if (v->flags & VM_ALLOC)
2554 seq_printf(m, " vmalloc");
2555
2556 if (v->flags & VM_MAP)
2557 seq_printf(m, " vmap");
2558
2559 if (v->flags & VM_USERMAP)
2560 seq_printf(m, " user");
2561
2562 if (v->flags & VM_VPAGES)
2563 seq_printf(m, " vpages");
2564
2565 show_numa_info(m, v);
2566 seq_putc(m, '\n');
2567 return 0;
2568}
2569
2570static const struct seq_operations vmalloc_op = {
2571 .start = s_start,
2572 .next = s_next,
2573 .stop = s_stop,
2574 .show = s_show,
2575};
2576
2577static int vmalloc_open(struct inode *inode, struct file *file)
2578{
2579 unsigned int *ptr = NULL;
2580 int ret;
2581
2582 if (NUMA_BUILD) {
2583 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2584 if (ptr == NULL)
2585 return -ENOMEM;
2586 }
2587 ret = seq_open(file, &vmalloc_op);
2588 if (!ret) {
2589 struct seq_file *m = file->private_data;
2590 m->private = ptr;
2591 } else
2592 kfree(ptr);
2593 return ret;
2594}
2595
2596static const struct file_operations proc_vmalloc_operations = {
2597 .open = vmalloc_open,
2598 .read = seq_read,
2599 .llseek = seq_lseek,
2600 .release = seq_release_private,
2601};
2602
2603static int __init proc_vmalloc_init(void)
2604{
2605 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2606 return 0;
2607}
2608module_init(proc_vmalloc_init);
2609#endif
2610
2611