1
2
3
4
5
6
7
8
9
10
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/spinlock.h>
18#include <linux/interrupt.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/debugobjects.h>
22#include <linux/kallsyms.h>
23#include <linux/list.h>
24#include <linux/rbtree.h>
25#include <linux/radix-tree.h>
26#include <linux/rcupdate.h>
27#include <linux/pfn.h>
28#include <linux/kmemleak.h>
29#include <linux/atomic.h>
30#include <asm/uaccess.h>
31#include <asm/tlbflush.h>
32#include <asm/shmparam.h>
33
34
35
36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
37{
38 pte_t *pte;
39
40 pte = pte_offset_kernel(pmd, addr);
41 do {
42 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
43 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
44 } while (pte++, addr += PAGE_SIZE, addr != end);
45}
46
47static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
48{
49 pmd_t *pmd;
50 unsigned long next;
51
52 pmd = pmd_offset(pud, addr);
53 do {
54 next = pmd_addr_end(addr, end);
55 if (pmd_none_or_clear_bad(pmd))
56 continue;
57 vunmap_pte_range(pmd, addr, next);
58 } while (pmd++, addr = next, addr != end);
59}
60
61static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
62{
63 pud_t *pud;
64 unsigned long next;
65
66 pud = pud_offset(pgd, addr);
67 do {
68 next = pud_addr_end(addr, end);
69 if (pud_none_or_clear_bad(pud))
70 continue;
71 vunmap_pmd_range(pud, addr, next);
72 } while (pud++, addr = next, addr != end);
73}
74
75static void vunmap_page_range(unsigned long addr, unsigned long end)
76{
77 pgd_t *pgd;
78 unsigned long next;
79
80 BUG_ON(addr >= end);
81 pgd = pgd_offset_k(addr);
82 do {
83 next = pgd_addr_end(addr, end);
84 if (pgd_none_or_clear_bad(pgd))
85 continue;
86 vunmap_pud_range(pgd, addr, next);
87 } while (pgd++, addr = next, addr != end);
88}
89
90static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
91 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
92{
93 pte_t *pte;
94
95
96
97
98
99
100 pte = pte_alloc_kernel(pmd, addr);
101 if (!pte)
102 return -ENOMEM;
103 do {
104 struct page *page = pages[*nr];
105
106 if (WARN_ON(!pte_none(*pte)))
107 return -EBUSY;
108 if (WARN_ON(!page))
109 return -ENOMEM;
110 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
111 (*nr)++;
112 } while (pte++, addr += PAGE_SIZE, addr != end);
113 return 0;
114}
115
116static int vmap_pmd_range(pud_t *pud, unsigned long addr,
117 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
118{
119 pmd_t *pmd;
120 unsigned long next;
121
122 pmd = pmd_alloc(&init_mm, pud, addr);
123 if (!pmd)
124 return -ENOMEM;
125 do {
126 next = pmd_addr_end(addr, end);
127 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
128 return -ENOMEM;
129 } while (pmd++, addr = next, addr != end);
130 return 0;
131}
132
133static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
134 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
135{
136 pud_t *pud;
137 unsigned long next;
138
139 pud = pud_alloc(&init_mm, pgd, addr);
140 if (!pud)
141 return -ENOMEM;
142 do {
143 next = pud_addr_end(addr, end);
144 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
145 return -ENOMEM;
146 } while (pud++, addr = next, addr != end);
147 return 0;
148}
149
150
151
152
153
154
155
156static int vmap_page_range_noflush(unsigned long start, unsigned long end,
157 pgprot_t prot, struct page **pages)
158{
159 pgd_t *pgd;
160 unsigned long next;
161 unsigned long addr = start;
162 int err = 0;
163 int nr = 0;
164
165 BUG_ON(addr >= end);
166 pgd = pgd_offset_k(addr);
167 do {
168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err)
171 return err;
172 } while (pgd++, addr = next, addr != end);
173
174 return nr;
175}
176
177static int vmap_page_range(unsigned long start, unsigned long end,
178 pgprot_t prot, struct page **pages)
179{
180 int ret;
181
182 ret = vmap_page_range_noflush(start, end, prot, pages);
183 flush_cache_vmap(start, end);
184 return ret;
185}
186
/*
 * Return non-zero if @x lies in the vmalloc range or, when the
 * architecture defines a separate module area (CONFIG_MODULES and
 * MODULES_VADDR), inside [MODULES_VADDR, MODULES_END).
 */
int is_vmalloc_or_module_addr(const void *x)
{
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	const unsigned long va = (unsigned long)x;

	if (va >= MODULES_VADDR && va < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}
201
202
203
204
205struct page *vmalloc_to_page(const void *vmalloc_addr)
206{
207 unsigned long addr = (unsigned long) vmalloc_addr;
208 struct page *page = NULL;
209 pgd_t *pgd = pgd_offset_k(addr);
210
211
212
213
214
215 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
216
217 if (!pgd_none(*pgd)) {
218 pud_t *pud = pud_offset(pgd, addr);
219 if (!pud_none(*pud)) {
220 pmd_t *pmd = pmd_offset(pud, addr);
221 if (!pmd_none(*pmd)) {
222 pte_t *ptep, pte;
223
224 ptep = pte_offset_map(pmd, addr);
225 pte = *ptep;
226 if (pte_present(pte))
227 page = pte_page(pte);
228 pte_unmap(ptep);
229 }
230 }
231 }
232 return page;
233}
234EXPORT_SYMBOL(vmalloc_to_page);
235
236
237
238
/*
 * Translate @vmalloc_addr to the page frame number of the page it maps.
 * The result of vmalloc_to_page() is passed to page_to_pfn() unchecked.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	struct page *pg = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(pg);
}
EXPORT_SYMBOL(vmalloc_to_pfn);
244
245
246
247
/* Bits stored in struct vmap_area.flags. */
#define VM_LAZY_FREE 0x01	/* set by free_vmap_area_noflush(); area awaits purge */
#define VM_LAZY_FREEING 0x02	/* set by __purge_vmap_area_lazy() while the area is being freed */
#define VM_VM_AREA 0x04	/* vmap_area.vm points at a valid vm_struct */
251
/*
 * Descriptor for one range of kernel virtual address space tracked in
 * vmap_area_root / vmap_area_list.
 */
struct vmap_area {
	unsigned long va_start;		/* first address of the range */
	unsigned long va_end;		/* one past the last address (va_start + size) */
	unsigned long flags;		/* VM_LAZY_FREE / VM_LAZY_FREEING / VM_VM_AREA */
	struct rb_node rb_node;		/* node in vmap_area_root */
	struct list_head list;		/* link in vmap_area_list */
	struct list_head purge_list;	/* link on the local list built by __purge_vmap_area_lazy() */
	struct vm_struct *vm;		/* associated vm_struct when VM_VM_AREA is set */
	struct rcu_head rcu_head;	/* used by kfree_rcu() in __free_vmap_area() */
};
262
/* Taken around updates and lookups of vmap_area_root and vmap_area_list. */
static DEFINE_SPINLOCK(vmap_area_lock);
/* All vmap areas; __insert_vmap_area() links each one after its rbtree predecessor. */
static LIST_HEAD(vmap_area_list);
/* The same areas in an rbtree, used for lookup and free-range search. */
static struct rb_root vmap_area_root = RB_ROOT;

/*
 * Search cache used by alloc_vmap_area(): the most recently inserted area
 * plus the parameters of the search that produced it.
 */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

/* Highest va_end inside the vmalloc range seen by __free_vmap_area(). */
static unsigned long vmap_area_pcpu_hole;
274
275static struct vmap_area *__find_vmap_area(unsigned long addr)
276{
277 struct rb_node *n = vmap_area_root.rb_node;
278
279 while (n) {
280 struct vmap_area *va;
281
282 va = rb_entry(n, struct vmap_area, rb_node);
283 if (addr < va->va_start)
284 n = n->rb_left;
285 else if (addr > va->va_start)
286 n = n->rb_right;
287 else
288 return va;
289 }
290
291 return NULL;
292}
293
294static void __insert_vmap_area(struct vmap_area *va)
295{
296 struct rb_node **p = &vmap_area_root.rb_node;
297 struct rb_node *parent = NULL;
298 struct rb_node *tmp;
299
300 while (*p) {
301 struct vmap_area *tmp_va;
302
303 parent = *p;
304 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
305 if (va->va_start < tmp_va->va_end)
306 p = &(*p)->rb_left;
307 else if (va->va_end > tmp_va->va_start)
308 p = &(*p)->rb_right;
309 else
310 BUG();
311 }
312
313 rb_link_node(&va->rb_node, parent, p);
314 rb_insert_color(&va->rb_node, &vmap_area_root);
315
316
317 tmp = rb_prev(&va->rb_node);
318 if (tmp) {
319 struct vmap_area *prev;
320 prev = rb_entry(tmp, struct vmap_area, rb_node);
321 list_add_rcu(&va->list, &prev->list);
322 } else
323 list_add_rcu(&va->list, &vmap_area_list);
324}
325
/* Forward declaration: alloc_vmap_area() calls this before its definition. */
static void purge_vmap_area_lazy(void);
327
328
329
330
331
/*
 * Allocate a vmap_area describing a free range of @size bytes, aligned to
 * @align, inside [@vstart, @vend), and insert it into the global tree/list.
 *
 * @size must be non-zero and page aligned, @align a power of two (BUG
 * otherwise).  @node and @gfp_mask are used for the descriptor allocation.
 *
 * Returns the new area, ERR_PTR(-ENOMEM) if the descriptor cannot be
 * allocated, or ERR_PTR(-EBUSY) if no suitable range exists even after one
 * purge_vmap_area_lazy() retry.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(!is_power_of_2(align));

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);
	/*
	 * Drop the search cache when there is none, or when this request is
	 * smaller than the recorded hole, starts lower, or has a smaller
	 * alignment than the search that populated the cache.
	 */
	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}

	/* Remember the parameters of this search for the next cache check. */
	cached_vstart = vstart;
	cached_align = align;

	/* Choose the starting point of the search. */
	if (free_vmap_cache) {
		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
		addr = ALIGN(first->va_end, align);
		if (addr < vstart)
			goto nocache;
		/* addr + size wrapped around the address space */
		if (addr + size - 1 < addr)
			goto overflow;

	} else {
		addr = ALIGN(vstart, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = vmap_area_root.rb_node;
		first = NULL;

		/* Find the lowest area whose end is at or above addr. */
		while (n) {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				first = tmp;
				if (tmp->va_start <= addr)
					break;
				n = n->rb_left;
			} else
				n = n->rb_right;
		}

		/* Nothing at or above addr: the range is free. */
		if (!first)
			goto found;
	}

	/* Step over existing areas until the candidate fits before the next one. */
	while (addr + size > first->va_start && addr + size <= vend) {
		/* Record the largest hole skipped so far. */
		if (addr + cached_hole_size < first->va_start)
			cached_hole_size = first->va_start - addr;
		addr = ALIGN(first->va_end, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = rb_next(&first->rb_node);
		if (n)
			first = rb_entry(n, struct vmap_area, rb_node);
		else
			goto found;
	}

found:
	if (addr + size > vend)
		goto overflow;

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	/* The next search can resume right after this area. */
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	BUG_ON(va->va_start & (align-1));
	BUG_ON(va->va_start < vstart);
	BUG_ON(va->va_end > vend);

	return va;

overflow:
	spin_unlock(&vmap_area_lock);
	/* Purge lazily freed areas once and try again before giving up. */
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
	if (printk_ratelimit())
		printk(KERN_WARNING
			"vmap allocation for size %lu failed: "
			"use vmalloc=<size> to increase size.\n", size);
	kfree(va);
	return ERR_PTR(-EBUSY);
}
454
455static void __free_vmap_area(struct vmap_area *va)
456{
457 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
458
459 if (free_vmap_cache) {
460 if (va->va_end < cached_vstart) {
461 free_vmap_cache = NULL;
462 } else {
463 struct vmap_area *cache;
464 cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
465 if (va->va_start <= cache->va_start) {
466 free_vmap_cache = rb_prev(&va->rb_node);
467
468
469
470
471 }
472 }
473 }
474 rb_erase(&va->rb_node, &vmap_area_root);
475 RB_CLEAR_NODE(&va->rb_node);
476 list_del_rcu(&va->list);
477
478
479
480
481
482
483
484 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
485 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
486
487 kfree_rcu(va, rcu_head);
488}
489
490
491
492
493static void free_vmap_area(struct vmap_area *va)
494{
495 spin_lock(&vmap_area_lock);
496 __free_vmap_area(va);
497 spin_unlock(&vmap_area_lock);
498}
499
500
501
502
503static void unmap_vmap_area(struct vmap_area *va)
504{
505 vunmap_page_range(va->va_start, va->va_end);
506}
507
/*
 * With CONFIG_DEBUG_PAGEALLOC, unmap [start, end) and flush the TLB for it
 * immediately.  Without that option this function does nothing.
 */
static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
#if defined(CONFIG_DEBUG_PAGEALLOC)
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545static unsigned long lazy_max_pages(void)
546{
547 unsigned int log;
548
549 log = fls(num_online_cpus());
550
551 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
552}
553
/* Number of pages in areas marked VM_LAZY_FREE that have not been purged yet. */
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* Forward declaration: __purge_vmap_area_lazy() calls this before its definition. */
static void purge_fragmented_blocks_allcpus(void);
558
559
560
561
562
563void set_iounmap_nonlazy(void)
564{
565 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
566}
567
568
569
570
571
572
573
574
575
576
577
/*
 * Free every vmap area currently marked VM_LAZY_FREE.
 *
 * @start, @end: in/out bounds of the range to TLB-flush; widened here to
 *               cover every purged area.
 * @sync:        non-zero to wait for purge_lock and to purge fragmented
 *               vmap blocks on all CPUs first.
 * @force_flush: non-zero to flush [*start, *end) even when nothing was
 *               purged.
 *
 * When both @sync and @force_flush are zero the function gives up
 * immediately if another purge holds purge_lock.
 */
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
					int sync, int force_flush)
{
	static DEFINE_SPINLOCK(purge_lock);
	LIST_HEAD(valist);
	struct vmap_area *va;
	struct vmap_area *n_va;
	int nr = 0;

	if (!sync && !force_flush) {
		if (!spin_trylock(&purge_lock))
			return;
	} else
		spin_lock(&purge_lock);

	if (sync)
		purge_fragmented_blocks_allcpus();

	/* Collect lazily freed areas on a private list and flip their state. */
	rcu_read_lock();
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	rcu_read_unlock();

	if (nr)
		atomic_sub(nr, &vmap_lazy_nr);

	/* Flush before the address ranges are released for reuse below. */
	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

	if (nr) {
		spin_lock(&vmap_area_lock);
		list_for_each_entry_safe(va, n_va, &valist, purge_list)
			__free_vmap_area(va);
		spin_unlock(&vmap_area_lock);
	}
	spin_unlock(&purge_lock);
}
630
631
632
633
634
635static void try_purge_vmap_area_lazy(void)
636{
637 unsigned long start = ULONG_MAX, end = 0;
638
639 __purge_vmap_area_lazy(&start, &end, 0, 0);
640}
641
642
643
644
645static void purge_vmap_area_lazy(void)
646{
647 unsigned long start = ULONG_MAX, end = 0;
648
649 __purge_vmap_area_lazy(&start, &end, 1, 0);
650}
651
652
653
654
655
656
657static void free_vmap_area_noflush(struct vmap_area *va)
658{
659 va->flags |= VM_LAZY_FREE;
660 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
661 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
662 try_purge_vmap_area_lazy();
663}
664
665
666
667
668
/*
 * Tear down the page-table mappings of @va, then queue it for lazy
 * freeing.  No cache flush is issued here.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);
	free_vmap_area_noflush(va);
}
674
675
676
677
678static void free_unmap_vmap_area(struct vmap_area *va)
679{
680 flush_cache_vunmap(va->va_start, va->va_end);
681 free_unmap_vmap_area_noflush(va);
682}
683
684static struct vmap_area *find_vmap_area(unsigned long addr)
685{
686 struct vmap_area *va;
687
688 spin_lock(&vmap_area_lock);
689 va = __find_vmap_area(addr);
690 spin_unlock(&vmap_area_lock);
691
692 return va;
693}
694
/*
 * Find the area starting at @addr and unmap/free it.
 * BUGs if no such area exists.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *area = find_vmap_area(addr);

	BUG_ON(!area);
	free_unmap_vmap_area(area);
}
703
704
705
706
707
708
709
710
711
712
713
714
715
/* Nominal amount of vmalloc space used only to size vmap blocks below. */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE (128UL*1024*1024)
#else
#define VMALLOC_SPACE (128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
/* Largest request, in pages, served from a vmap block (see vb_alloc()). */
#define VMAP_MAX_ALLOC BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX 1024
#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
/* Plain min/max; note that each argument may be evaluated twice. */
#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
/*
 * Pages per vmap block: VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16,
 * clamped to [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
 */
#define VMAP_BBMAP_BITS \
		VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
		VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

/* Size in bytes of the address range covered by one vmap block. */
#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
734
/* Set to true at the end of vmalloc_init(). */
static bool vmap_initialized __read_mostly = false;
736
/* Per-CPU queue of vmap blocks that still have free space. */
struct vmap_block_queue {
	spinlock_t lock;	/* taken around additions to / removals from @free */
	struct list_head free;	/* blocks linked through vmap_block.free_list */
};
741
/*
 * One VMAP_BLOCK_SIZE chunk of vmalloc space that vb_alloc()/vb_free()
 * carve into page-granular pieces.
 */
struct vmap_block {
	spinlock_t lock;			/* taken around updates of the counters and bitmaps */
	struct vmap_area *va;			/* address range backing this block */
	struct vmap_block_queue *vbq;		/* queue the block was added to on creation */
	unsigned long free, dirty;		/* pages still allocatable / pages already freed */
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);	/* one bit per page handed out */
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);	/* one bit per page freed via vb_free() */
	struct list_head free_list;		/* link in vmap_block_queue.free */
	struct rcu_head rcu_head;		/* used by kfree_rcu() in free_vmap_block() */
	struct list_head purge;			/* link on the local list in purge_fragmented_blocks() */
};
753
754
/* One queue of partially free vmap blocks per CPU; initialised in vmalloc_init(). */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree mapping addr_to_vb_idx() indexes to vmap blocks.  Inserts and
 * deletes take vmap_block_tree_lock; vb_free() looks blocks up under
 * rcu_read_lock().
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
764
765
766
767
768
769
770
771
772static unsigned long addr_to_vb_idx(unsigned long addr)
773{
774 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
775 addr /= VMAP_BLOCK_SIZE;
776 return addr;
777}
778
779static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
780{
781 struct vmap_block_queue *vbq;
782 struct vmap_block *vb;
783 struct vmap_area *va;
784 unsigned long vb_idx;
785 int node, err;
786
787 node = numa_node_id();
788
789 vb = kmalloc_node(sizeof(struct vmap_block),
790 gfp_mask & GFP_RECLAIM_MASK, node);
791 if (unlikely(!vb))
792 return ERR_PTR(-ENOMEM);
793
794 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
795 VMALLOC_START, VMALLOC_END,
796 node, gfp_mask);
797 if (IS_ERR(va)) {
798 kfree(vb);
799 return ERR_CAST(va);
800 }
801
802 err = radix_tree_preload(gfp_mask);
803 if (unlikely(err)) {
804 kfree(vb);
805 free_vmap_area(va);
806 return ERR_PTR(err);
807 }
808
809 spin_lock_init(&vb->lock);
810 vb->va = va;
811 vb->free = VMAP_BBMAP_BITS;
812 vb->dirty = 0;
813 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
814 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
815 INIT_LIST_HEAD(&vb->free_list);
816
817 vb_idx = addr_to_vb_idx(va->va_start);
818 spin_lock(&vmap_block_tree_lock);
819 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
820 spin_unlock(&vmap_block_tree_lock);
821 BUG_ON(err);
822 radix_tree_preload_end();
823
824 vbq = &get_cpu_var(vmap_block_queue);
825 vb->vbq = vbq;
826 spin_lock(&vbq->lock);
827 list_add_rcu(&vb->free_list, &vbq->free);
828 spin_unlock(&vbq->lock);
829 put_cpu_var(vmap_block_queue);
830
831 return vb;
832}
833
834static void free_vmap_block(struct vmap_block *vb)
835{
836 struct vmap_block *tmp;
837 unsigned long vb_idx;
838
839 vb_idx = addr_to_vb_idx(vb->va->va_start);
840 spin_lock(&vmap_block_tree_lock);
841 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
842 spin_unlock(&vmap_block_tree_lock);
843 BUG_ON(tmp != vb);
844
845 free_vmap_area_noflush(vb->va);
846 kfree_rcu(vb, rcu_head);
847}
848
849static void purge_fragmented_blocks(int cpu)
850{
851 LIST_HEAD(purge);
852 struct vmap_block *vb;
853 struct vmap_block *n_vb;
854 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
855
856 rcu_read_lock();
857 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
858
859 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
860 continue;
861
862 spin_lock(&vb->lock);
863 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
864 vb->free = 0;
865 vb->dirty = VMAP_BBMAP_BITS;
866 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
867 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
868 spin_lock(&vbq->lock);
869 list_del_rcu(&vb->free_list);
870 spin_unlock(&vbq->lock);
871 spin_unlock(&vb->lock);
872 list_add_tail(&vb->purge, &purge);
873 } else
874 spin_unlock(&vb->lock);
875 }
876 rcu_read_unlock();
877
878 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
879 list_del(&vb->purge);
880 free_vmap_block(vb);
881 }
882}
883
/* Purge fragmented blocks on the queue of the CPU we are running on. */
static void purge_fragmented_blocks_thiscpu(void)
{
	int this_cpu = smp_processor_id();

	purge_fragmented_blocks(this_cpu);
}
888
889static void purge_fragmented_blocks_allcpus(void)
890{
891 int cpu;
892
893 for_each_possible_cpu(cpu)
894 purge_fragmented_blocks(cpu);
895}
896
/*
 * Allocate @size bytes of virtual address space from a vmap block on the
 * current CPU's queue, creating a new block when none can satisfy the
 * request.
 *
 * @size must be page aligned and at most PAGE_SIZE * VMAP_MAX_ALLOC (BUG
 * otherwise).  The request is rounded up to 2^get_order(size) pages.
 *
 * Returns the start address, or the ERR_PTR produced by new_vmap_block().
 */
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	unsigned long addr = 0;
	unsigned int order;
	int purge = 0;

	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
	order = get_order(size);

again:
	rcu_read_lock();
	vbq = &get_cpu_var(vmap_block_queue);
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
		int i;

		spin_lock(&vb->lock);
		/* Not enough free pages left in this block. */
		if (vb->free < 1UL << order)
			goto next;

		i = bitmap_find_free_region(vb->alloc_map,
						VMAP_BBMAP_BITS, order);

		if (i < 0) {
			if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
				/* no outstanding allocations left: ask for a purge below */
				BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
				purge = 1;
			}
			goto next;
		}
		addr = vb->va->va_start + (i << PAGE_SHIFT);
		BUG_ON(addr_to_vb_idx(addr) !=
				addr_to_vb_idx(vb->va->va_start));
		vb->free -= 1UL << order;
		/* A block with nothing left to hand out leaves the free queue. */
		if (vb->free == 0) {
			spin_lock(&vbq->lock);
			list_del_rcu(&vb->free_list);
			spin_unlock(&vbq->lock);
		}
		spin_unlock(&vb->lock);
		break;
next:
		spin_unlock(&vb->lock);
	}

	if (purge)
		purge_fragmented_blocks_thiscpu();

	put_cpu_var(vmap_block_queue);
	rcu_read_unlock();

	/* Nothing suitable was found: add a fresh block and search again. */
	if (!addr) {
		vb = new_vmap_block(gfp_mask);
		if (IS_ERR(vb))
			return vb;
		goto again;
	}

	return (void *)addr;
}
960
961static void vb_free(const void *addr, unsigned long size)
962{
963 unsigned long offset;
964 unsigned long vb_idx;
965 unsigned int order;
966 struct vmap_block *vb;
967
968 BUG_ON(size & ~PAGE_MASK);
969 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
970
971 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
972
973 order = get_order(size);
974
975 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
976
977 vb_idx = addr_to_vb_idx((unsigned long)addr);
978 rcu_read_lock();
979 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
980 rcu_read_unlock();
981 BUG_ON(!vb);
982
983 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
984
985 spin_lock(&vb->lock);
986 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
987
988 vb->dirty += 1UL << order;
989 if (vb->dirty == VMAP_BBMAP_BITS) {
990 BUG_ON(vb->free);
991 spin_unlock(&vb->lock);
992 free_vmap_block(vb);
993 } else
994 spin_unlock(&vb->lock);
995}
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010void vm_unmap_aliases(void)
1011{
1012 unsigned long start = ULONG_MAX, end = 0;
1013 int cpu;
1014 int flush = 0;
1015
1016 if (unlikely(!vmap_initialized))
1017 return;
1018
1019 for_each_possible_cpu(cpu) {
1020 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1021 struct vmap_block *vb;
1022
1023 rcu_read_lock();
1024 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1025 int i;
1026
1027 spin_lock(&vb->lock);
1028 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1029 while (i < VMAP_BBMAP_BITS) {
1030 unsigned long s, e;
1031 int j;
1032 j = find_next_zero_bit(vb->dirty_map,
1033 VMAP_BBMAP_BITS, i);
1034
1035 s = vb->va->va_start + (i << PAGE_SHIFT);
1036 e = vb->va->va_start + (j << PAGE_SHIFT);
1037 flush = 1;
1038
1039 if (s < start)
1040 start = s;
1041 if (e > end)
1042 end = e;
1043
1044 i = j;
1045 i = find_next_bit(vb->dirty_map,
1046 VMAP_BBMAP_BITS, i);
1047 }
1048 spin_unlock(&vb->lock);
1049 }
1050 rcu_read_unlock();
1051 }
1052
1053 __purge_vmap_area_lazy(&start, &end, 1, flush);
1054}
1055EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1056
1057
1058
1059
1060
1061
1062void vm_unmap_ram(const void *mem, unsigned int count)
1063{
1064 unsigned long size = count << PAGE_SHIFT;
1065 unsigned long addr = (unsigned long)mem;
1066
1067 BUG_ON(!addr);
1068 BUG_ON(addr < VMALLOC_START);
1069 BUG_ON(addr > VMALLOC_END);
1070 BUG_ON(addr & (PAGE_SIZE-1));
1071
1072 debug_check_no_locks_freed(mem, size);
1073 vmap_debug_free_range(addr, addr+size);
1074
1075 if (likely(count <= VMAP_MAX_ALLOC))
1076 vb_free(mem, size);
1077 else
1078 free_unmap_vmap_area_addr(addr);
1079}
1080EXPORT_SYMBOL(vm_unmap_ram);
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1092{
1093 unsigned long size = count << PAGE_SHIFT;
1094 unsigned long addr;
1095 void *mem;
1096
1097 if (likely(count <= VMAP_MAX_ALLOC)) {
1098 mem = vb_alloc(size, GFP_KERNEL);
1099 if (IS_ERR(mem))
1100 return NULL;
1101 addr = (unsigned long)mem;
1102 } else {
1103 struct vmap_area *va;
1104 va = alloc_vmap_area(size, PAGE_SIZE,
1105 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1106 if (IS_ERR(va))
1107 return NULL;
1108
1109 addr = va->va_start;
1110 mem = (void *)addr;
1111 }
1112 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1113 vm_unmap_ram(mem, count);
1114 return NULL;
1115 }
1116 return mem;
1117}
1118EXPORT_SYMBOL(vm_map_ram);
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130void __init vm_area_add_early(struct vm_struct *vm)
1131{
1132 struct vm_struct *tmp, **p;
1133
1134 BUG_ON(vmap_initialized);
1135 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1136 if (tmp->addr >= vm->addr) {
1137 BUG_ON(tmp->addr < vm->addr + vm->size);
1138 break;
1139 } else
1140 BUG_ON(tmp->addr + tmp->size > vm->addr);
1141 }
1142 vm->next = *p;
1143 *p = vm;
1144}
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1159{
1160 static size_t vm_init_off __initdata;
1161 unsigned long addr;
1162
1163 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1164 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1165
1166 vm->addr = (void *)addr;
1167
1168 vm_area_add_early(vm);
1169}
1170
1171void __init vmalloc_init(void)
1172{
1173 struct vmap_area *va;
1174 struct vm_struct *tmp;
1175 int i;
1176
1177 for_each_possible_cpu(i) {
1178 struct vmap_block_queue *vbq;
1179
1180 vbq = &per_cpu(vmap_block_queue, i);
1181 spin_lock_init(&vbq->lock);
1182 INIT_LIST_HEAD(&vbq->free);
1183 }
1184
1185
1186 for (tmp = vmlist; tmp; tmp = tmp->next) {
1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1188 va->flags = VM_VM_AREA;
1189 va->va_start = (unsigned long)tmp->addr;
1190 va->va_end = va->va_start + tmp->size;
1191 va->vm = tmp;
1192 __insert_vmap_area(va);
1193 }
1194
1195 vmap_area_pcpu_hole = VMALLOC_END;
1196
1197 vmap_initialized = true;
1198}
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1220 pgprot_t prot, struct page **pages)
1221{
1222 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1223}
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
/*
 * Clear the kernel page-table entries for [addr, addr + size).
 * No cache or TLB flush is done here.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	const unsigned long end = addr + size;

	vunmap_page_range(addr, end);
}
EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1244
1245
1246
1247
1248
1249
1250
1251
1252
/*
 * Unmap [addr, addr + size) from the kernel page tables, flushing the
 * cache beforehand and the TLB afterwards.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	const unsigned long stop = addr + size;

	flush_cache_vunmap(addr, stop);
	vunmap_page_range(addr, stop);
	flush_tlb_kernel_range(addr, stop);
}
1261
1262int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1263{
1264 unsigned long addr = (unsigned long)area->addr;
1265 unsigned long end = addr + area->size - PAGE_SIZE;
1266 int err;
1267
1268 err = vmap_page_range(addr, end, prot, *pages);
1269 if (err > 0) {
1270 *pages += err;
1271 err = 0;
1272 }
1273
1274 return err;
1275}
1276EXPORT_SYMBOL_GPL(map_vm_area);
1277
1278
/* Write-locked by the functions in this file that insert into or remove from vmlist. */
DEFINE_RWLOCK(vmlist_lock);
/* Singly linked list of vm_struct, kept sorted by ascending address. */
struct vm_struct *vmlist;
1281
1282static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1283 unsigned long flags, void *caller)
1284{
1285 vm->flags = flags;
1286 vm->addr = (void *)va->va_start;
1287 vm->size = va->va_end - va->va_start;
1288 vm->caller = caller;
1289 va->vm = vm;
1290 va->flags |= VM_VM_AREA;
1291}
1292
1293static void insert_vmalloc_vmlist(struct vm_struct *vm)
1294{
1295 struct vm_struct *tmp, **p;
1296
1297 vm->flags &= ~VM_UNLIST;
1298 write_lock(&vmlist_lock);
1299 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1300 if (tmp->addr >= vm->addr)
1301 break;
1302 }
1303 vm->next = *p;
1304 *p = vm;
1305 write_unlock(&vmlist_lock);
1306}
1307
/* Initialise @vm from @va and put it on vmlist in one step. */
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	setup_vmalloc_vm(vm, va, flags, caller);
	insert_vmalloc_vmlist(vm);
}
1314
1315static struct vm_struct *__get_vm_area_node(unsigned long size,
1316 unsigned long align, unsigned long flags, unsigned long start,
1317 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1318{
1319 struct vmap_area *va;
1320 struct vm_struct *area;
1321
1322 BUG_ON(in_interrupt());
1323 if (flags & VM_IOREMAP) {
1324 int bit = fls(size);
1325
1326 if (bit > IOREMAP_MAX_ORDER)
1327 bit = IOREMAP_MAX_ORDER;
1328 else if (bit < PAGE_SHIFT)
1329 bit = PAGE_SHIFT;
1330
1331 align = 1ul << bit;
1332 }
1333
1334 size = PAGE_ALIGN(size);
1335 if (unlikely(!size))
1336 return NULL;
1337
1338 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1339 if (unlikely(!area))
1340 return NULL;
1341
1342
1343
1344
1345 size += PAGE_SIZE;
1346
1347 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1348 if (IS_ERR(va)) {
1349 kfree(area);
1350 return NULL;
1351 }
1352
1353
1354
1355
1356
1357
1358
1359
1360 if (flags & VM_UNLIST)
1361 setup_vmalloc_vm(area, va, flags, caller);
1362 else
1363 insert_vmalloc_vm(area, va, flags, caller);
1364
1365 return area;
1366}
1367
1368struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1369 unsigned long start, unsigned long end)
1370{
1371 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1372 __builtin_return_address(0));
1373}
1374EXPORT_SYMBOL_GPL(__get_vm_area);
1375
1376struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1377 unsigned long start, unsigned long end,
1378 void *caller)
1379{
1380 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1381 caller);
1382}
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1394{
1395 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1396 -1, GFP_KERNEL, __builtin_return_address(0));
1397}
1398
1399struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1400 void *caller)
1401{
1402 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1403 -1, GFP_KERNEL, caller);
1404}
1405
1406static struct vm_struct *find_vm_area(const void *addr)
1407{
1408 struct vmap_area *va;
1409
1410 va = find_vmap_area((unsigned long)addr);
1411 if (va && va->flags & VM_VM_AREA)
1412 return va->vm;
1413
1414 return NULL;
1415}
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425struct vm_struct *remove_vm_area(const void *addr)
1426{
1427 struct vmap_area *va;
1428
1429 va = find_vmap_area((unsigned long)addr);
1430 if (va && va->flags & VM_VM_AREA) {
1431 struct vm_struct *vm = va->vm;
1432
1433 if (!(vm->flags & VM_UNLIST)) {
1434 struct vm_struct *tmp, **p;
1435
1436
1437
1438
1439
1440 write_lock(&vmlist_lock);
1441 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1442 ;
1443 *p = tmp->next;
1444 write_unlock(&vmlist_lock);
1445 }
1446
1447 vmap_debug_free_range(va->va_start, va->va_end);
1448 free_unmap_vmap_area(va);
1449 vm->size -= PAGE_SIZE;
1450
1451 return vm;
1452 }
1453 return NULL;
1454}
1455
1456static void __vunmap(const void *addr, int deallocate_pages)
1457{
1458 struct vm_struct *area;
1459
1460 if (!addr)
1461 return;
1462
1463 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1464 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1465 return;
1466 }
1467
1468 area = remove_vm_area(addr);
1469 if (unlikely(!area)) {
1470 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1471 addr);
1472 return;
1473 }
1474
1475 debug_check_no_locks_freed(addr, area->size);
1476 debug_check_no_obj_freed(addr, area->size);
1477
1478 if (deallocate_pages) {
1479 int i;
1480
1481 for (i = 0; i < area->nr_pages; i++) {
1482 struct page *page = area->pages[i];
1483
1484 BUG_ON(!page);
1485 __free_page(page);
1486 }
1487
1488 if (area->flags & VM_VPAGES)
1489 vfree(area->pages);
1490 else
1491 kfree(area->pages);
1492 }
1493
1494 kfree(area);
1495 return;
1496}
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
/*
 * vfree - unmap the area at @addr and free its backing pages
 * @addr: start address of the area (NULL is a no-op in __vunmap())
 *
 * Must not be called from interrupt context (BUG).
 */
void vfree(const void *addr)
{
	const int free_pages = 1;

	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, free_pages);
}
EXPORT_SYMBOL(vfree);
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
/*
 * vunmap - remove the mapping at @addr without freeing the pages behind it
 * @addr: start address of the area
 *
 * May sleep; must not be called from interrupt context (BUG).
 */
void vunmap(const void *addr)
{
	const int free_pages = 0;

	BUG_ON(in_interrupt());
	might_sleep();
	__vunmap(addr, free_pages);
}
EXPORT_SYMBOL(vunmap);
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545void *vmap(struct page **pages, unsigned int count,
1546 unsigned long flags, pgprot_t prot)
1547{
1548 struct vm_struct *area;
1549
1550 might_sleep();
1551
1552 if (count > totalram_pages)
1553 return NULL;
1554
1555 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1556 __builtin_return_address(0));
1557 if (!area)
1558 return NULL;
1559
1560 if (map_vm_area(area, prot, &pages)) {
1561 vunmap(area->addr);
1562 return NULL;
1563 }
1564
1565 return area->addr;
1566}
1567EXPORT_SYMBOL(vmap);
1568
1569static void *__vmalloc_node(unsigned long size, unsigned long align,
1570 gfp_t gfp_mask, pgprot_t prot,
1571 int node, void *caller);
1572static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1573 pgprot_t prot, int node, void *caller)
1574{
1575 const int order = 0;
1576 struct page **pages;
1577 unsigned int nr_pages, array_size, i;
1578 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1579
1580 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1581 array_size = (nr_pages * sizeof(struct page *));
1582
1583 area->nr_pages = nr_pages;
1584
1585 if (array_size > PAGE_SIZE) {
1586 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1587 PAGE_KERNEL, node, caller);
1588 area->flags |= VM_VPAGES;
1589 } else {
1590 pages = kmalloc_node(array_size, nested_gfp, node);
1591 }
1592 area->pages = pages;
1593 area->caller = caller;
1594 if (!area->pages) {
1595 remove_vm_area(area->addr);
1596 kfree(area);
1597 return NULL;
1598 }
1599
1600 for (i = 0; i < area->nr_pages; i++) {
1601 struct page *page;
1602 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1603
1604 if (node < 0)
1605 page = alloc_page(tmp_mask);
1606 else
1607 page = alloc_pages_node(node, tmp_mask, order);
1608
1609 if (unlikely(!page)) {
1610
1611 area->nr_pages = i;
1612 goto fail;
1613 }
1614 area->pages[i] = page;
1615 }
1616
1617 if (map_vm_area(area, prot, &pages))
1618 goto fail;
1619 return area->addr;
1620
1621fail:
1622 warn_alloc_failed(gfp_mask, order,
1623 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1624 (area->nr_pages*PAGE_SIZE), area->size);
1625 vfree(area->addr);
1626 return NULL;
1627}
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644void *__vmalloc_node_range(unsigned long size, unsigned long align,
1645 unsigned long start, unsigned long end, gfp_t gfp_mask,
1646 pgprot_t prot, int node, void *caller)
1647{
1648 struct vm_struct *area;
1649 void *addr;
1650 unsigned long real_size = size;
1651
1652 size = PAGE_ALIGN(size);
1653 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1654 goto fail;
1655
1656 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
1657 start, end, node, gfp_mask, caller);
1658 if (!area)
1659 goto fail;
1660
1661 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1662 if (!addr)
1663 return NULL;
1664
1665
1666
1667
1668
1669 insert_vmalloc_vmlist(area);
1670
1671
1672
1673
1674
1675
1676 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1677
1678 return addr;
1679
1680fail:
1681 warn_alloc_failed(gfp_mask, 0,
1682 "vmalloc: allocation failure: %lu bytes\n",
1683 real_size);
1684 return NULL;
1685}
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700static void *__vmalloc_node(unsigned long size, unsigned long align,
1701 gfp_t gfp_mask, pgprot_t prot,
1702 int node, void *caller)
1703{
1704 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1705 gfp_mask, prot, node, caller);
1706}
1707
1708void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1709{
1710 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1711 __builtin_return_address(0));
1712}
1713EXPORT_SYMBOL(__vmalloc);
1714
1715static inline void *__vmalloc_node_flags(unsigned long size,
1716 int node, gfp_t flags)
1717{
1718 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1719 node, __builtin_return_address(0));
1720}
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731void *vmalloc(unsigned long size)
1732{
1733 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1734}
1735EXPORT_SYMBOL(vmalloc);
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747void *vzalloc(unsigned long size)
1748{
1749 return __vmalloc_node_flags(size, -1,
1750 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1751}
1752EXPORT_SYMBOL(vzalloc);
1753
1754
1755
1756
1757
1758
1759
1760
1761void *vmalloc_user(unsigned long size)
1762{
1763 struct vm_struct *area;
1764 void *ret;
1765
1766 ret = __vmalloc_node(size, SHMLBA,
1767 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1768 PAGE_KERNEL, -1, __builtin_return_address(0));
1769 if (ret) {
1770 area = find_vm_area(ret);
1771 area->flags |= VM_USERMAP;
1772 }
1773 return ret;
1774}
1775EXPORT_SYMBOL(vmalloc_user);
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788void *vmalloc_node(unsigned long size, int node)
1789{
1790 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1791 node, __builtin_return_address(0));
1792}
1793EXPORT_SYMBOL(vmalloc_node);
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807void *vzalloc_node(unsigned long size, int node)
1808{
1809 return __vmalloc_node_flags(size, node,
1810 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1811}
1812EXPORT_SYMBOL(vzalloc_node);
1813
/* Fall back to PAGE_KERNEL when no PAGE_KERNEL_EXEC has been defined. */
#if !defined(PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830void *vmalloc_exec(unsigned long size)
1831{
1832 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1833 -1, __builtin_return_address(0));
1834}
1835
/*
 * GFP mask used by vmalloc_32() and vmalloc_32_user().
 *
 * The expansion is parenthesized so it always behaves as a single
 * operand, whatever operator it is combined with at the point of use.
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 (GFP_KERNEL)
#endif
1843
1844
1845
1846
1847
1848
1849
1850
1851void *vmalloc_32(unsigned long size)
1852{
1853 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1854 -1, __builtin_return_address(0));
1855}
1856EXPORT_SYMBOL(vmalloc_32);
1857
1858
1859
1860
1861
1862
1863
1864
1865void *vmalloc_32_user(unsigned long size)
1866{
1867 struct vm_struct *area;
1868 void *ret;
1869
1870 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1871 -1, __builtin_return_address(0));
1872 if (ret) {
1873 area = find_vm_area(ret);
1874 area->flags |= VM_USERMAP;
1875 }
1876 return ret;
1877}
1878EXPORT_SYMBOL(vmalloc_32_user);
1879
1880
1881
1882
1883
1884
1885static int aligned_vread(char *buf, char *addr, unsigned long count)
1886{
1887 struct page *p;
1888 int copied = 0;
1889
1890 while (count) {
1891 unsigned long offset, length;
1892
1893 offset = (unsigned long)addr & ~PAGE_MASK;
1894 length = PAGE_SIZE - offset;
1895 if (length > count)
1896 length = count;
1897 p = vmalloc_to_page(addr);
1898
1899
1900
1901
1902
1903
1904
1905 if (p) {
1906
1907
1908
1909
1910 void *map = kmap_atomic(p);
1911 memcpy(buf, map + offset, length);
1912 kunmap_atomic(map);
1913 } else
1914 memset(buf, 0, length);
1915
1916 addr += length;
1917 buf += length;
1918 copied += length;
1919 count -= length;
1920 }
1921 return copied;
1922}
1923
1924static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1925{
1926 struct page *p;
1927 int copied = 0;
1928
1929 while (count) {
1930 unsigned long offset, length;
1931
1932 offset = (unsigned long)addr & ~PAGE_MASK;
1933 length = PAGE_SIZE - offset;
1934 if (length > count)
1935 length = count;
1936 p = vmalloc_to_page(addr);
1937
1938
1939
1940
1941
1942
1943
1944 if (p) {
1945
1946
1947
1948
1949 void *map = kmap_atomic(p);
1950 memcpy(map + offset, buf, length);
1951 kunmap_atomic(map);
1952 }
1953 addr += length;
1954 buf += length;
1955 copied += length;
1956 count -= length;
1957 }
1958 return copied;
1959}
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989long vread(char *buf, char *addr, unsigned long count)
1990{
1991 struct vm_struct *tmp;
1992 char *vaddr, *buf_start = buf;
1993 unsigned long buflen = count;
1994 unsigned long n;
1995
1996
1997 if ((unsigned long) addr + count < count)
1998 count = -(unsigned long) addr;
1999
2000 read_lock(&vmlist_lock);
2001 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2002 vaddr = (char *) tmp->addr;
2003 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2004 continue;
2005 while (addr < vaddr) {
2006 if (count == 0)
2007 goto finished;
2008 *buf = '\0';
2009 buf++;
2010 addr++;
2011 count--;
2012 }
2013 n = vaddr + tmp->size - PAGE_SIZE - addr;
2014 if (n > count)
2015 n = count;
2016 if (!(tmp->flags & VM_IOREMAP))
2017 aligned_vread(buf, addr, n);
2018 else
2019 memset(buf, 0, n);
2020 buf += n;
2021 addr += n;
2022 count -= n;
2023 }
2024finished:
2025 read_unlock(&vmlist_lock);
2026
2027 if (buf == buf_start)
2028 return 0;
2029
2030 if (buf != buf_start + buflen)
2031 memset(buf, 0, buflen - (buf - buf_start));
2032
2033 return buflen;
2034}
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064long vwrite(char *buf, char *addr, unsigned long count)
2065{
2066 struct vm_struct *tmp;
2067 char *vaddr;
2068 unsigned long n, buflen;
2069 int copied = 0;
2070
2071
2072 if ((unsigned long) addr + count < count)
2073 count = -(unsigned long) addr;
2074 buflen = count;
2075
2076 read_lock(&vmlist_lock);
2077 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2078 vaddr = (char *) tmp->addr;
2079 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2080 continue;
2081 while (addr < vaddr) {
2082 if (count == 0)
2083 goto finished;
2084 buf++;
2085 addr++;
2086 count--;
2087 }
2088 n = vaddr + tmp->size - PAGE_SIZE - addr;
2089 if (n > count)
2090 n = count;
2091 if (!(tmp->flags & VM_IOREMAP)) {
2092 aligned_vwrite(buf, addr, n);
2093 copied++;
2094 }
2095 buf += n;
2096 addr += n;
2097 count -= n;
2098 }
2099finished:
2100 read_unlock(&vmlist_lock);
2101 if (!copied)
2102 return 0;
2103 return buflen;
2104}
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2121 unsigned long pgoff)
2122{
2123 struct vm_struct *area;
2124 unsigned long uaddr = vma->vm_start;
2125 unsigned long usize = vma->vm_end - vma->vm_start;
2126
2127 if ((PAGE_SIZE-1) & (unsigned long)addr)
2128 return -EINVAL;
2129
2130 area = find_vm_area(addr);
2131 if (!area)
2132 return -EINVAL;
2133
2134 if (!(area->flags & VM_USERMAP))
2135 return -EINVAL;
2136
2137 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2138 return -EINVAL;
2139
2140 addr += pgoff << PAGE_SHIFT;
2141 do {
2142 struct page *page = vmalloc_to_page(addr);
2143 int ret;
2144
2145 ret = vm_insert_page(vma, uaddr, page);
2146 if (ret)
2147 return ret;
2148
2149 uaddr += PAGE_SIZE;
2150 addr += PAGE_SIZE;
2151 usize -= PAGE_SIZE;
2152 } while (usize > 0);
2153
2154
2155 vma->vm_flags |= VM_RESERVED;
2156
2157 return 0;
2158}
2159EXPORT_SYMBOL(remap_vmalloc_range);
2160
2161
2162
2163
2164
/*
 * Default vmalloc_sync_all(): does nothing.  Declared weak so that
 * another definition can replace it at link time.
 */
__attribute__((weak)) void vmalloc_sync_all(void)
{
}
2168
2169
2170static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2171{
2172 pte_t ***p = data;
2173
2174 if (p) {
2175 *(*p) = pte;
2176 (*p)++;
2177 }
2178 return 0;
2179}
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2196{
2197 struct vm_struct *area;
2198
2199 area = get_vm_area_caller(size, VM_IOREMAP,
2200 __builtin_return_address(0));
2201 if (area == NULL)
2202 return NULL;
2203
2204
2205
2206
2207
2208 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2209 size, f, ptes ? &ptes : NULL)) {
2210 free_vm_area(area);
2211 return NULL;
2212 }
2213
2214 return area;
2215}
2216EXPORT_SYMBOL_GPL(alloc_vm_area);
2217
2218void free_vm_area(struct vm_struct *area)
2219{
2220 struct vm_struct *ret;
2221 ret = remove_vm_area(area->addr);
2222 BUG_ON(ret != area);
2223 kfree(area);
2224}
2225EXPORT_SYMBOL_GPL(free_vm_area);
2226
2227#ifdef CONFIG_SMP
2228static struct vmap_area *node_to_va(struct rb_node *n)
2229{
2230 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2231}
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245static bool pvm_find_next_prev(unsigned long end,
2246 struct vmap_area **pnext,
2247 struct vmap_area **pprev)
2248{
2249 struct rb_node *n = vmap_area_root.rb_node;
2250 struct vmap_area *va = NULL;
2251
2252 while (n) {
2253 va = rb_entry(n, struct vmap_area, rb_node);
2254 if (end < va->va_end)
2255 n = n->rb_left;
2256 else if (end > va->va_end)
2257 n = n->rb_right;
2258 else
2259 break;
2260 }
2261
2262 if (!va)
2263 return false;
2264
2265 if (va->va_end > end) {
2266 *pnext = va;
2267 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2268 } else {
2269 *pprev = va;
2270 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2271 }
2272 return true;
2273}
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291static unsigned long pvm_determine_end(struct vmap_area **pnext,
2292 struct vmap_area **pprev,
2293 unsigned long align)
2294{
2295 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2296 unsigned long addr;
2297
2298 if (*pnext)
2299 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2300 else
2301 addr = vmalloc_end;
2302
2303 while (*pprev && (*pprev)->va_end > addr) {
2304 *pnext = *pprev;
2305 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2306 }
2307
2308 return addr;
2309}
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2336 const size_t *sizes, int nr_vms,
2337 size_t align)
2338{
2339 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2340 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2341 struct vmap_area **vas, *prev, *next;
2342 struct vm_struct **vms;
2343 int area, area2, last_area, term_area;
2344 unsigned long base, start, end, last_end;
2345 bool purged = false;
2346
2347
2348 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2349 for (last_area = 0, area = 0; area < nr_vms; area++) {
2350 start = offsets[area];
2351 end = start + sizes[area];
2352
2353
2354 BUG_ON(!IS_ALIGNED(offsets[area], align));
2355 BUG_ON(!IS_ALIGNED(sizes[area], align));
2356
2357
2358 if (start > offsets[last_area])
2359 last_area = area;
2360
2361 for (area2 = 0; area2 < nr_vms; area2++) {
2362 unsigned long start2 = offsets[area2];
2363 unsigned long end2 = start2 + sizes[area2];
2364
2365 if (area2 == area)
2366 continue;
2367
2368 BUG_ON(start2 >= start && start2 < end);
2369 BUG_ON(end2 <= end && end2 > start);
2370 }
2371 }
2372 last_end = offsets[last_area] + sizes[last_area];
2373
2374 if (vmalloc_end - vmalloc_start < last_end) {
2375 WARN_ON(true);
2376 return NULL;
2377 }
2378
2379 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2380 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2381 if (!vas || !vms)
2382 goto err_free2;
2383
2384 for (area = 0; area < nr_vms; area++) {
2385 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2386 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2387 if (!vas[area] || !vms[area])
2388 goto err_free;
2389 }
2390retry:
2391 spin_lock(&vmap_area_lock);
2392
2393
2394 area = term_area = last_area;
2395 start = offsets[area];
2396 end = start + sizes[area];
2397
2398 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2399 base = vmalloc_end - last_end;
2400 goto found;
2401 }
2402 base = pvm_determine_end(&next, &prev, align) - end;
2403
2404 while (true) {
2405 BUG_ON(next && next->va_end <= base + end);
2406 BUG_ON(prev && prev->va_end > base + end);
2407
2408
2409
2410
2411
2412 if (base + last_end < vmalloc_start + last_end) {
2413 spin_unlock(&vmap_area_lock);
2414 if (!purged) {
2415 purge_vmap_area_lazy();
2416 purged = true;
2417 goto retry;
2418 }
2419 goto err_free;
2420 }
2421
2422
2423
2424
2425
2426 if (next && next->va_start < base + end) {
2427 base = pvm_determine_end(&next, &prev, align) - end;
2428 term_area = area;
2429 continue;
2430 }
2431
2432
2433
2434
2435
2436
2437 if (prev && prev->va_end > base + start) {
2438 next = prev;
2439 prev = node_to_va(rb_prev(&next->rb_node));
2440 base = pvm_determine_end(&next, &prev, align) - end;
2441 term_area = area;
2442 continue;
2443 }
2444
2445
2446
2447
2448
2449 area = (area + nr_vms - 1) % nr_vms;
2450 if (area == term_area)
2451 break;
2452 start = offsets[area];
2453 end = start + sizes[area];
2454 pvm_find_next_prev(base + end, &next, &prev);
2455 }
2456found:
2457
2458 for (area = 0; area < nr_vms; area++) {
2459 struct vmap_area *va = vas[area];
2460
2461 va->va_start = base + offsets[area];
2462 va->va_end = va->va_start + sizes[area];
2463 __insert_vmap_area(va);
2464 }
2465
2466 vmap_area_pcpu_hole = base + offsets[last_area];
2467
2468 spin_unlock(&vmap_area_lock);
2469
2470
2471 for (area = 0; area < nr_vms; area++)
2472 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2473 pcpu_get_vm_areas);
2474
2475 kfree(vas);
2476 return vms;
2477
2478err_free:
2479 for (area = 0; area < nr_vms; area++) {
2480 kfree(vas[area]);
2481 kfree(vms[area]);
2482 }
2483err_free2:
2484 kfree(vas);
2485 kfree(vms);
2486 return NULL;
2487}
2488
2489
2490
2491
2492
2493
2494
2495
/*
 * pcpu_free_vm_areas - release a group of vm areas
 * @vms: array of @nr_vms vm_struct pointers
 * @nr_vms: number of entries in @vms
 *
 * Frees every area with free_vm_area() and then the array itself.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx = 0;

	while (idx < nr_vms) {
		free_vm_area(vms[idx]);
		idx++;
	}
	kfree(vms);
}
2504#endif
2505
2506#ifdef CONFIG_PROC_FS
2507static void *s_start(struct seq_file *m, loff_t *pos)
2508 __acquires(&vmlist_lock)
2509{
2510 loff_t n = *pos;
2511 struct vm_struct *v;
2512
2513 read_lock(&vmlist_lock);
2514 v = vmlist;
2515 while (n > 0 && v) {
2516 n--;
2517 v = v->next;
2518 }
2519 if (!n)
2520 return v;
2521
2522 return NULL;
2523
2524}
2525
2526static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2527{
2528 struct vm_struct *v = p;
2529
2530 ++*pos;
2531 return v->next;
2532}
2533
2534static void s_stop(struct seq_file *m, void *p)
2535 __releases(&vmlist_lock)
2536{
2537 read_unlock(&vmlist_lock);
2538}
2539
2540static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2541{
2542 if (NUMA_BUILD) {
2543 unsigned int nr, *counters = m->private;
2544
2545 if (!counters)
2546 return;
2547
2548 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2549
2550 for (nr = 0; nr < v->nr_pages; nr++)
2551 counters[page_to_nid(v->pages[nr])]++;
2552
2553 for_each_node_state(nr, N_HIGH_MEMORY)
2554 if (counters[nr])
2555 seq_printf(m, " N%u=%u", nr, counters[nr]);
2556 }
2557}
2558
2559static int s_show(struct seq_file *m, void *p)
2560{
2561 struct vm_struct *v = p;
2562
2563 seq_printf(m, "0x%p-0x%p %7ld",
2564 v->addr, v->addr + v->size, v->size);
2565
2566 if (v->caller)
2567 seq_printf(m, " %pS", v->caller);
2568
2569 if (v->nr_pages)
2570 seq_printf(m, " pages=%d", v->nr_pages);
2571
2572 if (v->phys_addr)
2573 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2574
2575 if (v->flags & VM_IOREMAP)
2576 seq_printf(m, " ioremap");
2577
2578 if (v->flags & VM_ALLOC)
2579 seq_printf(m, " vmalloc");
2580
2581 if (v->flags & VM_MAP)
2582 seq_printf(m, " vmap");
2583
2584 if (v->flags & VM_USERMAP)
2585 seq_printf(m, " user");
2586
2587 if (v->flags & VM_VPAGES)
2588 seq_printf(m, " vpages");
2589
2590 show_numa_info(m, v);
2591 seq_putc(m, '\n');
2592 return 0;
2593}
2594
2595static const struct seq_operations vmalloc_op = {
2596 .start = s_start,
2597 .next = s_next,
2598 .stop = s_stop,
2599 .show = s_show,
2600};
2601
2602static int vmalloc_open(struct inode *inode, struct file *file)
2603{
2604 unsigned int *ptr = NULL;
2605 int ret;
2606
2607 if (NUMA_BUILD) {
2608 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2609 if (ptr == NULL)
2610 return -ENOMEM;
2611 }
2612 ret = seq_open(file, &vmalloc_op);
2613 if (!ret) {
2614 struct seq_file *m = file->private_data;
2615 m->private = ptr;
2616 } else
2617 kfree(ptr);
2618 return ret;
2619}
2620
2621static const struct file_operations proc_vmalloc_operations = {
2622 .open = vmalloc_open,
2623 .read = seq_read,
2624 .llseek = seq_lseek,
2625 .release = seq_release_private,
2626};
2627
2628static int __init proc_vmalloc_init(void)
2629{
2630 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2631 return 0;
2632}
2633module_init(proc_vmalloc_init);
2634#endif
2635
2636