/*
 *  linux/mm/vmalloc.c
 *
 *  Virtually contiguous memory allocation (vmalloc/vfree/vmap/vunmap)
 *  and the kernel virtual address (kva) allocator behind it.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

/*** Page table manipulation functions ***/

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */

	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Set up page tables in kva (start, end).  The ptes shall have protection
 * "prot" and will point at the pages in the @pages array.  Returns the
 * number of pages mapped, or -errno on failure.  No cache or TLB flushing
 * is done here.
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}

static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	flush_cache_vmap(start, end);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put it in the vmalloc space.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Map a vmalloc()-space virtual address to the physical page.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (!pgd_none(*pgd)) {
		pud_t *pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd_t *pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				pte_t *ptep, pte;

				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);


/*** Global kva allocator ***/

#define VM_LAZY_FREE	0x01
#define VM_LAZY_FREEING	0x02
#define VM_VM_AREA	0x04

struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long flags;
	struct rb_node rb_node;		/* address sorted rbtree */
	struct list_head list;		/* address sorted list */
	struct list_head purge_list;	/* "lazy purge" list */
	struct vm_struct *vm;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(vmap_area_lock);
static LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;

/* The vmap cache globals are protected by vmap_area_lock */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr > va->va_start)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **p = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *tmp;

	while (*p) {
		struct vmap_area *tmp_va;

		parent = *p;
		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
		if (va->va_start < tmp_va->va_end)
			p = &(*p)->rb_left;
		else if (va->va_end > tmp_va->va_start)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, p);
	rb_insert_color(&va->rb_node, &vmap_area_root);

	/* keep the vmap_area_list sorted by address as well */
	tmp = rb_prev(&va->rb_node);
	if (tmp) {
		struct vmap_area *prev;
		prev = rb_entry(tmp, struct vmap_area, rb_node);
		list_add_rcu(&va->list, &prev->list);
	} else
		list_add_rcu(&va->list, &vmap_area_list);
}

static void purge_vmap_area_lazy(void);

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(!is_power_of_2(align));

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);
	/*
	 * Invalidate cache if we have more permissive parameters.
	 * cached_hole_size notes the largest hole noticed _below_
	 * the vmap_area cached in free_vmap_cache: if size fits
	 * into that hole, we want to scan from vstart to reuse
	 * the hole instead of allocating above free_vmap_cache.
	 * Note that __free_vmap_area may update free_vmap_cache
	 * without updating cached_hole_size or cached_align.
	 */
	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}
	/* record if we encounter less permissive parameters */
	cached_vstart = vstart;
	cached_align = align;

	/* find starting point for our search */
	if (free_vmap_cache) {
		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
		addr = ALIGN(first->va_end, align);
		if (addr < vstart)
			goto nocache;
		if (addr + size - 1 < addr)
			goto overflow;

	} else {
		addr = ALIGN(vstart, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = vmap_area_root.rb_node;
		first = NULL;

		while (n) {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				first = tmp;
				if (tmp->va_start <= addr)
					break;
				n = n->rb_left;
			} else
				n = n->rb_right;
		}

		if (!first)
			goto found;
	}

	/* from the starting point, walk areas until a suitable hole is found */
	while (addr + size > first->va_start && addr + size <= vend) {
		if (addr + cached_hole_size < first->va_start)
			cached_hole_size = first->va_start - addr;
		addr = ALIGN(first->va_end, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = rb_next(&first->rb_node);
		if (n)
			first = rb_entry(n, struct vmap_area, rb_node);
		else
			goto found;
	}

found:
	if (addr + size > vend)
		goto overflow;

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	BUG_ON(va->va_start & (align-1));
	BUG_ON(va->va_start < vstart);
	BUG_ON(va->va_end > vend);

	return va;

overflow:
	spin_unlock(&vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
	if (printk_ratelimit())
		printk(KERN_WARNING
			"vmap allocation for size %lu failed: "
			"use vmalloc=<size> to increase size.\n", size);
	kfree(va);
	return ERR_PTR(-EBUSY);
}

static void __free_vmap_area(struct vmap_area *va)
{
	BUG_ON(RB_EMPTY_NODE(&va->rb_node));

	if (free_vmap_cache) {
		if (va->va_end < cached_vstart) {
			free_vmap_cache = NULL;
		} else {
			struct vmap_area *cache;
			cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
			if (va->va_start <= cache->va_start) {
				free_vmap_cache = rb_prev(&va->rb_node);
				/*
				 * We don't try to update cached_hole_size or
				 * cached_align here; it would cost more than
				 * it is worth.
				 */
			}
		}
	}
	rb_erase(&va->rb_node, &vmap_area_root);
	RB_CLEAR_NODE(&va->rb_node);
	list_del_rcu(&va->list);

	/*
	 * Track the highest possible candidate for pcpu area
	 * allocation.  Areas outside of vmalloc area can be returned
	 * here too, consider only end addresses which fall inside
	 * vmalloc area proper.
	 */
	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

	kfree_rcu(va, rcu_head);
}

/*
 * Free a region of KVA allocated by alloc_vmap_area
 */
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}

/*
 * Clear the pagetable entries of a given vmap_area
 */
static void unmap_vmap_area(struct vmap_area *va)
{
	vunmap_page_range(va->va_start, va->va_end);
}

static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
	/*
	 * Unmap page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
	 * bugs similarly to those in linear kernel virtual address
	 * space after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, in order to
	 * minimise intrusiveness of this debugging feature.
	 *
	 * This is going to be *slow* (linear kernel virtual address
	 * debugging doesn't do a broadcast TLB flush so it is a lot
	 * faster).
	 */
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}
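
/*
 * lazy_max_pages() is the threshold, in pages, at which lazily freed
 * vmap areas are purged and their kernel TLB entries flushed.  Freed
 * areas are batched so that a single global TLB flush covers many
 * vunmap()s; the threshold scales with fls(num_online_cpus()), at
 * 32MB worth of address space per step.
 */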
545static unsigned long lazy_max_pages(void)
546{
547 unsigned int log;
548
549 log = fls(num_online_cpus());
550
551 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
552}
553
554static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
555
556
557static void purge_fragmented_blocks_allcpus(void);
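
/*
 * set_iounmap_nonlazy() is called before iounmap() by callers that want
 * the mapping torn down and the TLB flushed immediately rather than
 * lazily: it pushes vmap_lazy_nr past lazy_max_pages() so the next
 * lazy free triggers a purge.
 */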
563void set_iounmap_nonlazy(void)
564{
565 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
566}
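
/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */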
578static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
579 int sync, int force_flush)
580{
581 static DEFINE_SPINLOCK(purge_lock);
582 LIST_HEAD(valist);
583 struct vmap_area *va;
584 struct vmap_area *n_va;
585 int nr = 0;
586
587
588
589
590
591
592 if (!sync && !force_flush) {
593 if (!spin_trylock(&purge_lock))
594 return;
595 } else
596 spin_lock(&purge_lock);
597
598 if (sync)
599 purge_fragmented_blocks_allcpus();
600
601 rcu_read_lock();
602 list_for_each_entry_rcu(va, &vmap_area_list, list) {
603 if (va->flags & VM_LAZY_FREE) {
604 if (va->va_start < *start)
605 *start = va->va_start;
606 if (va->va_end > *end)
607 *end = va->va_end;
608 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
609 list_add_tail(&va->purge_list, &valist);
610 va->flags |= VM_LAZY_FREEING;
611 va->flags &= ~VM_LAZY_FREE;
612 }
613 }
614 rcu_read_unlock();
615
616 if (nr)
617 atomic_sub(nr, &vmap_lazy_nr);
618
619 if (nr || force_flush)
620 flush_tlb_kernel_range(*start, *end);
621
622 if (nr) {
623 spin_lock(&vmap_area_lock);
624 list_for_each_entry_safe(va, n_va, &valist, purge_list)
625 __free_vmap_area(va);
626 spin_unlock(&vmap_area_lock);
627 }
628 spin_unlock(&purge_lock);
629}
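
/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * else is already purging.
 */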
635static void try_purge_vmap_area_lazy(void)
636{
637 unsigned long start = ULONG_MAX, end = 0;
638
639 __purge_vmap_area_lazy(&start, &end, 0, 0);
640}
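
/*
 * Kick off a purge of the outstanding lazy areas, waiting for any purge
 * already in progress to finish.
 */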
645static void purge_vmap_area_lazy(void)
646{
647 unsigned long start = ULONG_MAX, end = 0;
648
649 __purge_vmap_area_lazy(&start, &end, 1, 0);
650}
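
/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap had been called for the correct range
 * previously.
 */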
657static void free_vmap_area_noflush(struct vmap_area *va)
658{
659 va->flags |= VM_LAZY_FREE;
660 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
661 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
662 try_purge_vmap_area_lazy();
663}
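
/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 */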
669static void free_unmap_vmap_area_noflush(struct vmap_area *va)
670{
671 unmap_vmap_area(va);
672 free_vmap_area_noflush(va);
673}
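
/*
 * Free and unmap a vmap area, flushing the data cache for the range first.
 */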
678static void free_unmap_vmap_area(struct vmap_area *va)
679{
680 flush_cache_vunmap(va->va_start, va->va_end);
681 free_unmap_vmap_area_noflush(va);
682}
683
684static struct vmap_area *find_vmap_area(unsigned long addr)
685{
686 struct vmap_area *va;
687
688 spin_lock(&vmap_area_lock);
689 va = __find_vmap_area(addr);
690 spin_unlock(&vmap_area_lock);
691
692 return va;
693}
694
695static void free_unmap_vmap_area_addr(unsigned long addr)
696{
697 struct vmap_area *va;
698
699 va = find_vmap_area(addr);
700 BUG_ON(!va);
701 free_unmap_vmap_area(va);
702}
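
/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure there is
 * room for at least 16 percpu vmap blocks per CPU.  VMALLOC_START and
 * VMALLOC_END need not be compile-time constants, so a representative
 * VMALLOC_SPACE is guessed below and the per-CPU block size derived from it.
 */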
716#if BITS_PER_LONG == 32
717#define VMALLOC_SPACE (128UL*1024*1024)
718#else
719#define VMALLOC_SPACE (128UL*1024*1024*1024)
720#endif
721
722#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
723#define VMAP_MAX_ALLOC BITS_PER_LONG
724#define VMAP_BBMAP_BITS_MAX 1024
725#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
726#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
727#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
728#define VMAP_BBMAP_BITS \
729 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
730 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
731 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
732
733#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
734
735static bool vmap_initialized __read_mostly = false;
736
737struct vmap_block_queue {
738 spinlock_t lock;
739 struct list_head free;
740};
741
742struct vmap_block {
743 spinlock_t lock;
744 struct vmap_area *va;
745 struct vmap_block_queue *vbq;
746 unsigned long free, dirty;
747 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
748 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
749 struct list_head free_list;
750 struct rcu_head rcu_head;
751 struct list_head purge;
752};
753
754
755static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
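
/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap
 * block in the free path (vb_free() only gets an address, not the block).
 */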
762static DEFINE_SPINLOCK(vmap_block_tree_lock);
763static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
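
/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * problem.
 *
 * addr_to_vb_idx() converts a vmalloc address to the index of its vmap
 * block in the radix tree above.
 */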
772static unsigned long addr_to_vb_idx(unsigned long addr)
773{
774 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
775 addr /= VMAP_BLOCK_SIZE;
776 return addr;
777}
778
779static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
780{
781 struct vmap_block_queue *vbq;
782 struct vmap_block *vb;
783 struct vmap_area *va;
784 unsigned long vb_idx;
785 int node, err;
786
787 node = numa_node_id();
788
789 vb = kmalloc_node(sizeof(struct vmap_block),
790 gfp_mask & GFP_RECLAIM_MASK, node);
791 if (unlikely(!vb))
792 return ERR_PTR(-ENOMEM);
793
794 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
795 VMALLOC_START, VMALLOC_END,
796 node, gfp_mask);
797 if (IS_ERR(va)) {
798 kfree(vb);
799 return ERR_CAST(va);
800 }
801
802 err = radix_tree_preload(gfp_mask);
803 if (unlikely(err)) {
804 kfree(vb);
805 free_vmap_area(va);
806 return ERR_PTR(err);
807 }
808
809 spin_lock_init(&vb->lock);
810 vb->va = va;
811 vb->free = VMAP_BBMAP_BITS;
812 vb->dirty = 0;
813 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
814 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
815 INIT_LIST_HEAD(&vb->free_list);
816
817 vb_idx = addr_to_vb_idx(va->va_start);
818 spin_lock(&vmap_block_tree_lock);
819 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
820 spin_unlock(&vmap_block_tree_lock);
821 BUG_ON(err);
822 radix_tree_preload_end();
823
824 vbq = &get_cpu_var(vmap_block_queue);
825 vb->vbq = vbq;
826 spin_lock(&vbq->lock);
827 list_add_rcu(&vb->free_list, &vbq->free);
828 spin_unlock(&vbq->lock);
829 put_cpu_var(vmap_block_queue);
830
831 return vb;
832}
833
834static void free_vmap_block(struct vmap_block *vb)
835{
836 struct vmap_block *tmp;
837 unsigned long vb_idx;
838
839 vb_idx = addr_to_vb_idx(vb->va->va_start);
840 spin_lock(&vmap_block_tree_lock);
841 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
842 spin_unlock(&vmap_block_tree_lock);
843 BUG_ON(tmp != vb);
844
845 free_vmap_area_noflush(vb->va);
846 kfree_rcu(vb, rcu_head);
847}
848
849static void purge_fragmented_blocks(int cpu)
850{
851 LIST_HEAD(purge);
852 struct vmap_block *vb;
853 struct vmap_block *n_vb;
854 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
855
856 rcu_read_lock();
857 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
858
859 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
860 continue;
861
862 spin_lock(&vb->lock);
863 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
864 vb->free = 0;
865 vb->dirty = VMAP_BBMAP_BITS;
866 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
867 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
868 spin_lock(&vbq->lock);
869 list_del_rcu(&vb->free_list);
870 spin_unlock(&vbq->lock);
871 spin_unlock(&vb->lock);
872 list_add_tail(&vb->purge, &purge);
873 } else
874 spin_unlock(&vb->lock);
875 }
876 rcu_read_unlock();
877
878 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
879 list_del(&vb->purge);
880 free_vmap_block(vb);
881 }
882}
883
884static void purge_fragmented_blocks_thiscpu(void)
885{
886 purge_fragmented_blocks(smp_processor_id());
887}
888
889static void purge_fragmented_blocks_allcpus(void)
890{
891 int cpu;
892
893 for_each_possible_cpu(cpu)
894 purge_fragmented_blocks(cpu);
895}
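
/*
 * vb_alloc() serves small requests (up to VMAP_MAX_ALLOC pages) for
 * vm_map_ram() out of a per-CPU vmap block, taking only per-block and
 * per-queue locks on the fast path; larger requests go through
 * alloc_vmap_area() instead.
 */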
897static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
898{
899 struct vmap_block_queue *vbq;
900 struct vmap_block *vb;
901 unsigned long addr = 0;
902 unsigned int order;
903 int purge = 0;
904
905 BUG_ON(size & ~PAGE_MASK);
906 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
907 order = get_order(size);
908
909again:
910 rcu_read_lock();
911 vbq = &get_cpu_var(vmap_block_queue);
912 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
913 int i;
914
915 spin_lock(&vb->lock);
916 if (vb->free < 1UL << order)
917 goto next;
918
919 i = bitmap_find_free_region(vb->alloc_map,
920 VMAP_BBMAP_BITS, order);
921
922 if (i < 0) {
923 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
924
925 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
926 purge = 1;
927 }
928 goto next;
929 }
930 addr = vb->va->va_start + (i << PAGE_SHIFT);
931 BUG_ON(addr_to_vb_idx(addr) !=
932 addr_to_vb_idx(vb->va->va_start));
933 vb->free -= 1UL << order;
934 if (vb->free == 0) {
935 spin_lock(&vbq->lock);
936 list_del_rcu(&vb->free_list);
937 spin_unlock(&vbq->lock);
938 }
939 spin_unlock(&vb->lock);
940 break;
941next:
942 spin_unlock(&vb->lock);
943 }
944
945 if (purge)
946 purge_fragmented_blocks_thiscpu();
947
948 put_cpu_var(vmap_block_queue);
949 rcu_read_unlock();
950
951 if (!addr) {
952 vb = new_vmap_block(gfp_mask);
953 if (IS_ERR(vb))
954 return vb;
955 goto again;
956 }
957
958 return (void *)addr;
959}
960
961static void vb_free(const void *addr, unsigned long size)
962{
963 unsigned long offset;
964 unsigned long vb_idx;
965 unsigned int order;
966 struct vmap_block *vb;
967
968 BUG_ON(size & ~PAGE_MASK);
969 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
970
971 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
972
973 order = get_order(size);
974
975 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
976
977 vb_idx = addr_to_vb_idx((unsigned long)addr);
978 rcu_read_lock();
979 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
980 rcu_read_unlock();
981 BUG_ON(!vb);
982
983 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
984
985 spin_lock(&vb->lock);
986 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
987
988 vb->dirty += 1UL << order;
989 if (vb->dirty == VMAP_BBMAP_BITS) {
990 BUG_ON(vb->free);
991 spin_unlock(&vb->lock);
992 free_vmap_block(vb);
993 } else
994 spin_unlock(&vb->lock);
995}
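
/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads. What this means is that any page you
 * have now, may, in a former life, have been mapped into kernel virtual
 * address by the vmap layer and so there might be some CPUs with TLB entries
 * still referencing that page (additional to the regular 1:1 kernel mapping).
 *
 * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
 * be sure that none of the pages we have control over will have any aliases
 * from the vmap layer.
 */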
1010void vm_unmap_aliases(void)
1011{
1012 unsigned long start = ULONG_MAX, end = 0;
1013 int cpu;
1014 int flush = 0;
1015
1016 if (unlikely(!vmap_initialized))
1017 return;
1018
1019 for_each_possible_cpu(cpu) {
1020 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1021 struct vmap_block *vb;
1022
1023 rcu_read_lock();
1024 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1025 int i;
1026
1027 spin_lock(&vb->lock);
1028 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1029 while (i < VMAP_BBMAP_BITS) {
1030 unsigned long s, e;
1031 int j;
1032 j = find_next_zero_bit(vb->dirty_map,
1033 VMAP_BBMAP_BITS, i);
1034
1035 s = vb->va->va_start + (i << PAGE_SHIFT);
1036 e = vb->va->va_start + (j << PAGE_SHIFT);
1037 flush = 1;
1038
1039 if (s < start)
1040 start = s;
1041 if (e > end)
1042 end = e;
1043
1044 i = j;
1045 i = find_next_bit(vb->dirty_map,
1046 VMAP_BBMAP_BITS, i);
1047 }
1048 spin_unlock(&vb->lock);
1049 }
1050 rcu_read_unlock();
1051 }
1052
1053 __purge_vmap_area_lazy(&start, &end, 1, flush);
1054}
1055EXPORT_SYMBOL_GPL(vm_unmap_aliases);
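
/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */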
1062void vm_unmap_ram(const void *mem, unsigned int count)
1063{
1064 unsigned long size = count << PAGE_SHIFT;
1065 unsigned long addr = (unsigned long)mem;
1066
1067 BUG_ON(!addr);
1068 BUG_ON(addr < VMALLOC_START);
1069 BUG_ON(addr > VMALLOC_END);
1070 BUG_ON(addr & (PAGE_SIZE-1));
1071
1072 debug_check_no_locks_freed(mem, size);
1073 vmap_debug_free_range(addr, addr+size);
1074
1075 if (likely(count <= VMAP_MAX_ALLOC))
1076 vb_free(mem, size);
1077 else
1078 free_unmap_vmap_area_addr(addr);
1079}
1080EXPORT_SYMBOL(vm_unmap_ram);
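
/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 *
 * A minimal usage sketch (assumes a caller-supplied @pages array of @nr
 * pages; error handling trimmed):
 *
 *	void *p = vm_map_ram(pages, nr, -1, PAGE_KERNEL);
 *	if (p) {
 *		memset(p, 0, nr * PAGE_SIZE);
 *		vm_unmap_ram(p, nr);
 *	}
 */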
1091void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1092{
1093 unsigned long size = count << PAGE_SHIFT;
1094 unsigned long addr;
1095 void *mem;
1096
1097 if (likely(count <= VMAP_MAX_ALLOC)) {
1098 mem = vb_alloc(size, GFP_KERNEL);
1099 if (IS_ERR(mem))
1100 return NULL;
1101 addr = (unsigned long)mem;
1102 } else {
1103 struct vmap_area *va;
1104 va = alloc_vmap_area(size, PAGE_SIZE,
1105 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1106 if (IS_ERR(va))
1107 return NULL;
1108
1109 addr = va->va_start;
1110 mem = (void *)addr;
1111 }
1112 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1113 vm_unmap_ram(mem, count);
1114 return NULL;
1115 }
1116 return mem;
1117}
1118EXPORT_SYMBOL(vm_map_ram);
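
/**
 * vm_area_add_early - add vm area during early boot
 * @vm: vm_struct to add
 *
 * This function is used to add fixed kernel vm area to vmlist before
 * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags
 * should contain proper values and the other fields should be zero.
 *
 * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
 */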
1130void __init vm_area_add_early(struct vm_struct *vm)
1131{
1132 struct vm_struct *tmp, **p;
1133
1134 BUG_ON(vmap_initialized);
1135 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1136 if (tmp->addr >= vm->addr) {
1137 BUG_ON(tmp->addr < vm->addr + vm->size);
1138 break;
1139 } else
1140 BUG_ON(tmp->addr + tmp->size > vm->addr);
1141 }
1142 vm->next = *p;
1143 *p = vm;
1144}
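
/**
 * vm_area_register_early - register vmap area early during boot
 * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register kernel vm area before
 * vmalloc_init() is called.  @vm->size and @vm->flags should contain
 * proper values on entry and other fields should be zero.  On return,
 * vm->addr contains the allocated address.
 *
 * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
 */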
1158void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1159{
1160 static size_t vm_init_off __initdata;
1161 unsigned long addr;
1162
1163 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1164 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1165
1166 vm->addr = (void *)addr;
1167
1168 vm_area_add_early(vm);
1169}
1170
1171void __init vmalloc_init(void)
1172{
1173 struct vmap_area *va;
1174 struct vm_struct *tmp;
1175 int i;
1176
1177 for_each_possible_cpu(i) {
1178 struct vmap_block_queue *vbq;
1179
1180 vbq = &per_cpu(vmap_block_queue, i);
1181 spin_lock_init(&vbq->lock);
1182 INIT_LIST_HEAD(&vbq->free);
1183 }
1184
1185
1186 for (tmp = vmlist; tmp; tmp = tmp->next) {
1187 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1188 va->flags = tmp->flags | VM_VM_AREA;
1189 va->va_start = (unsigned long)tmp->addr;
1190 va->va_end = va->va_start + tmp->size;
1191 __insert_vmap_area(va);
1192 }
1193
1194 vmap_area_pcpu_hole = VMALLOC_END;
1195
1196 vmap_initialized = true;
1197}
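
/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map the pages in @pages into the kernel VM area [@addr, @addr + @size).
 * The area should have been allocated with get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vmap() on to-be-mapped areas before calling this
 * function.
 *
 * RETURNS:
 * The number of pages mapped on success, -errno on failure.
 */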
1218int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1219 pgprot_t prot, struct page **pages)
1220{
1221 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1222}
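
/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap the kernel VM area [@addr, @addr + @size).
 *
 * NOTE:
 * This function does NOT do any cache or TLB flushing.  The caller is
 * responsible for calling flush_cache_vunmap() on to-be-unmapped areas
 * before calling this function and flush_tlb_kernel_range() after.
 */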
1238void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1239{
1240 vunmap_page_range(addr, addr + size);
1241}
1242EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
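
/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes the data cache
 * before the unmap and the TLB after it.
 */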
1252void unmap_kernel_range(unsigned long addr, unsigned long size)
1253{
1254 unsigned long end = addr + size;
1255
1256 flush_cache_vunmap(addr, end);
1257 vunmap_page_range(addr, end);
1258 flush_tlb_kernel_range(addr, end);
1259}
1260
1261int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1262{
1263 unsigned long addr = (unsigned long)area->addr;
1264 unsigned long end = addr + area->size - PAGE_SIZE;
1265 int err;
1266
1267 err = vmap_page_range(addr, end, prot, *pages);
1268 if (err > 0) {
1269 *pages += err;
1270 err = 0;
1271 }
1272
1273 return err;
1274}
1275EXPORT_SYMBOL_GPL(map_vm_area);
1276
1277
1278DEFINE_RWLOCK(vmlist_lock);
1279struct vm_struct *vmlist;
1280
1281static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1282 unsigned long flags, void *caller)
1283{
1284 vm->flags = flags;
1285 vm->addr = (void *)va->va_start;
1286 vm->size = va->va_end - va->va_start;
1287 vm->caller = caller;
1288 va->vm = vm;
1289 va->flags |= VM_VM_AREA;
1290}
1291
1292static void insert_vmalloc_vmlist(struct vm_struct *vm)
1293{
1294 struct vm_struct *tmp, **p;
1295
1296 vm->flags &= ~VM_UNLIST;
1297 write_lock(&vmlist_lock);
1298 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1299 if (tmp->addr >= vm->addr)
1300 break;
1301 }
1302 vm->next = *p;
1303 *p = vm;
1304 write_unlock(&vmlist_lock);
1305}
1306
1307static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1308 unsigned long flags, void *caller)
1309{
1310 setup_vmalloc_vm(vm, va, flags, caller);
1311 insert_vmalloc_vmlist(vm);
1312}
1313
1314static struct vm_struct *__get_vm_area_node(unsigned long size,
1315 unsigned long align, unsigned long flags, unsigned long start,
1316 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1317{
1318 struct vmap_area *va;
1319 struct vm_struct *area;
1320
1321 BUG_ON(in_interrupt());
1322 if (flags & VM_IOREMAP) {
1323 int bit = fls(size);
1324
1325 if (bit > IOREMAP_MAX_ORDER)
1326 bit = IOREMAP_MAX_ORDER;
1327 else if (bit < PAGE_SHIFT)
1328 bit = PAGE_SHIFT;
1329
1330 align = 1ul << bit;
1331 }
1332
1333 size = PAGE_ALIGN(size);
1334 if (unlikely(!size))
1335 return NULL;
1336
1337 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1338 if (unlikely(!area))
1339 return NULL;
1340
1341
1342
1343
1344 size += PAGE_SIZE;
1345
1346 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1347 if (IS_ERR(va)) {
1348 kfree(area);
1349 return NULL;
1350 }
1351
1352
1353
1354
1355
1356
1357
1358
1359 if (flags & VM_UNLIST)
1360 setup_vmalloc_vm(area, va, flags, caller);
1361 else
1362 insert_vmalloc_vm(area, va, flags, caller);
1363
1364 return area;
1365}
1366
1367struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1368 unsigned long start, unsigned long end)
1369{
1370 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1371 __builtin_return_address(0));
1372}
1373EXPORT_SYMBOL_GPL(__get_vm_area);
1374
1375struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1376 unsigned long start, unsigned long end,
1377 void *caller)
1378{
1379 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1380 caller);
1381}
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1393{
1394 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1395 -1, GFP_KERNEL, __builtin_return_address(0));
1396}
1397
1398struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1399 void *caller)
1400{
1401 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1402 -1, GFP_KERNEL, caller);
1403}
1404
1405static struct vm_struct *find_vm_area(const void *addr)
1406{
1407 struct vmap_area *va;
1408
1409 va = find_vmap_area((unsigned long)addr);
1410 if (va && va->flags & VM_VM_AREA)
1411 return va->vm;
1412
1413 return NULL;
1414}
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424struct vm_struct *remove_vm_area(const void *addr)
1425{
1426 struct vmap_area *va;
1427
1428 va = find_vmap_area((unsigned long)addr);
1429 if (va && va->flags & VM_VM_AREA) {
1430 struct vm_struct *vm = va->vm;
1431
1432 if (!(vm->flags & VM_UNLIST)) {
1433 struct vm_struct *tmp, **p;
1434
1435
1436
1437
1438
1439 write_lock(&vmlist_lock);
1440 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1441 ;
1442 *p = tmp->next;
1443 write_unlock(&vmlist_lock);
1444 }
1445
1446 vmap_debug_free_range(va->va_start, va->va_end);
1447 free_unmap_vmap_area(va);
1448 vm->size -= PAGE_SIZE;
1449
1450 return vm;
1451 }
1452 return NULL;
1453}
1454
1455static void __vunmap(const void *addr, int deallocate_pages)
1456{
1457 struct vm_struct *area;
1458
1459 if (!addr)
1460 return;
1461
1462 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1463 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1464 return;
1465 }
1466
1467 area = remove_vm_area(addr);
1468 if (unlikely(!area)) {
1469 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1470 addr);
1471 return;
1472 }
1473
1474 debug_check_no_locks_freed(addr, area->size);
1475 debug_check_no_obj_freed(addr, area->size);
1476
1477 if (deallocate_pages) {
1478 int i;
1479
1480 for (i = 0; i < area->nr_pages; i++) {
1481 struct page *page = area->pages[i];
1482
1483 BUG_ON(!page);
1484 __free_page(page);
1485 }
1486
1487 if (area->flags & VM_VPAGES)
1488 vfree(area->pages);
1489 else
1490 kfree(area->pages);
1491 }
1492
1493 kfree(area);
1494 return;
1495}
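
/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 * NULL, no operation is performed.
 *
 * Must not be called in interrupt context.
 */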
1507void vfree(const void *addr)
1508{
1509 BUG_ON(in_interrupt());
1510
1511 kmemleak_free(addr);
1512
1513 __vunmap(addr, 1);
1514}
1515EXPORT_SYMBOL(vfree);
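
/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */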
1526void vunmap(const void *addr)
1527{
1528 BUG_ON(in_interrupt());
1529 might_sleep();
1530 __vunmap(addr, 0);
1531}
1532EXPORT_SYMBOL(vunmap);
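
/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual space.
 */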
1544void *vmap(struct page **pages, unsigned int count,
1545 unsigned long flags, pgprot_t prot)
1546{
1547 struct vm_struct *area;
1548
1549 might_sleep();
1550
1551 if (count > totalram_pages)
1552 return NULL;
1553
1554 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1555 __builtin_return_address(0));
1556 if (!area)
1557 return NULL;
1558
1559 if (map_vm_area(area, prot, &pages)) {
1560 vunmap(area->addr);
1561 return NULL;
1562 }
1563
1564 return area->addr;
1565}
1566EXPORT_SYMBOL(vmap);
1567
1568static void *__vmalloc_node(unsigned long size, unsigned long align,
1569 gfp_t gfp_mask, pgprot_t prot,
1570 int node, void *caller);
1571static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1572 pgprot_t prot, int node, void *caller)
1573{
1574 const int order = 0;
1575 struct page **pages;
1576 unsigned int nr_pages, array_size, i;
1577 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1578
1579 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1580 array_size = (nr_pages * sizeof(struct page *));
1581
1582 area->nr_pages = nr_pages;
1583
1584 if (array_size > PAGE_SIZE) {
1585 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1586 PAGE_KERNEL, node, caller);
1587 area->flags |= VM_VPAGES;
1588 } else {
1589 pages = kmalloc_node(array_size, nested_gfp, node);
1590 }
1591 area->pages = pages;
1592 area->caller = caller;
1593 if (!area->pages) {
1594 remove_vm_area(area->addr);
1595 kfree(area);
1596 return NULL;
1597 }
1598
1599 for (i = 0; i < area->nr_pages; i++) {
1600 struct page *page;
1601 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1602
1603 if (node < 0)
1604 page = alloc_page(tmp_mask);
1605 else
1606 page = alloc_pages_node(node, tmp_mask, order);
1607
1608 if (unlikely(!page)) {
1609
1610 area->nr_pages = i;
1611 goto fail;
1612 }
1613 area->pages[i] = page;
1614 }
1615
1616 if (map_vm_area(area, prot, &pages))
1617 goto fail;
1618 return area->addr;
1619
1620fail:
1621 warn_alloc_failed(gfp_mask, order,
1622 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1623 (area->nr_pages*PAGE_SIZE), area->size);
1624 vfree(area->addr);
1625 return NULL;
1626}
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643void *__vmalloc_node_range(unsigned long size, unsigned long align,
1644 unsigned long start, unsigned long end, gfp_t gfp_mask,
1645 pgprot_t prot, int node, void *caller)
1646{
1647 struct vm_struct *area;
1648 void *addr;
1649 unsigned long real_size = size;
1650
1651 size = PAGE_ALIGN(size);
1652 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1653 goto fail;
1654
1655 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
1656 start, end, node, gfp_mask, caller);
1657 if (!area)
1658 goto fail;
1659
1660 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1661 if (!addr)
1662 return NULL;
1663
1664
1665
1666
1667
1668 insert_vmalloc_vmlist(area);
1669
1670
1671
1672
1673
1674
1675 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1676
1677 return addr;
1678
1679fail:
1680 warn_alloc_failed(gfp_mask, 0,
1681 "vmalloc: allocation failure: %lu bytes\n",
1682 real_size);
1683 return NULL;
1684}
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699static void *__vmalloc_node(unsigned long size, unsigned long align,
1700 gfp_t gfp_mask, pgprot_t prot,
1701 int node, void *caller)
1702{
1703 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1704 gfp_mask, prot, node, caller);
1705}
1706
1707void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1708{
1709 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1710 __builtin_return_address(0));
1711}
1712EXPORT_SYMBOL(__vmalloc);
1713
1714static inline void *__vmalloc_node_flags(unsigned long size,
1715 int node, gfp_t flags)
1716{
1717 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1718 node, __builtin_return_address(0));
1719}
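
/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level allocator
 * and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */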
1730void *vmalloc(unsigned long size)
1731{
1732 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1733}
1734EXPORT_SYMBOL(vmalloc);
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746void *vzalloc(unsigned long size)
1747{
1748 return __vmalloc_node_flags(size, -1,
1749 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1750}
1751EXPORT_SYMBOL(vzalloc);
1752
1753
1754
1755
1756
1757
1758
1759
1760void *vmalloc_user(unsigned long size)
1761{
1762 struct vm_struct *area;
1763 void *ret;
1764
1765 ret = __vmalloc_node(size, SHMLBA,
1766 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1767 PAGE_KERNEL, -1, __builtin_return_address(0));
1768 if (ret) {
1769 area = find_vm_area(ret);
1770 area->flags |= VM_USERMAP;
1771 }
1772 return ret;
1773}
1774EXPORT_SYMBOL(vmalloc_user);
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787void *vmalloc_node(unsigned long size, int node)
1788{
1789 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1790 node, __builtin_return_address(0));
1791}
1792EXPORT_SYMBOL(vmalloc_node);
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806void *vzalloc_node(unsigned long size, int node)
1807{
1808 return __vmalloc_node_flags(size, node,
1809 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1810}
1811EXPORT_SYMBOL(vzalloc_node);
1812
1813#ifndef PAGE_KERNEL_EXEC
1814# define PAGE_KERNEL_EXEC PAGE_KERNEL
1815#endif
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829void *vmalloc_exec(unsigned long size)
1830{
1831 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1832 -1, __builtin_return_address(0));
1833}
1834
1835#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1836#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
1837#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1838#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
1839#else
1840#define GFP_VMALLOC32 GFP_KERNEL
1841#endif
1842
1843
1844
1845
1846
1847
1848
1849
1850void *vmalloc_32(unsigned long size)
1851{
1852 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1853 -1, __builtin_return_address(0));
1854}
1855EXPORT_SYMBOL(vmalloc_32);
1856
1857
1858
1859
1860
1861
1862
1863
1864void *vmalloc_32_user(unsigned long size)
1865{
1866 struct vm_struct *area;
1867 void *ret;
1868
1869 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1870 -1, __builtin_return_address(0));
1871 if (ret) {
1872 area = find_vm_area(ret);
1873 area->flags |= VM_USERMAP;
1874 }
1875 return ret;
1876}
1877EXPORT_SYMBOL(vmalloc_32_user);
1878
1879
1880
1881
1882
1883
1884static int aligned_vread(char *buf, char *addr, unsigned long count)
1885{
1886 struct page *p;
1887 int copied = 0;
1888
1889 while (count) {
1890 unsigned long offset, length;
1891
1892 offset = (unsigned long)addr & ~PAGE_MASK;
1893 length = PAGE_SIZE - offset;
1894 if (length > count)
1895 length = count;
1896 p = vmalloc_to_page(addr);
1897
1898
1899
1900
1901
1902
1903
1904 if (p) {
1905
1906
1907
1908
1909 void *map = kmap_atomic(p, KM_USER0);
1910 memcpy(buf, map + offset, length);
1911 kunmap_atomic(map, KM_USER0);
1912 } else
1913 memset(buf, 0, length);
1914
1915 addr += length;
1916 buf += length;
1917 copied += length;
1918 count -= length;
1919 }
1920 return copied;
1921}
1922
1923static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1924{
1925 struct page *p;
1926 int copied = 0;
1927
1928 while (count) {
1929 unsigned long offset, length;
1930
1931 offset = (unsigned long)addr & ~PAGE_MASK;
1932 length = PAGE_SIZE - offset;
1933 if (length > count)
1934 length = count;
1935 p = vmalloc_to_page(addr);
1936
1937
1938
1939
1940
1941
1942
1943 if (p) {
1944
1945
1946
1947
1948 void *map = kmap_atomic(p, KM_USER0);
1949 memcpy(map + offset, buf, length);
1950 kunmap_atomic(map, KM_USER0);
1951 }
1952 addr += length;
1953 buf += length;
1954 copied += length;
1955 count -= length;
1956 }
1957 return copied;
1958}
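
/**
 * vread() - read vmalloc area in a safe way.
 * @buf: buffer for reading data
 * @addr: vm address.
 * @count: number of bytes to be read.
 *
 * This function checks that addr is a valid vmalloc'ed area, and copies
 * data from that area into @buf.  Memory holes within the range are
 * zero-filled, and IOREMAP areas are treated as holes (no copy is done).
 * Returns @count (possibly clamped at the end of the address space) if
 * any part of [addr...addr+count) intersects a vmalloc area, 0 otherwise.
 *
 * This is intended for callers such as /dev/kmem or /proc/kcore that must
 * access vmalloc space without knowing whether it is currently mapped.
 */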
1988long vread(char *buf, char *addr, unsigned long count)
1989{
1990 struct vm_struct *tmp;
1991 char *vaddr, *buf_start = buf;
1992 unsigned long buflen = count;
1993 unsigned long n;
1994
1995
1996 if ((unsigned long) addr + count < count)
1997 count = -(unsigned long) addr;
1998
1999 read_lock(&vmlist_lock);
2000 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2001 vaddr = (char *) tmp->addr;
2002 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2003 continue;
2004 while (addr < vaddr) {
2005 if (count == 0)
2006 goto finished;
2007 *buf = '\0';
2008 buf++;
2009 addr++;
2010 count--;
2011 }
2012 n = vaddr + tmp->size - PAGE_SIZE - addr;
2013 if (n > count)
2014 n = count;
2015 if (!(tmp->flags & VM_IOREMAP))
2016 aligned_vread(buf, addr, n);
2017 else
2018 memset(buf, 0, n);
2019 buf += n;
2020 addr += n;
2021 count -= n;
2022 }
2023finished:
2024 read_unlock(&vmlist_lock);
2025
2026 if (buf == buf_start)
2027 return 0;
2028
2029 if (buf != buf_start + buflen)
2030 memset(buf, 0, buflen - (buf - buf_start));
2031
2032 return buflen;
2033}
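
/**
 * vwrite() - write vmalloc area in a safe way.
 * @buf: buffer for source data
 * @addr: vm address.
 * @count: number of bytes to be written.
 *
 * This function checks that addr is a valid vmalloc'ed area, and copies
 * data from @buf into it.  Memory holes and IOREMAP areas are skipped.
 * Returns @count if any part of [addr...addr+count) hit a vmalloc area
 * that was written to, 0 otherwise.
 */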
2063long vwrite(char *buf, char *addr, unsigned long count)
2064{
2065 struct vm_struct *tmp;
2066 char *vaddr;
2067 unsigned long n, buflen;
2068 int copied = 0;
2069
2070
2071 if ((unsigned long) addr + count < count)
2072 count = -(unsigned long) addr;
2073 buflen = count;
2074
2075 read_lock(&vmlist_lock);
2076 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2077 vaddr = (char *) tmp->addr;
2078 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2079 continue;
2080 while (addr < vaddr) {
2081 if (count == 0)
2082 goto finished;
2083 buf++;
2084 addr++;
2085 count--;
2086 }
2087 n = vaddr + tmp->size - PAGE_SIZE - addr;
2088 if (n > count)
2089 n = count;
2090 if (!(tmp->flags & VM_IOREMAP)) {
2091 aligned_vwrite(buf, addr, n);
2092 copied++;
2093 }
2094 buf += n;
2095 addr += n;
2096 count -= n;
2097 }
2098finished:
2099 read_unlock(&vmlist_lock);
2100 if (!copied)
2101 return 0;
2102 return buflen;
2103}
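
/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma: vma to cover (map full range of vma)
 * @addr: vmalloc memory
 * @pgoff: number of pages into addr before first page to map
 *
 * Returns: 0 for success, -Exxx on failure
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * that it is big enough to cover the vma. Will return failure if
 * that criteria isn't met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */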
2119int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2120 unsigned long pgoff)
2121{
2122 struct vm_struct *area;
2123 unsigned long uaddr = vma->vm_start;
2124 unsigned long usize = vma->vm_end - vma->vm_start;
2125
2126 if ((PAGE_SIZE-1) & (unsigned long)addr)
2127 return -EINVAL;
2128
2129 area = find_vm_area(addr);
2130 if (!area)
2131 return -EINVAL;
2132
2133 if (!(area->flags & VM_USERMAP))
2134 return -EINVAL;
2135
2136 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2137 return -EINVAL;
2138
2139 addr += pgoff << PAGE_SHIFT;
2140 do {
2141 struct page *page = vmalloc_to_page(addr);
2142 int ret;
2143
2144 ret = vm_insert_page(vma, uaddr, page);
2145 if (ret)
2146 return ret;
2147
2148 uaddr += PAGE_SIZE;
2149 addr += PAGE_SIZE;
2150 usize -= PAGE_SIZE;
2151 } while (usize > 0);
2152
2153
2154 vma->vm_flags |= VM_RESERVED;
2155
2156 return 0;
2157}
2158EXPORT_SYMBOL(remap_vmalloc_range);
2159
2160
2161
2162
2163
2164void __attribute__((weak)) vmalloc_sync_all(void)
2165{
2166}
2167
2168
2169static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2170{
2171 pte_t ***p = data;
2172
2173 if (p) {
2174 *(*p) = pte;
2175 (*p)++;
2176 }
2177 return 0;
2178}
2179
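
/**
 * alloc_vm_area - allocate a range of kernel address space
 * @size: size of the area
 * @ptes: returns the PTEs for the address space
 *
 * Returns: NULL on failure, vm_struct on success
 *
 * This function reserves a range of kernel address space, and
 * allocates pagetables to map that range.  No actual mappings
 * are created.
 *
 * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
 * allocated for the VM area are returned.
 */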
2194struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2195{
2196 struct vm_struct *area;
2197
2198 area = get_vm_area_caller(size, VM_IOREMAP,
2199 __builtin_return_address(0));
2200 if (area == NULL)
2201 return NULL;
2202
2203
2204
2205
2206
2207 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2208 size, f, ptes ? &ptes : NULL)) {
2209 free_vm_area(area);
2210 return NULL;
2211 }
2212
2213 return area;
2214}
2215EXPORT_SYMBOL_GPL(alloc_vm_area);
2216
2217void free_vm_area(struct vm_struct *area)
2218{
2219 struct vm_struct *ret;
2220 ret = remove_vm_area(area->addr);
2221 BUG_ON(ret != area);
2222 kfree(area);
2223}
2224EXPORT_SYMBOL_GPL(free_vm_area);
2225
2226#ifdef CONFIG_SMP
2227static struct vmap_area *node_to_va(struct rb_node *n)
2228{
2229 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2230}
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244static bool pvm_find_next_prev(unsigned long end,
2245 struct vmap_area **pnext,
2246 struct vmap_area **pprev)
2247{
2248 struct rb_node *n = vmap_area_root.rb_node;
2249 struct vmap_area *va = NULL;
2250
2251 while (n) {
2252 va = rb_entry(n, struct vmap_area, rb_node);
2253 if (end < va->va_end)
2254 n = n->rb_left;
2255 else if (end > va->va_end)
2256 n = n->rb_right;
2257 else
2258 break;
2259 }
2260
2261 if (!va)
2262 return false;
2263
2264 if (va->va_end > end) {
2265 *pnext = va;
2266 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2267 } else {
2268 *pprev = va;
2269 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2270 }
2271 return true;
2272}
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290static unsigned long pvm_determine_end(struct vmap_area **pnext,
2291 struct vmap_area **pprev,
2292 unsigned long align)
2293{
2294 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2295 unsigned long addr;
2296
2297 if (*pnext)
2298 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2299 else
2300 addr = vmalloc_end;
2301
2302 while (*pprev && (*pprev)->va_end > addr) {
2303 *pnext = *pprev;
2304 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2305 }
2306
2307 return addr;
2308}
2309
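
/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * Percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * @nr_vms areas that share a single base address, searching downwards
 * from vmap_area_pcpu_hole at the top of vmalloc space, and retries
 * after purging lazily freed areas if the first attempt fails.
 */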
2334struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2335 const size_t *sizes, int nr_vms,
2336 size_t align)
2337{
2338 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2339 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2340 struct vmap_area **vas, *prev, *next;
2341 struct vm_struct **vms;
2342 int area, area2, last_area, term_area;
2343 unsigned long base, start, end, last_end;
2344 bool purged = false;
2345
2346
2347 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2348 for (last_area = 0, area = 0; area < nr_vms; area++) {
2349 start = offsets[area];
2350 end = start + sizes[area];
2351
2352
2353 BUG_ON(!IS_ALIGNED(offsets[area], align));
2354 BUG_ON(!IS_ALIGNED(sizes[area], align));
2355
2356
2357 if (start > offsets[last_area])
2358 last_area = area;
2359
2360 for (area2 = 0; area2 < nr_vms; area2++) {
2361 unsigned long start2 = offsets[area2];
2362 unsigned long end2 = start2 + sizes[area2];
2363
2364 if (area2 == area)
2365 continue;
2366
2367 BUG_ON(start2 >= start && start2 < end);
2368 BUG_ON(end2 <= end && end2 > start);
2369 }
2370 }
2371 last_end = offsets[last_area] + sizes[last_area];
2372
2373 if (vmalloc_end - vmalloc_start < last_end) {
2374 WARN_ON(true);
2375 return NULL;
2376 }
2377
2378 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
2379 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
2380 if (!vas || !vms)
2381 goto err_free2;
2382
2383 for (area = 0; area < nr_vms; area++) {
2384 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2385 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2386 if (!vas[area] || !vms[area])
2387 goto err_free;
2388 }
2389retry:
2390 spin_lock(&vmap_area_lock);
2391
2392
2393 area = term_area = last_area;
2394 start = offsets[area];
2395 end = start + sizes[area];
2396
2397 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2398 base = vmalloc_end - last_end;
2399 goto found;
2400 }
2401 base = pvm_determine_end(&next, &prev, align) - end;
2402
2403 while (true) {
2404 BUG_ON(next && next->va_end <= base + end);
2405 BUG_ON(prev && prev->va_end > base + end);
2406
2407
2408
2409
2410
2411 if (base + last_end < vmalloc_start + last_end) {
2412 spin_unlock(&vmap_area_lock);
2413 if (!purged) {
2414 purge_vmap_area_lazy();
2415 purged = true;
2416 goto retry;
2417 }
2418 goto err_free;
2419 }
2420
2421
2422
2423
2424
2425 if (next && next->va_start < base + end) {
2426 base = pvm_determine_end(&next, &prev, align) - end;
2427 term_area = area;
2428 continue;
2429 }
2430
2431
2432
2433
2434
2435
2436 if (prev && prev->va_end > base + start) {
2437 next = prev;
2438 prev = node_to_va(rb_prev(&next->rb_node));
2439 base = pvm_determine_end(&next, &prev, align) - end;
2440 term_area = area;
2441 continue;
2442 }
2443
2444
2445
2446
2447
2448 area = (area + nr_vms - 1) % nr_vms;
2449 if (area == term_area)
2450 break;
2451 start = offsets[area];
2452 end = start + sizes[area];
2453 pvm_find_next_prev(base + end, &next, &prev);
2454 }
2455found:
2456
2457 for (area = 0; area < nr_vms; area++) {
2458 struct vmap_area *va = vas[area];
2459
2460 va->va_start = base + offsets[area];
2461 va->va_end = va->va_start + sizes[area];
2462 __insert_vmap_area(va);
2463 }
2464
2465 vmap_area_pcpu_hole = base + offsets[last_area];
2466
2467 spin_unlock(&vmap_area_lock);
2468
2469
2470 for (area = 0; area < nr_vms; area++)
2471 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2472 pcpu_get_vm_areas);
2473
2474 kfree(vas);
2475 return vms;
2476
2477err_free:
2478 for (area = 0; area < nr_vms; area++) {
2479 kfree(vas[area]);
2480 kfree(vms[area]);
2481 }
2482err_free2:
2483 kfree(vas);
2484 kfree(vms);
2485 return NULL;
2486}
2487
2488
2489
2490
2491
2492
2493
2494
2495void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2496{
2497 int i;
2498
2499 for (i = 0; i < nr_vms; i++)
2500 free_vm_area(vms[i]);
2501 kfree(vms);
2502}
2503#endif
2504
2505#ifdef CONFIG_PROC_FS
2506static void *s_start(struct seq_file *m, loff_t *pos)
2507 __acquires(&vmlist_lock)
2508{
2509 loff_t n = *pos;
2510 struct vm_struct *v;
2511
2512 read_lock(&vmlist_lock);
2513 v = vmlist;
2514 while (n > 0 && v) {
2515 n--;
2516 v = v->next;
2517 }
2518 if (!n)
2519 return v;
2520
2521 return NULL;
2522
2523}
2524
2525static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2526{
2527 struct vm_struct *v = p;
2528
2529 ++*pos;
2530 return v->next;
2531}
2532
2533static void s_stop(struct seq_file *m, void *p)
2534 __releases(&vmlist_lock)
2535{
2536 read_unlock(&vmlist_lock);
2537}
2538
2539static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2540{
2541 if (NUMA_BUILD) {
2542 unsigned int nr, *counters = m->private;
2543
2544 if (!counters)
2545 return;
2546
2547 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2548
2549 for (nr = 0; nr < v->nr_pages; nr++)
2550 counters[page_to_nid(v->pages[nr])]++;
2551
2552 for_each_node_state(nr, N_HIGH_MEMORY)
2553 if (counters[nr])
2554 seq_printf(m, " N%u=%u", nr, counters[nr]);
2555 }
2556}
2557
2558static int s_show(struct seq_file *m, void *p)
2559{
2560 struct vm_struct *v = p;
2561
2562 seq_printf(m, "0x%p-0x%p %7ld",
2563 v->addr, v->addr + v->size, v->size);
2564
2565 if (v->caller)
2566 seq_printf(m, " %pS", v->caller);
2567
2568 if (v->nr_pages)
2569 seq_printf(m, " pages=%d", v->nr_pages);
2570
2571 if (v->phys_addr)
2572 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2573
2574 if (v->flags & VM_IOREMAP)
2575 seq_printf(m, " ioremap");
2576
2577 if (v->flags & VM_ALLOC)
2578 seq_printf(m, " vmalloc");
2579
2580 if (v->flags & VM_MAP)
2581 seq_printf(m, " vmap");
2582
2583 if (v->flags & VM_USERMAP)
2584 seq_printf(m, " user");
2585
2586 if (v->flags & VM_VPAGES)
2587 seq_printf(m, " vpages");
2588
2589 show_numa_info(m, v);
2590 seq_putc(m, '\n');
2591 return 0;
2592}
2593
2594static const struct seq_operations vmalloc_op = {
2595 .start = s_start,
2596 .next = s_next,
2597 .stop = s_stop,
2598 .show = s_show,
2599};
2600
2601static int vmalloc_open(struct inode *inode, struct file *file)
2602{
2603 unsigned int *ptr = NULL;
2604 int ret;
2605
2606 if (NUMA_BUILD) {
2607 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2608 if (ptr == NULL)
2609 return -ENOMEM;
2610 }
2611 ret = seq_open(file, &vmalloc_op);
2612 if (!ret) {
2613 struct seq_file *m = file->private_data;
2614 m->private = ptr;
2615 } else
2616 kfree(ptr);
2617 return ret;
2618}
2619
2620static const struct file_operations proc_vmalloc_operations = {
2621 .open = vmalloc_open,
2622 .read = seq_read,
2623 .llseek = seq_lseek,
2624 .release = seq_release_private,
2625};
2626
2627static int __init proc_vmalloc_init(void)
2628{
2629 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2630 return 0;
2631}
2632module_init(proc_vmalloc_init);
2633#endif
2634
2635