1
2
3
4
5
6
7
8
9
10
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/spinlock.h>
18#include <linux/interrupt.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/debugobjects.h>
22#include <linux/kallsyms.h>
23#include <linux/list.h>
24#include <linux/rbtree.h>
25#include <linux/radix-tree.h>
26#include <linux/rcupdate.h>
27#include <linux/pfn.h>
28#include <linux/kmemleak.h>
29#include <asm/atomic.h>
30#include <asm/uaccess.h>
31#include <asm/tlbflush.h>
32#include <asm/shmparam.h>
33
34
35
36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
37{
38 pte_t *pte;
39
40 pte = pte_offset_kernel(pmd, addr);
41 do {
42 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
43 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
44 } while (pte++, addr += PAGE_SIZE, addr != end);
45}
46
47static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
48{
49 pmd_t *pmd;
50 unsigned long next;
51
52 pmd = pmd_offset(pud, addr);
53 do {
54 next = pmd_addr_end(addr, end);
55 if (pmd_none_or_clear_bad(pmd))
56 continue;
57 vunmap_pte_range(pmd, addr, next);
58 } while (pmd++, addr = next, addr != end);
59}
60
61static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
62{
63 pud_t *pud;
64 unsigned long next;
65
66 pud = pud_offset(pgd, addr);
67 do {
68 next = pud_addr_end(addr, end);
69 if (pud_none_or_clear_bad(pud))
70 continue;
71 vunmap_pmd_range(pud, addr, next);
72 } while (pud++, addr = next, addr != end);
73}
74
75static void vunmap_page_range(unsigned long addr, unsigned long end)
76{
77 pgd_t *pgd;
78 unsigned long next;
79
80 BUG_ON(addr >= end);
81 pgd = pgd_offset_k(addr);
82 do {
83 next = pgd_addr_end(addr, end);
84 if (pgd_none_or_clear_bad(pgd))
85 continue;
86 vunmap_pud_range(pgd, addr, next);
87 } while (pgd++, addr = next, addr != end);
88}
89
90static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
91 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
92{
93 pte_t *pte;
94
95
96
97
98
99
100 pte = pte_alloc_kernel(pmd, addr);
101 if (!pte)
102 return -ENOMEM;
103 do {
104 struct page *page = pages[*nr];
105
106 if (WARN_ON(!pte_none(*pte)))
107 return -EBUSY;
108 if (WARN_ON(!page))
109 return -ENOMEM;
110 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
111 (*nr)++;
112 } while (pte++, addr += PAGE_SIZE, addr != end);
113 return 0;
114}
115
116static int vmap_pmd_range(pud_t *pud, unsigned long addr,
117 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
118{
119 pmd_t *pmd;
120 unsigned long next;
121
122 pmd = pmd_alloc(&init_mm, pud, addr);
123 if (!pmd)
124 return -ENOMEM;
125 do {
126 next = pmd_addr_end(addr, end);
127 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
128 return -ENOMEM;
129 } while (pmd++, addr = next, addr != end);
130 return 0;
131}
132
133static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
134 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
135{
136 pud_t *pud;
137 unsigned long next;
138
139 pud = pud_alloc(&init_mm, pgd, addr);
140 if (!pud)
141 return -ENOMEM;
142 do {
143 next = pud_addr_end(addr, end);
144 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
145 return -ENOMEM;
146 } while (pud++, addr = next, addr != end);
147 return 0;
148}
149
150
151
152
153
154
155
156static int vmap_page_range_noflush(unsigned long start, unsigned long end,
157 pgprot_t prot, struct page **pages)
158{
159 pgd_t *pgd;
160 unsigned long next;
161 unsigned long addr = start;
162 int err = 0;
163 int nr = 0;
164
165 BUG_ON(addr >= end);
166 pgd = pgd_offset_k(addr);
167 do {
168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err)
171 return err;
172 } while (pgd++, addr = next, addr != end);
173
174 return nr;
175}
176
177static int vmap_page_range(unsigned long start, unsigned long end,
178 pgprot_t prot, struct page **pages)
179{
180 int ret;
181
182 ret = vmap_page_range_noflush(start, end, prot, pages);
183 flush_cache_vmap(start, end);
184 return ret;
185}
186
/*
 * Return non-zero if @x lies in the vmalloc range or, when both
 * CONFIG_MODULES and MODULES_VADDR are defined, in
 * [MODULES_VADDR, MODULES_END).
 */
int is_vmalloc_or_module_addr(const void *x)
{
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	const unsigned long va = (unsigned long)x;

	if (va >= MODULES_VADDR && va < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}
201
202
203
204
205struct page *vmalloc_to_page(const void *vmalloc_addr)
206{
207 unsigned long addr = (unsigned long) vmalloc_addr;
208 struct page *page = NULL;
209 pgd_t *pgd = pgd_offset_k(addr);
210
211
212
213
214
215 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
216
217 if (!pgd_none(*pgd)) {
218 pud_t *pud = pud_offset(pgd, addr);
219 if (!pud_none(*pud)) {
220 pmd_t *pmd = pmd_offset(pud, addr);
221 if (!pmd_none(*pmd)) {
222 pte_t *ptep, pte;
223
224 ptep = pte_offset_map(pmd, addr);
225 pte = *ptep;
226 if (pte_present(pte))
227 page = pte_page(pte);
228 pte_unmap(ptep);
229 }
230 }
231 }
232 return page;
233}
234EXPORT_SYMBOL(vmalloc_to_page);
235
236
237
238
/*
 * Return the page frame number of the page that @vmalloc_addr maps to, as
 * found by vmalloc_to_page().
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}
EXPORT_SYMBOL(vmalloc_to_pfn);
244
245
246
247
/* Bits stored in vmap_area.flags. */
#define VM_LAZY_FREE	0x01	/* set by free_vmap_area_noflush(); area awaits purge */
#define VM_LAZY_FREEING	0x02	/* set by __purge_vmap_area_lazy() while it frees the area */
#define VM_VM_AREA	0x04	/* ->private points at a struct vm_struct */

/*
 * One allocated range of kernel virtual address space.
 */
struct vmap_area {
	unsigned long va_start;		/* first address of the range */
	unsigned long va_end;		/* va_start + size (exclusive end) */
	unsigned long flags;		/* VM_LAZY_FREE, VM_LAZY_FREEING, VM_VM_AREA */
	struct rb_node rb_node;		/* node in vmap_area_root */
	struct list_head list;		/* entry in vmap_area_list (RCU list ops) */
	struct list_head purge_list;	/* entry in the purge pass's private list */
	void *private;			/* struct vm_struct when VM_VM_AREA is set */
	struct rcu_head rcu_head;	/* used to kfree() the area via call_rcu() */
};
262
/* Lock taken around lookups in, and updates of, the tree and list below. */
static DEFINE_SPINLOCK(vmap_area_lock);
/* All vmap areas; each new area is linked right after its rb-tree predecessor. */
static LIST_HEAD(vmap_area_list);
/* All vmap areas, in an rb-tree ordered by address. */
static struct rb_root vmap_area_root = RB_ROOT;

/*
 * Search cache used by alloc_vmap_area(): the most recently inserted area
 * plus the parameters the cached state is valid for.
 */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

/* Initialised to VMALLOC_END; raised by __free_vmap_area() when an area is freed. */
static unsigned long vmap_area_pcpu_hole;
274
275static struct vmap_area *__find_vmap_area(unsigned long addr)
276{
277 struct rb_node *n = vmap_area_root.rb_node;
278
279 while (n) {
280 struct vmap_area *va;
281
282 va = rb_entry(n, struct vmap_area, rb_node);
283 if (addr < va->va_start)
284 n = n->rb_left;
285 else if (addr > va->va_start)
286 n = n->rb_right;
287 else
288 return va;
289 }
290
291 return NULL;
292}
293
294static void __insert_vmap_area(struct vmap_area *va)
295{
296 struct rb_node **p = &vmap_area_root.rb_node;
297 struct rb_node *parent = NULL;
298 struct rb_node *tmp;
299
300 while (*p) {
301 struct vmap_area *tmp_va;
302
303 parent = *p;
304 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
305 if (va->va_start < tmp_va->va_end)
306 p = &(*p)->rb_left;
307 else if (va->va_end > tmp_va->va_start)
308 p = &(*p)->rb_right;
309 else
310 BUG();
311 }
312
313 rb_link_node(&va->rb_node, parent, p);
314 rb_insert_color(&va->rb_node, &vmap_area_root);
315
316
317 tmp = rb_prev(&va->rb_node);
318 if (tmp) {
319 struct vmap_area *prev;
320 prev = rb_entry(tmp, struct vmap_area, rb_node);
321 list_add_rcu(&va->list, &prev->list);
322 } else
323 list_add_rcu(&va->list, &vmap_area_list);
324}
325
/* Forward declaration: alloc_vmap_area() calls this before its definition. */
static void purge_vmap_area_lazy(void);
327
328
329
330
331
332static struct vmap_area *alloc_vmap_area(unsigned long size,
333 unsigned long align,
334 unsigned long vstart, unsigned long vend,
335 int node, gfp_t gfp_mask)
336{
337 struct vmap_area *va;
338 struct rb_node *n;
339 unsigned long addr;
340 int purged = 0;
341 struct vmap_area *first;
342
343 BUG_ON(!size);
344 BUG_ON(size & ~PAGE_MASK);
345 BUG_ON(!is_power_of_2(align));
346
347 va = kmalloc_node(sizeof(struct vmap_area),
348 gfp_mask & GFP_RECLAIM_MASK, node);
349 if (unlikely(!va))
350 return ERR_PTR(-ENOMEM);
351
352retry:
353 spin_lock(&vmap_area_lock);
354
355
356
357
358
359
360
361
362
363 if (!free_vmap_cache ||
364 size < cached_hole_size ||
365 vstart < cached_vstart ||
366 align < cached_align) {
367nocache:
368 cached_hole_size = 0;
369 free_vmap_cache = NULL;
370 }
371
372 cached_vstart = vstart;
373 cached_align = align;
374
375
376 if (free_vmap_cache) {
377 first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
378 addr = ALIGN(first->va_end, align);
379 if (addr < vstart)
380 goto nocache;
381 if (addr + size - 1 < addr)
382 goto overflow;
383
384 } else {
385 addr = ALIGN(vstart, align);
386 if (addr + size - 1 < addr)
387 goto overflow;
388
389 n = vmap_area_root.rb_node;
390 first = NULL;
391
392 while (n) {
393 struct vmap_area *tmp;
394 tmp = rb_entry(n, struct vmap_area, rb_node);
395 if (tmp->va_end >= addr) {
396 first = tmp;
397 if (tmp->va_start <= addr)
398 break;
399 n = n->rb_left;
400 } else
401 n = n->rb_right;
402 }
403
404 if (!first)
405 goto found;
406 }
407
408
409 while (addr + size > first->va_start && addr + size <= vend) {
410 if (addr + cached_hole_size < first->va_start)
411 cached_hole_size = first->va_start - addr;
412 addr = ALIGN(first->va_end, align);
413 if (addr + size - 1 < addr)
414 goto overflow;
415
416 n = rb_next(&first->rb_node);
417 if (n)
418 first = rb_entry(n, struct vmap_area, rb_node);
419 else
420 goto found;
421 }
422
423found:
424 if (addr + size > vend)
425 goto overflow;
426
427 va->va_start = addr;
428 va->va_end = addr + size;
429 va->flags = 0;
430 __insert_vmap_area(va);
431 free_vmap_cache = &va->rb_node;
432 spin_unlock(&vmap_area_lock);
433
434 BUG_ON(va->va_start & (align-1));
435 BUG_ON(va->va_start < vstart);
436 BUG_ON(va->va_end > vend);
437
438 return va;
439
440overflow:
441 spin_unlock(&vmap_area_lock);
442 if (!purged) {
443 purge_vmap_area_lazy();
444 purged = 1;
445 goto retry;
446 }
447 if (printk_ratelimit())
448 printk(KERN_WARNING
449 "vmap allocation for size %lu failed: "
450 "use vmalloc=<size> to increase size.\n", size);
451 kfree(va);
452 return ERR_PTR(-EBUSY);
453}
454
455static void rcu_free_va(struct rcu_head *head)
456{
457 struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
458
459 kfree(va);
460}
461
462static void __free_vmap_area(struct vmap_area *va)
463{
464 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
465
466 if (free_vmap_cache) {
467 if (va->va_end < cached_vstart) {
468 free_vmap_cache = NULL;
469 } else {
470 struct vmap_area *cache;
471 cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
472 if (va->va_start <= cache->va_start) {
473 free_vmap_cache = rb_prev(&va->rb_node);
474
475
476
477
478 }
479 }
480 }
481 rb_erase(&va->rb_node, &vmap_area_root);
482 RB_CLEAR_NODE(&va->rb_node);
483 list_del_rcu(&va->list);
484
485
486
487
488
489
490
491 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
492 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
493
494 call_rcu(&va->rcu_head, rcu_free_va);
495}
496
497
498
499
500static void free_vmap_area(struct vmap_area *va)
501{
502 spin_lock(&vmap_area_lock);
503 __free_vmap_area(va);
504 spin_unlock(&vmap_area_lock);
505}
506
507
508
509
510static void unmap_vmap_area(struct vmap_area *va)
511{
512 vunmap_page_range(va->va_start, va->va_end);
513}
514
/*
 * Debug helper: with CONFIG_DEBUG_PAGEALLOC the range [start, end) is
 * unmapped and its TLB entries are flushed immediately.  Without that
 * option this function does nothing.
 */
static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552static unsigned long lazy_max_pages(void)
553{
554 unsigned int log;
555
556 log = fls(num_online_cpus());
557
558 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
559}
560
/*
 * Count of lazily freed pages: increased in free_vmap_area_noflush() and
 * decreased in __purge_vmap_area_lazy().
 */
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* Forward declaration: __purge_vmap_area_lazy() calls this before its definition. */
static void purge_fragmented_blocks_allcpus(void);
565
566
567
568
569
570void set_iounmap_nonlazy(void)
571{
572 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
573}
574
575
576
577
578
579
580
581
582
583
584
585static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
586 int sync, int force_flush)
587{
588 static DEFINE_SPINLOCK(purge_lock);
589 LIST_HEAD(valist);
590 struct vmap_area *va;
591 struct vmap_area *n_va;
592 int nr = 0;
593
594
595
596
597
598
599 if (!sync && !force_flush) {
600 if (!spin_trylock(&purge_lock))
601 return;
602 } else
603 spin_lock(&purge_lock);
604
605 if (sync)
606 purge_fragmented_blocks_allcpus();
607
608 rcu_read_lock();
609 list_for_each_entry_rcu(va, &vmap_area_list, list) {
610 if (va->flags & VM_LAZY_FREE) {
611 if (va->va_start < *start)
612 *start = va->va_start;
613 if (va->va_end > *end)
614 *end = va->va_end;
615 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
616 list_add_tail(&va->purge_list, &valist);
617 va->flags |= VM_LAZY_FREEING;
618 va->flags &= ~VM_LAZY_FREE;
619 }
620 }
621 rcu_read_unlock();
622
623 if (nr)
624 atomic_sub(nr, &vmap_lazy_nr);
625
626 if (nr || force_flush)
627 flush_tlb_kernel_range(*start, *end);
628
629 if (nr) {
630 spin_lock(&vmap_area_lock);
631 list_for_each_entry_safe(va, n_va, &valist, purge_list)
632 __free_vmap_area(va);
633 spin_unlock(&vmap_area_lock);
634 }
635 spin_unlock(&purge_lock);
636}
637
638
639
640
641
642static void try_purge_vmap_area_lazy(void)
643{
644 unsigned long start = ULONG_MAX, end = 0;
645
646 __purge_vmap_area_lazy(&start, &end, 0, 0);
647}
648
649
650
651
652static void purge_vmap_area_lazy(void)
653{
654 unsigned long start = ULONG_MAX, end = 0;
655
656 __purge_vmap_area_lazy(&start, &end, 1, 0);
657}
658
659
660
661
662
663
664static void free_vmap_area_noflush(struct vmap_area *va)
665{
666 va->flags |= VM_LAZY_FREE;
667 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
668 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
669 try_purge_vmap_area_lazy();
670}
671
672
673
674
675
/*
 * Clear the page table entries of @va, then hand it to the lazy-free
 * machinery.  The caller is responsible for any cache flushing.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);

	free_vmap_area_noflush(va);
}
681
682
683
684
685static void free_unmap_vmap_area(struct vmap_area *va)
686{
687 flush_cache_vunmap(va->va_start, va->va_end);
688 free_unmap_vmap_area_noflush(va);
689}
690
691static struct vmap_area *find_vmap_area(unsigned long addr)
692{
693 struct vmap_area *va;
694
695 spin_lock(&vmap_area_lock);
696 va = __find_vmap_area(addr);
697 spin_unlock(&vmap_area_lock);
698
699 return va;
700}
701
/*
 * Unmap and lazily free the vmap area that starts at @addr.  It is a BUG
 * if no such area exists.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va = find_vmap_area(addr);

	BUG_ON(!va);
	free_unmap_vmap_area(va);
}
710
711
712
713
714
715
716
717
718
719
720
721
722
/* Nominal vmalloc space used for sizing: 128MB on 32-bit, 128GB otherwise. */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
/* Largest request, in pages, served from a vmap block (see vb_alloc()). */
#define VMAP_MAX_ALLOC		BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX	1024
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
/*
 * Pages per vmap block: VMALLOC_PAGES / NR_CPUS / 16, clamped to
 * [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
 */
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

/* Size of one vmap block in bytes. */
#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)

/* Set to true at the end of vmalloc_init(); checked by vm_unmap_aliases(). */
static bool vmap_initialized __read_mostly = false;
742
/* Per-CPU queue of vmap blocks that still have free space. */
struct vmap_block_queue {
	spinlock_t lock;		/* taken around additions to / removals from ->free */
	struct list_head free;		/* vmap_block.free_list entries (RCU list ops) */
};
747
/*
 * A VMAP_BLOCK_SIZE chunk of vmalloc space that is handed out in
 * power-of-two runs of pages by vb_alloc().
 */
struct vmap_block {
	spinlock_t lock;		/* taken around updates of the counters and bitmaps */
	struct vmap_area *va;		/* address range backing this block */
	struct vmap_block_queue *vbq;	/* queue the block was added to on creation */
	unsigned long free, dirty;	/* pages still free / pages freed back (dirty) */
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);	/* one bit per allocated page */
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);	/* one bit per dirty page */
	struct list_head free_list;	/* entry in vbq->free */
	struct rcu_head rcu_head;	/* used to kfree() the block via call_rcu() */
	struct list_head purge;		/* entry in purge_fragmented_blocks()'s local list */
};
759
760
/* One queue of partially free vmap blocks per CPU. */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of all vmap blocks, indexed by addr_to_vb_idx() of the block's
 * start address.  Insertions and deletions are done under
 * vmap_block_tree_lock; vb_free() looks blocks up under rcu_read_lock().
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
770
771
772
773
774
775
776
777
778static unsigned long addr_to_vb_idx(unsigned long addr)
779{
780 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
781 addr /= VMAP_BLOCK_SIZE;
782 return addr;
783}
784
785static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
786{
787 struct vmap_block_queue *vbq;
788 struct vmap_block *vb;
789 struct vmap_area *va;
790 unsigned long vb_idx;
791 int node, err;
792
793 node = numa_node_id();
794
795 vb = kmalloc_node(sizeof(struct vmap_block),
796 gfp_mask & GFP_RECLAIM_MASK, node);
797 if (unlikely(!vb))
798 return ERR_PTR(-ENOMEM);
799
800 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
801 VMALLOC_START, VMALLOC_END,
802 node, gfp_mask);
803 if (IS_ERR(va)) {
804 kfree(vb);
805 return ERR_CAST(va);
806 }
807
808 err = radix_tree_preload(gfp_mask);
809 if (unlikely(err)) {
810 kfree(vb);
811 free_vmap_area(va);
812 return ERR_PTR(err);
813 }
814
815 spin_lock_init(&vb->lock);
816 vb->va = va;
817 vb->free = VMAP_BBMAP_BITS;
818 vb->dirty = 0;
819 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
820 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
821 INIT_LIST_HEAD(&vb->free_list);
822
823 vb_idx = addr_to_vb_idx(va->va_start);
824 spin_lock(&vmap_block_tree_lock);
825 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
826 spin_unlock(&vmap_block_tree_lock);
827 BUG_ON(err);
828 radix_tree_preload_end();
829
830 vbq = &get_cpu_var(vmap_block_queue);
831 vb->vbq = vbq;
832 spin_lock(&vbq->lock);
833 list_add_rcu(&vb->free_list, &vbq->free);
834 spin_unlock(&vbq->lock);
835 put_cpu_var(vmap_block_queue);
836
837 return vb;
838}
839
840static void rcu_free_vb(struct rcu_head *head)
841{
842 struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
843
844 kfree(vb);
845}
846
847static void free_vmap_block(struct vmap_block *vb)
848{
849 struct vmap_block *tmp;
850 unsigned long vb_idx;
851
852 vb_idx = addr_to_vb_idx(vb->va->va_start);
853 spin_lock(&vmap_block_tree_lock);
854 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
855 spin_unlock(&vmap_block_tree_lock);
856 BUG_ON(tmp != vb);
857
858 free_vmap_area_noflush(vb->va);
859 call_rcu(&vb->rcu_head, rcu_free_vb);
860}
861
862static void purge_fragmented_blocks(int cpu)
863{
864 LIST_HEAD(purge);
865 struct vmap_block *vb;
866 struct vmap_block *n_vb;
867 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
868
869 rcu_read_lock();
870 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
871
872 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
873 continue;
874
875 spin_lock(&vb->lock);
876 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
877 vb->free = 0;
878 vb->dirty = VMAP_BBMAP_BITS;
879 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
880 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
881 spin_lock(&vbq->lock);
882 list_del_rcu(&vb->free_list);
883 spin_unlock(&vbq->lock);
884 spin_unlock(&vb->lock);
885 list_add_tail(&vb->purge, &purge);
886 } else
887 spin_unlock(&vb->lock);
888 }
889 rcu_read_unlock();
890
891 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
892 list_del(&vb->purge);
893 free_vmap_block(vb);
894 }
895}
896
/* Purge fragmented vmap blocks from the queue of the CPU we are running on. */
static void purge_fragmented_blocks_thiscpu(void)
{
	const int this_cpu = smp_processor_id();

	purge_fragmented_blocks(this_cpu);
}
901
902static void purge_fragmented_blocks_allcpus(void)
903{
904 int cpu;
905
906 for_each_possible_cpu(cpu)
907 purge_fragmented_blocks(cpu);
908}
909
910static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
911{
912 struct vmap_block_queue *vbq;
913 struct vmap_block *vb;
914 unsigned long addr = 0;
915 unsigned int order;
916 int purge = 0;
917
918 BUG_ON(size & ~PAGE_MASK);
919 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
920 order = get_order(size);
921
922again:
923 rcu_read_lock();
924 vbq = &get_cpu_var(vmap_block_queue);
925 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
926 int i;
927
928 spin_lock(&vb->lock);
929 if (vb->free < 1UL << order)
930 goto next;
931
932 i = bitmap_find_free_region(vb->alloc_map,
933 VMAP_BBMAP_BITS, order);
934
935 if (i < 0) {
936 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
937
938 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
939 purge = 1;
940 }
941 goto next;
942 }
943 addr = vb->va->va_start + (i << PAGE_SHIFT);
944 BUG_ON(addr_to_vb_idx(addr) !=
945 addr_to_vb_idx(vb->va->va_start));
946 vb->free -= 1UL << order;
947 if (vb->free == 0) {
948 spin_lock(&vbq->lock);
949 list_del_rcu(&vb->free_list);
950 spin_unlock(&vbq->lock);
951 }
952 spin_unlock(&vb->lock);
953 break;
954next:
955 spin_unlock(&vb->lock);
956 }
957
958 if (purge)
959 purge_fragmented_blocks_thiscpu();
960
961 put_cpu_var(vmap_block_queue);
962 rcu_read_unlock();
963
964 if (!addr) {
965 vb = new_vmap_block(gfp_mask);
966 if (IS_ERR(vb))
967 return vb;
968 goto again;
969 }
970
971 return (void *)addr;
972}
973
974static void vb_free(const void *addr, unsigned long size)
975{
976 unsigned long offset;
977 unsigned long vb_idx;
978 unsigned int order;
979 struct vmap_block *vb;
980
981 BUG_ON(size & ~PAGE_MASK);
982 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
983
984 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
985
986 order = get_order(size);
987
988 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
989
990 vb_idx = addr_to_vb_idx((unsigned long)addr);
991 rcu_read_lock();
992 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
993 rcu_read_unlock();
994 BUG_ON(!vb);
995
996 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
997
998 spin_lock(&vb->lock);
999 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
1000
1001 vb->dirty += 1UL << order;
1002 if (vb->dirty == VMAP_BBMAP_BITS) {
1003 BUG_ON(vb->free);
1004 spin_unlock(&vb->lock);
1005 free_vmap_block(vb);
1006 } else
1007 spin_unlock(&vb->lock);
1008}
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023void vm_unmap_aliases(void)
1024{
1025 unsigned long start = ULONG_MAX, end = 0;
1026 int cpu;
1027 int flush = 0;
1028
1029 if (unlikely(!vmap_initialized))
1030 return;
1031
1032 for_each_possible_cpu(cpu) {
1033 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1034 struct vmap_block *vb;
1035
1036 rcu_read_lock();
1037 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1038 int i;
1039
1040 spin_lock(&vb->lock);
1041 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1042 while (i < VMAP_BBMAP_BITS) {
1043 unsigned long s, e;
1044 int j;
1045 j = find_next_zero_bit(vb->dirty_map,
1046 VMAP_BBMAP_BITS, i);
1047
1048 s = vb->va->va_start + (i << PAGE_SHIFT);
1049 e = vb->va->va_start + (j << PAGE_SHIFT);
1050 flush = 1;
1051
1052 if (s < start)
1053 start = s;
1054 if (e > end)
1055 end = e;
1056
1057 i = j;
1058 i = find_next_bit(vb->dirty_map,
1059 VMAP_BBMAP_BITS, i);
1060 }
1061 spin_unlock(&vb->lock);
1062 }
1063 rcu_read_unlock();
1064 }
1065
1066 __purge_vmap_area_lazy(&start, &end, 1, flush);
1067}
1068EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1069
1070
1071
1072
1073
1074
1075void vm_unmap_ram(const void *mem, unsigned int count)
1076{
1077 unsigned long size = count << PAGE_SHIFT;
1078 unsigned long addr = (unsigned long)mem;
1079
1080 BUG_ON(!addr);
1081 BUG_ON(addr < VMALLOC_START);
1082 BUG_ON(addr > VMALLOC_END);
1083 BUG_ON(addr & (PAGE_SIZE-1));
1084
1085 debug_check_no_locks_freed(mem, size);
1086 vmap_debug_free_range(addr, addr+size);
1087
1088 if (likely(count <= VMAP_MAX_ALLOC))
1089 vb_free(mem, size);
1090 else
1091 free_unmap_vmap_area_addr(addr);
1092}
1093EXPORT_SYMBOL(vm_unmap_ram);
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1105{
1106 unsigned long size = count << PAGE_SHIFT;
1107 unsigned long addr;
1108 void *mem;
1109
1110 if (likely(count <= VMAP_MAX_ALLOC)) {
1111 mem = vb_alloc(size, GFP_KERNEL);
1112 if (IS_ERR(mem))
1113 return NULL;
1114 addr = (unsigned long)mem;
1115 } else {
1116 struct vmap_area *va;
1117 va = alloc_vmap_area(size, PAGE_SIZE,
1118 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1119 if (IS_ERR(va))
1120 return NULL;
1121
1122 addr = va->va_start;
1123 mem = (void *)addr;
1124 }
1125 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1126 vm_unmap_ram(mem, count);
1127 return NULL;
1128 }
1129 return mem;
1130}
1131EXPORT_SYMBOL(vm_map_ram);
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1146{
1147 static size_t vm_init_off __initdata;
1148 unsigned long addr;
1149
1150 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1151 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1152
1153 vm->addr = (void *)addr;
1154
1155 vm->next = vmlist;
1156 vmlist = vm;
1157}
1158
1159void __init vmalloc_init(void)
1160{
1161 struct vmap_area *va;
1162 struct vm_struct *tmp;
1163 int i;
1164
1165 for_each_possible_cpu(i) {
1166 struct vmap_block_queue *vbq;
1167
1168 vbq = &per_cpu(vmap_block_queue, i);
1169 spin_lock_init(&vbq->lock);
1170 INIT_LIST_HEAD(&vbq->free);
1171 }
1172
1173
1174 for (tmp = vmlist; tmp; tmp = tmp->next) {
1175 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1176 va->flags = tmp->flags | VM_VM_AREA;
1177 va->va_start = (unsigned long)tmp->addr;
1178 va->va_end = va->va_start + tmp->size;
1179 __insert_vmap_area(va);
1180 }
1181
1182 vmap_area_pcpu_hole = VMALLOC_END;
1183
1184 vmap_initialized = true;
1185}
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1207 pgprot_t prot, struct page **pages)
1208{
1209 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1210}
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
/*
 * Clear the kernel page table entries for [addr, addr + size).  No cache
 * or TLB flushing is done here.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	const unsigned long end = addr + size;

	vunmap_page_range(addr, end);
}
EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1231
1232
1233
1234
1235
1236
1237
1238
1239
/*
 * Unmap [addr, addr + size) from the kernel page tables, calling
 * flush_cache_vunmap() before and flush_tlb_kernel_range() after the
 * unmap.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	const unsigned long stop = addr + size;

	flush_cache_vunmap(addr, stop);
	vunmap_page_range(addr, stop);
	flush_tlb_kernel_range(addr, stop);
}
1248
1249int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1250{
1251 unsigned long addr = (unsigned long)area->addr;
1252 unsigned long end = addr + area->size - PAGE_SIZE;
1253 int err;
1254
1255 err = vmap_page_range(addr, end, prot, *pages);
1256 if (err > 0) {
1257 *pages += err;
1258 err = 0;
1259 }
1260
1261 return err;
1262}
1263EXPORT_SYMBOL_GPL(map_vm_area);
1264
1265
/* Write-locked by insert_vmalloc_vm() and remove_vm_area() while they edit vmlist. */
DEFINE_RWLOCK(vmlist_lock);
/* Singly linked list of vm_structs; insert_vmalloc_vm() inserts in address order. */
struct vm_struct *vmlist;
1268
1269static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1270 unsigned long flags, void *caller)
1271{
1272 struct vm_struct *tmp, **p;
1273
1274 vm->flags = flags;
1275 vm->addr = (void *)va->va_start;
1276 vm->size = va->va_end - va->va_start;
1277 vm->caller = caller;
1278 va->private = vm;
1279 va->flags |= VM_VM_AREA;
1280
1281 write_lock(&vmlist_lock);
1282 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1283 if (tmp->addr >= vm->addr)
1284 break;
1285 }
1286 vm->next = *p;
1287 *p = vm;
1288 write_unlock(&vmlist_lock);
1289}
1290
1291static struct vm_struct *__get_vm_area_node(unsigned long size,
1292 unsigned long align, unsigned long flags, unsigned long start,
1293 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1294{
1295 static struct vmap_area *va;
1296 struct vm_struct *area;
1297
1298 BUG_ON(in_interrupt());
1299 if (flags & VM_IOREMAP) {
1300 int bit = fls(size);
1301
1302 if (bit > IOREMAP_MAX_ORDER)
1303 bit = IOREMAP_MAX_ORDER;
1304 else if (bit < PAGE_SHIFT)
1305 bit = PAGE_SHIFT;
1306
1307 align = 1ul << bit;
1308 }
1309
1310 size = PAGE_ALIGN(size);
1311 if (unlikely(!size))
1312 return NULL;
1313
1314 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1315 if (unlikely(!area))
1316 return NULL;
1317
1318
1319
1320
1321 size += PAGE_SIZE;
1322
1323 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1324 if (IS_ERR(va)) {
1325 kfree(area);
1326 return NULL;
1327 }
1328
1329 insert_vmalloc_vm(area, va, flags, caller);
1330 return area;
1331}
1332
1333struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1334 unsigned long start, unsigned long end)
1335{
1336 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1337 __builtin_return_address(0));
1338}
1339EXPORT_SYMBOL_GPL(__get_vm_area);
1340
1341struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1342 unsigned long start, unsigned long end,
1343 void *caller)
1344{
1345 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1346 caller);
1347}
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1359{
1360 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1361 -1, GFP_KERNEL, __builtin_return_address(0));
1362}
1363
1364struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1365 void *caller)
1366{
1367 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1368 -1, GFP_KERNEL, caller);
1369}
1370
1371static struct vm_struct *find_vm_area(const void *addr)
1372{
1373 struct vmap_area *va;
1374
1375 va = find_vmap_area((unsigned long)addr);
1376 if (va && va->flags & VM_VM_AREA)
1377 return va->private;
1378
1379 return NULL;
1380}
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390struct vm_struct *remove_vm_area(const void *addr)
1391{
1392 struct vmap_area *va;
1393
1394 va = find_vmap_area((unsigned long)addr);
1395 if (va && va->flags & VM_VM_AREA) {
1396 struct vm_struct *vm = va->private;
1397 struct vm_struct *tmp, **p;
1398
1399
1400
1401
1402
1403 write_lock(&vmlist_lock);
1404 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1405 ;
1406 *p = tmp->next;
1407 write_unlock(&vmlist_lock);
1408
1409 vmap_debug_free_range(va->va_start, va->va_end);
1410 free_unmap_vmap_area(va);
1411 vm->size -= PAGE_SIZE;
1412
1413 return vm;
1414 }
1415 return NULL;
1416}
1417
1418static void __vunmap(const void *addr, int deallocate_pages)
1419{
1420 struct vm_struct *area;
1421
1422 if (!addr)
1423 return;
1424
1425 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1426 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1427 return;
1428 }
1429
1430 area = remove_vm_area(addr);
1431 if (unlikely(!area)) {
1432 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1433 addr);
1434 return;
1435 }
1436
1437 debug_check_no_locks_freed(addr, area->size);
1438 debug_check_no_obj_freed(addr, area->size);
1439
1440 if (deallocate_pages) {
1441 int i;
1442
1443 for (i = 0; i < area->nr_pages; i++) {
1444 struct page *page = area->pages[i];
1445
1446 BUG_ON(!page);
1447 __free_page(page);
1448 }
1449
1450 if (area->flags & VM_VPAGES)
1451 vfree(area->pages);
1452 else
1453 kfree(area->pages);
1454 }
1455
1456 kfree(area);
1457 return;
1458}
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
/*
 * Release the vm area starting at @addr together with its pages.  The
 * address is also reported to kmemleak as freed.
 *
 * Must not be called from interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	/* Second argument: also free the backing pages. */
	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
/*
 * Release the vm area starting at @addr but leave the pages that were
 * mapped into it alone.
 *
 * Must not be called from interrupt context; may sleep.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();

	/* Second argument: keep the backing pages. */
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507void *vmap(struct page **pages, unsigned int count,
1508 unsigned long flags, pgprot_t prot)
1509{
1510 struct vm_struct *area;
1511
1512 might_sleep();
1513
1514 if (count > totalram_pages)
1515 return NULL;
1516
1517 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1518 __builtin_return_address(0));
1519 if (!area)
1520 return NULL;
1521
1522 if (map_vm_area(area, prot, &pages)) {
1523 vunmap(area->addr);
1524 return NULL;
1525 }
1526
1527 return area->addr;
1528}
1529EXPORT_SYMBOL(vmap);
1530
1531static void *__vmalloc_node(unsigned long size, unsigned long align,
1532 gfp_t gfp_mask, pgprot_t prot,
1533 int node, void *caller);
1534static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1535 pgprot_t prot, int node, void *caller)
1536{
1537 const int order = 0;
1538 struct page **pages;
1539 unsigned int nr_pages, array_size, i;
1540 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1541
1542 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1543 array_size = (nr_pages * sizeof(struct page *));
1544
1545 area->nr_pages = nr_pages;
1546
1547 if (array_size > PAGE_SIZE) {
1548 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1549 PAGE_KERNEL, node, caller);
1550 area->flags |= VM_VPAGES;
1551 } else {
1552 pages = kmalloc_node(array_size, nested_gfp, node);
1553 }
1554 area->pages = pages;
1555 area->caller = caller;
1556 if (!area->pages) {
1557 remove_vm_area(area->addr);
1558 kfree(area);
1559 return NULL;
1560 }
1561
1562 for (i = 0; i < area->nr_pages; i++) {
1563 struct page *page;
1564 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1565
1566 if (node < 0)
1567 page = alloc_page(tmp_mask);
1568 else
1569 page = alloc_pages_node(node, tmp_mask, order);
1570
1571 if (unlikely(!page)) {
1572
1573 area->nr_pages = i;
1574 goto fail;
1575 }
1576 area->pages[i] = page;
1577 }
1578
1579 if (map_vm_area(area, prot, &pages))
1580 goto fail;
1581 return area->addr;
1582
1583fail:
1584 warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, "
1585 "allocated %ld of %ld bytes\n",
1586 (area->nr_pages*PAGE_SIZE), area->size);
1587 vfree(area->addr);
1588 return NULL;
1589}
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606void *__vmalloc_node_range(unsigned long size, unsigned long align,
1607 unsigned long start, unsigned long end, gfp_t gfp_mask,
1608 pgprot_t prot, int node, void *caller)
1609{
1610 struct vm_struct *area;
1611 void *addr;
1612 unsigned long real_size = size;
1613
1614 size = PAGE_ALIGN(size);
1615 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1616 return NULL;
1617
1618 area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node,
1619 gfp_mask, caller);
1620
1621 if (!area)
1622 return NULL;
1623
1624 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1625
1626
1627
1628
1629
1630
1631 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1632
1633 return addr;
1634}
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649static void *__vmalloc_node(unsigned long size, unsigned long align,
1650 gfp_t gfp_mask, pgprot_t prot,
1651 int node, void *caller)
1652{
1653 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1654 gfp_mask, prot, node, caller);
1655}
1656
1657void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1658{
1659 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1660 __builtin_return_address(0));
1661}
1662EXPORT_SYMBOL(__vmalloc);
1663
1664static inline void *__vmalloc_node_flags(unsigned long size,
1665 int node, gfp_t flags)
1666{
1667 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1668 node, __builtin_return_address(0));
1669}
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680void *vmalloc(unsigned long size)
1681{
1682 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1683}
1684EXPORT_SYMBOL(vmalloc);
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696void *vzalloc(unsigned long size)
1697{
1698 return __vmalloc_node_flags(size, -1,
1699 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1700}
1701EXPORT_SYMBOL(vzalloc);
1702
1703
1704
1705
1706
1707
1708
1709
1710void *vmalloc_user(unsigned long size)
1711{
1712 struct vm_struct *area;
1713 void *ret;
1714
1715 ret = __vmalloc_node(size, SHMLBA,
1716 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1717 PAGE_KERNEL, -1, __builtin_return_address(0));
1718 if (ret) {
1719 area = find_vm_area(ret);
1720 area->flags |= VM_USERMAP;
1721 }
1722 return ret;
1723}
1724EXPORT_SYMBOL(vmalloc_user);
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737void *vmalloc_node(unsigned long size, int node)
1738{
1739 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1740 node, __builtin_return_address(0));
1741}
1742EXPORT_SYMBOL(vmalloc_node);
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756void *vzalloc_node(unsigned long size, int node)
1757{
1758 return __vmalloc_node_flags(size, node,
1759 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1760}
1761EXPORT_SYMBOL(vzalloc_node);
1762
/* Fall back to plain PAGE_KERNEL when no executable variant is defined. */
#if !defined(PAGE_KERNEL_EXEC)
#define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779void *vmalloc_exec(unsigned long size)
1780{
1781 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1782 -1, __builtin_return_address(0));
1783}
1784
/*
 * Allocation flags used by vmalloc_32() and vmalloc_32_user().  On
 * 64-bit builds GFP_DMA32 is added when CONFIG_ZONE_DMA32 is set,
 * otherwise GFP_DMA when CONFIG_ZONE_DMA is set; everything else uses
 * plain GFP_KERNEL.  The expansions are parenthesised so the macro
 * behaves as a single value in any expression.
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif
1792
1793
1794
1795
1796
1797
1798
1799
1800void *vmalloc_32(unsigned long size)
1801{
1802 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1803 -1, __builtin_return_address(0));
1804}
1805EXPORT_SYMBOL(vmalloc_32);
1806
1807
1808
1809
1810
1811
1812
1813
1814void *vmalloc_32_user(unsigned long size)
1815{
1816 struct vm_struct *area;
1817 void *ret;
1818
1819 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1820 -1, __builtin_return_address(0));
1821 if (ret) {
1822 area = find_vm_area(ret);
1823 area->flags |= VM_USERMAP;
1824 }
1825 return ret;
1826}
1827EXPORT_SYMBOL(vmalloc_32_user);
1828
1829
1830
1831
1832
1833
1834static int aligned_vread(char *buf, char *addr, unsigned long count)
1835{
1836 struct page *p;
1837 int copied = 0;
1838
1839 while (count) {
1840 unsigned long offset, length;
1841
1842 offset = (unsigned long)addr & ~PAGE_MASK;
1843 length = PAGE_SIZE - offset;
1844 if (length > count)
1845 length = count;
1846 p = vmalloc_to_page(addr);
1847
1848
1849
1850
1851
1852
1853
1854 if (p) {
1855
1856
1857
1858
1859 void *map = kmap_atomic(p, KM_USER0);
1860 memcpy(buf, map + offset, length);
1861 kunmap_atomic(map, KM_USER0);
1862 } else
1863 memset(buf, 0, length);
1864
1865 addr += length;
1866 buf += length;
1867 copied += length;
1868 count -= length;
1869 }
1870 return copied;
1871}
1872
1873static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1874{
1875 struct page *p;
1876 int copied = 0;
1877
1878 while (count) {
1879 unsigned long offset, length;
1880
1881 offset = (unsigned long)addr & ~PAGE_MASK;
1882 length = PAGE_SIZE - offset;
1883 if (length > count)
1884 length = count;
1885 p = vmalloc_to_page(addr);
1886
1887
1888
1889
1890
1891
1892
1893 if (p) {
1894
1895
1896
1897
1898 void *map = kmap_atomic(p, KM_USER0);
1899 memcpy(map + offset, buf, length);
1900 kunmap_atomic(map, KM_USER0);
1901 }
1902 addr += length;
1903 buf += length;
1904 copied += length;
1905 count -= length;
1906 }
1907 return copied;
1908}
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938long vread(char *buf, char *addr, unsigned long count)
1939{
1940 struct vm_struct *tmp;
1941 char *vaddr, *buf_start = buf;
1942 unsigned long buflen = count;
1943 unsigned long n;
1944
1945
1946 if ((unsigned long) addr + count < count)
1947 count = -(unsigned long) addr;
1948
1949 read_lock(&vmlist_lock);
1950 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1951 vaddr = (char *) tmp->addr;
1952 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1953 continue;
1954 while (addr < vaddr) {
1955 if (count == 0)
1956 goto finished;
1957 *buf = '\0';
1958 buf++;
1959 addr++;
1960 count--;
1961 }
1962 n = vaddr + tmp->size - PAGE_SIZE - addr;
1963 if (n > count)
1964 n = count;
1965 if (!(tmp->flags & VM_IOREMAP))
1966 aligned_vread(buf, addr, n);
1967 else
1968 memset(buf, 0, n);
1969 buf += n;
1970 addr += n;
1971 count -= n;
1972 }
1973finished:
1974 read_unlock(&vmlist_lock);
1975
1976 if (buf == buf_start)
1977 return 0;
1978
1979 if (buf != buf_start + buflen)
1980 memset(buf, 0, buflen - (buf - buf_start));
1981
1982 return buflen;
1983}
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013long vwrite(char *buf, char *addr, unsigned long count)
2014{
2015 struct vm_struct *tmp;
2016 char *vaddr;
2017 unsigned long n, buflen;
2018 int copied = 0;
2019
2020
2021 if ((unsigned long) addr + count < count)
2022 count = -(unsigned long) addr;
2023 buflen = count;
2024
2025 read_lock(&vmlist_lock);
2026 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2027 vaddr = (char *) tmp->addr;
2028 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2029 continue;
2030 while (addr < vaddr) {
2031 if (count == 0)
2032 goto finished;
2033 buf++;
2034 addr++;
2035 count--;
2036 }
2037 n = vaddr + tmp->size - PAGE_SIZE - addr;
2038 if (n > count)
2039 n = count;
2040 if (!(tmp->flags & VM_IOREMAP)) {
2041 aligned_vwrite(buf, addr, n);
2042 copied++;
2043 }
2044 buf += n;
2045 addr += n;
2046 count -= n;
2047 }
2048finished:
2049 read_unlock(&vmlist_lock);
2050 if (!copied)
2051 return 0;
2052 return buflen;
2053}
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2070 unsigned long pgoff)
2071{
2072 struct vm_struct *area;
2073 unsigned long uaddr = vma->vm_start;
2074 unsigned long usize = vma->vm_end - vma->vm_start;
2075
2076 if ((PAGE_SIZE-1) & (unsigned long)addr)
2077 return -EINVAL;
2078
2079 area = find_vm_area(addr);
2080 if (!area)
2081 return -EINVAL;
2082
2083 if (!(area->flags & VM_USERMAP))
2084 return -EINVAL;
2085
2086 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2087 return -EINVAL;
2088
2089 addr += pgoff << PAGE_SHIFT;
2090 do {
2091 struct page *page = vmalloc_to_page(addr);
2092 int ret;
2093
2094 ret = vm_insert_page(vma, uaddr, page);
2095 if (ret)
2096 return ret;
2097
2098 uaddr += PAGE_SIZE;
2099 addr += PAGE_SIZE;
2100 usize -= PAGE_SIZE;
2101 } while (usize > 0);
2102
2103
2104 vma->vm_flags |= VM_RESERVED;
2105
2106 return 0;
2107}
2108EXPORT_SYMBOL(remap_vmalloc_range);
2109
2110
2111
2112
2113
/*
 * Weak default that does nothing; being weak, it can be replaced by
 * another definition of the same symbol at link time.
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
	/* intentionally empty */
}
2117
2118
2119static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2120{
2121
2122 return 0;
2123}
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137struct vm_struct *alloc_vm_area(size_t size)
2138{
2139 struct vm_struct *area;
2140
2141 area = get_vm_area_caller(size, VM_IOREMAP,
2142 __builtin_return_address(0));
2143 if (area == NULL)
2144 return NULL;
2145
2146
2147
2148
2149
2150 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2151 area->size, f, NULL)) {
2152 free_vm_area(area);
2153 return NULL;
2154 }
2155
2156 return area;
2157}
2158EXPORT_SYMBOL_GPL(alloc_vm_area);
2159
2160void free_vm_area(struct vm_struct *area)
2161{
2162 struct vm_struct *ret;
2163 ret = remove_vm_area(area->addr);
2164 BUG_ON(ret != area);
2165 kfree(area);
2166}
2167EXPORT_SYMBOL_GPL(free_vm_area);
2168
2169#ifdef CONFIG_SMP
2170static struct vmap_area *node_to_va(struct rb_node *n)
2171{
2172 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2173}
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187static bool pvm_find_next_prev(unsigned long end,
2188 struct vmap_area **pnext,
2189 struct vmap_area **pprev)
2190{
2191 struct rb_node *n = vmap_area_root.rb_node;
2192 struct vmap_area *va = NULL;
2193
2194 while (n) {
2195 va = rb_entry(n, struct vmap_area, rb_node);
2196 if (end < va->va_end)
2197 n = n->rb_left;
2198 else if (end > va->va_end)
2199 n = n->rb_right;
2200 else
2201 break;
2202 }
2203
2204 if (!va)
2205 return false;
2206
2207 if (va->va_end > end) {
2208 *pnext = va;
2209 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2210 } else {
2211 *pprev = va;
2212 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2213 }
2214 return true;
2215}
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233static unsigned long pvm_determine_end(struct vmap_area **pnext,
2234 struct vmap_area **pprev,
2235 unsigned long align)
2236{
2237 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2238 unsigned long addr;
2239
2240 if (*pnext)
2241 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2242 else
2243 addr = vmalloc_end;
2244
2245 while (*pprev && (*pprev)->va_end > addr) {
2246 *pnext = *pprev;
2247 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2248 }
2249
2250 return addr;
2251}
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2278 const size_t *sizes, int nr_vms,
2279 size_t align)
2280{
2281 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2282 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2283 struct vmap_area **vas, *prev, *next;
2284 struct vm_struct **vms;
2285 int area, area2, last_area, term_area;
2286 unsigned long base, start, end, last_end;
2287 bool purged = false;
2288
2289
2290 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2291 for (last_area = 0, area = 0; area < nr_vms; area++) {
2292 start = offsets[area];
2293 end = start + sizes[area];
2294
2295
2296 BUG_ON(!IS_ALIGNED(offsets[area], align));
2297 BUG_ON(!IS_ALIGNED(sizes[area], align));
2298
2299
2300 if (start > offsets[last_area])
2301 last_area = area;
2302
2303 for (area2 = 0; area2 < nr_vms; area2++) {
2304 unsigned long start2 = offsets[area2];
2305 unsigned long end2 = start2 + sizes[area2];
2306
2307 if (area2 == area)
2308 continue;
2309
2310 BUG_ON(start2 >= start && start2 < end);
2311 BUG_ON(end2 <= end && end2 > start);
2312 }
2313 }
2314 last_end = offsets[last_area] + sizes[last_area];
2315
2316 if (vmalloc_end - vmalloc_start < last_end) {
2317 WARN_ON(true);
2318 return NULL;
2319 }
2320
2321 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
2322 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
2323 if (!vas || !vms)
2324 goto err_free;
2325
2326 for (area = 0; area < nr_vms; area++) {
2327 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2328 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2329 if (!vas[area] || !vms[area])
2330 goto err_free;
2331 }
2332retry:
2333 spin_lock(&vmap_area_lock);
2334
2335
2336 area = term_area = last_area;
2337 start = offsets[area];
2338 end = start + sizes[area];
2339
2340 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2341 base = vmalloc_end - last_end;
2342 goto found;
2343 }
2344 base = pvm_determine_end(&next, &prev, align) - end;
2345
2346 while (true) {
2347 BUG_ON(next && next->va_end <= base + end);
2348 BUG_ON(prev && prev->va_end > base + end);
2349
2350
2351
2352
2353
2354 if (base + last_end < vmalloc_start + last_end) {
2355 spin_unlock(&vmap_area_lock);
2356 if (!purged) {
2357 purge_vmap_area_lazy();
2358 purged = true;
2359 goto retry;
2360 }
2361 goto err_free;
2362 }
2363
2364
2365
2366
2367
2368 if (next && next->va_start < base + end) {
2369 base = pvm_determine_end(&next, &prev, align) - end;
2370 term_area = area;
2371 continue;
2372 }
2373
2374
2375
2376
2377
2378
2379 if (prev && prev->va_end > base + start) {
2380 next = prev;
2381 prev = node_to_va(rb_prev(&next->rb_node));
2382 base = pvm_determine_end(&next, &prev, align) - end;
2383 term_area = area;
2384 continue;
2385 }
2386
2387
2388
2389
2390
2391 area = (area + nr_vms - 1) % nr_vms;
2392 if (area == term_area)
2393 break;
2394 start = offsets[area];
2395 end = start + sizes[area];
2396 pvm_find_next_prev(base + end, &next, &prev);
2397 }
2398found:
2399
2400 for (area = 0; area < nr_vms; area++) {
2401 struct vmap_area *va = vas[area];
2402
2403 va->va_start = base + offsets[area];
2404 va->va_end = va->va_start + sizes[area];
2405 __insert_vmap_area(va);
2406 }
2407
2408 vmap_area_pcpu_hole = base + offsets[last_area];
2409
2410 spin_unlock(&vmap_area_lock);
2411
2412
2413 for (area = 0; area < nr_vms; area++)
2414 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2415 pcpu_get_vm_areas);
2416
2417 kfree(vas);
2418 return vms;
2419
2420err_free:
2421 for (area = 0; area < nr_vms; area++) {
2422 if (vas)
2423 kfree(vas[area]);
2424 if (vms)
2425 kfree(vms[area]);
2426 }
2427 kfree(vas);
2428 kfree(vms);
2429 return NULL;
2430}
2431
2432
2433
2434
2435
2436
2437
2438
/**
 * pcpu_free_vm_areas - free a group of vm areas
 * @vms:    array of @nr_vms vm_struct pointers
 * @nr_vms: number of entries in @vms
 *
 * Releases every area with free_vm_area() and then the array itself.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx = 0;

	while (idx < nr_vms) {
		free_vm_area(vms[idx]);
		idx++;
	}

	kfree(vms);
}
2447#endif
2448
2449#ifdef CONFIG_PROC_FS
2450static void *s_start(struct seq_file *m, loff_t *pos)
2451 __acquires(&vmlist_lock)
2452{
2453 loff_t n = *pos;
2454 struct vm_struct *v;
2455
2456 read_lock(&vmlist_lock);
2457 v = vmlist;
2458 while (n > 0 && v) {
2459 n--;
2460 v = v->next;
2461 }
2462 if (!n)
2463 return v;
2464
2465 return NULL;
2466
2467}
2468
2469static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2470{
2471 struct vm_struct *v = p;
2472
2473 ++*pos;
2474 return v->next;
2475}
2476
2477static void s_stop(struct seq_file *m, void *p)
2478 __releases(&vmlist_lock)
2479{
2480 read_unlock(&vmlist_lock);
2481}
2482
2483static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2484{
2485 if (NUMA_BUILD) {
2486 unsigned int nr, *counters = m->private;
2487
2488 if (!counters)
2489 return;
2490
2491 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2492
2493 for (nr = 0; nr < v->nr_pages; nr++)
2494 counters[page_to_nid(v->pages[nr])]++;
2495
2496 for_each_node_state(nr, N_HIGH_MEMORY)
2497 if (counters[nr])
2498 seq_printf(m, " N%u=%u", nr, counters[nr]);
2499 }
2500}
2501
2502static int s_show(struct seq_file *m, void *p)
2503{
2504 struct vm_struct *v = p;
2505
2506 seq_printf(m, "0x%p-0x%p %7ld",
2507 v->addr, v->addr + v->size, v->size);
2508
2509 if (v->caller)
2510 seq_printf(m, " %pS", v->caller);
2511
2512 if (v->nr_pages)
2513 seq_printf(m, " pages=%d", v->nr_pages);
2514
2515 if (v->phys_addr)
2516 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2517
2518 if (v->flags & VM_IOREMAP)
2519 seq_printf(m, " ioremap");
2520
2521 if (v->flags & VM_ALLOC)
2522 seq_printf(m, " vmalloc");
2523
2524 if (v->flags & VM_MAP)
2525 seq_printf(m, " vmap");
2526
2527 if (v->flags & VM_USERMAP)
2528 seq_printf(m, " user");
2529
2530 if (v->flags & VM_VPAGES)
2531 seq_printf(m, " vpages");
2532
2533 show_numa_info(m, v);
2534 seq_putc(m, '\n');
2535 return 0;
2536}
2537
2538static const struct seq_operations vmalloc_op = {
2539 .start = s_start,
2540 .next = s_next,
2541 .stop = s_stop,
2542 .show = s_show,
2543};
2544
2545static int vmalloc_open(struct inode *inode, struct file *file)
2546{
2547 unsigned int *ptr = NULL;
2548 int ret;
2549
2550 if (NUMA_BUILD) {
2551 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2552 if (ptr == NULL)
2553 return -ENOMEM;
2554 }
2555 ret = seq_open(file, &vmalloc_op);
2556 if (!ret) {
2557 struct seq_file *m = file->private_data;
2558 m->private = ptr;
2559 } else
2560 kfree(ptr);
2561 return ret;
2562}
2563
2564static const struct file_operations proc_vmalloc_operations = {
2565 .open = vmalloc_open,
2566 .read = seq_read,
2567 .llseek = seq_lseek,
2568 .release = seq_release_private,
2569};
2570
2571static int __init proc_vmalloc_init(void)
2572{
2573 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2574 return 0;
2575}
2576module_init(proc_vmalloc_init);
2577#endif
2578
2579