1
2
3
4
5
6
7
8
9
10
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/spinlock.h>
18#include <linux/interrupt.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/debugobjects.h>
22#include <linux/kallsyms.h>
23#include <linux/list.h>
24#include <linux/rbtree.h>
25#include <linux/radix-tree.h>
26#include <linux/rcupdate.h>
27#include <linux/pfn.h>
28#include <linux/kmemleak.h>
29#include <asm/atomic.h>
30#include <asm/uaccess.h>
31#include <asm/tlbflush.h>
32#include <asm/shmparam.h>
33
34
35
36
37static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
38{
39 pte_t *pte;
40
41 pte = pte_offset_kernel(pmd, addr);
42 do {
43 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
44 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
45 } while (pte++, addr += PAGE_SIZE, addr != end);
46}
47
48static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
49{
50 pmd_t *pmd;
51 unsigned long next;
52
53 pmd = pmd_offset(pud, addr);
54 do {
55 next = pmd_addr_end(addr, end);
56 if (pmd_none_or_clear_bad(pmd))
57 continue;
58 vunmap_pte_range(pmd, addr, next);
59 } while (pmd++, addr = next, addr != end);
60}
61
62static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
63{
64 pud_t *pud;
65 unsigned long next;
66
67 pud = pud_offset(pgd, addr);
68 do {
69 next = pud_addr_end(addr, end);
70 if (pud_none_or_clear_bad(pud))
71 continue;
72 vunmap_pmd_range(pud, addr, next);
73 } while (pud++, addr = next, addr != end);
74}
75
76static void vunmap_page_range(unsigned long addr, unsigned long end)
77{
78 pgd_t *pgd;
79 unsigned long next;
80
81 BUG_ON(addr >= end);
82 pgd = pgd_offset_k(addr);
83 do {
84 next = pgd_addr_end(addr, end);
85 if (pgd_none_or_clear_bad(pgd))
86 continue;
87 vunmap_pud_range(pgd, addr, next);
88 } while (pgd++, addr = next, addr != end);
89}
90
91static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
92 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
93{
94 pte_t *pte;
95
96
97
98
99
100
101 pte = pte_alloc_kernel(pmd, addr);
102 if (!pte)
103 return -ENOMEM;
104 do {
105 struct page *page = pages[*nr];
106
107 if (WARN_ON(!pte_none(*pte)))
108 return -EBUSY;
109 if (WARN_ON(!page))
110 return -ENOMEM;
111 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
112 (*nr)++;
113 } while (pte++, addr += PAGE_SIZE, addr != end);
114 return 0;
115}
116
117static int vmap_pmd_range(pud_t *pud, unsigned long addr,
118 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
119{
120 pmd_t *pmd;
121 unsigned long next;
122
123 pmd = pmd_alloc(&init_mm, pud, addr);
124 if (!pmd)
125 return -ENOMEM;
126 do {
127 next = pmd_addr_end(addr, end);
128 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
129 return -ENOMEM;
130 } while (pmd++, addr = next, addr != end);
131 return 0;
132}
133
134static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
135 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
136{
137 pud_t *pud;
138 unsigned long next;
139
140 pud = pud_alloc(&init_mm, pgd, addr);
141 if (!pud)
142 return -ENOMEM;
143 do {
144 next = pud_addr_end(addr, end);
145 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
146 return -ENOMEM;
147 } while (pud++, addr = next, addr != end);
148 return 0;
149}
150
151
152
153
154
155
156
157static int vmap_page_range_noflush(unsigned long start, unsigned long end,
158 pgprot_t prot, struct page **pages)
159{
160 pgd_t *pgd;
161 unsigned long next;
162 unsigned long addr = start;
163 int err = 0;
164 int nr = 0;
165
166 BUG_ON(addr >= end);
167 pgd = pgd_offset_k(addr);
168 do {
169 next = pgd_addr_end(addr, end);
170 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
171 if (err)
172 return err;
173 } while (pgd++, addr = next, addr != end);
174
175 return nr;
176}
177
178static int vmap_page_range(unsigned long start, unsigned long end,
179 pgprot_t prot, struct page **pages)
180{
181 int ret;
182
183 ret = vmap_page_range_noflush(start, end, prot, pages);
184 flush_cache_vmap(start, end);
185 return ret;
186}
187
/*
 * Return non-zero if @x lies in the vmalloc area or, when the architecture
 * defines a separate module range (CONFIG_MODULES and MODULES_VADDR), in
 * [MODULES_VADDR, MODULES_END).
 */
int is_vmalloc_or_module_addr(const void *x)
{
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;

	if (MODULES_VADDR <= addr && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}
202
203
204
205
206struct page *vmalloc_to_page(const void *vmalloc_addr)
207{
208 unsigned long addr = (unsigned long) vmalloc_addr;
209 struct page *page = NULL;
210 pgd_t *pgd = pgd_offset_k(addr);
211
212
213
214
215
216 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
217
218 if (!pgd_none(*pgd)) {
219 pud_t *pud = pud_offset(pgd, addr);
220 if (!pud_none(*pud)) {
221 pmd_t *pmd = pmd_offset(pud, addr);
222 if (!pmd_none(*pmd)) {
223 pte_t *ptep, pte;
224
225 ptep = pte_offset_map(pmd, addr);
226 pte = *ptep;
227 if (pte_present(pte))
228 page = pte_page(pte);
229 pte_unmap(ptep);
230 }
231 }
232 }
233 return page;
234}
235EXPORT_SYMBOL(vmalloc_to_page);
236
237
238
239
/*
 * Return the page frame number of the page backing @vmalloc_addr.
 * The result of vmalloc_to_page() is passed to page_to_pfn() unchecked.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}
EXPORT_SYMBOL(vmalloc_to_pfn);
245
246
247
248
/* Bits stored in vmap_area.flags. */
#define VM_LAZY_FREE	0x01	/* queued for lazy freeing, not yet purged */
#define VM_LAZY_FREEING	0x02	/* picked up by a purge, about to be freed */
#define VM_VM_AREA	0x04	/* ->private points to a struct vm_struct */

/*
 * One allocated range of kernel virtual address space.
 */
struct vmap_area {
	unsigned long va_start;		/* first address of the range */
	unsigned long va_end;		/* one past the last address */
	unsigned long flags;		/* VM_LAZY_* / VM_VM_AREA bits */
	struct rb_node rb_node;		/* link in vmap_area_root */
	struct list_head list;		/* link in vmap_area_list */
	struct list_head purge_list;	/* link on a purge's private list */
	void *private;			/* owner data; a vm_struct if VM_VM_AREA */
	struct rcu_head rcu_head;	/* for deferred kfree via call_rcu() */
};

/* Protects vmap_area_root and insertions/removals on vmap_area_list. */
static DEFINE_SPINLOCK(vmap_area_lock);
/* All vmap areas, keyed by address. */
static struct rb_root vmap_area_root = RB_ROOT;
/* The same areas, kept in the order of the tree; traversed under RCU. */
static LIST_HEAD(vmap_area_list);
/*
 * Raised to the highest va_end freed inside the vmalloc range, and set to
 * VMALLOC_END at init.  NOTE(review): its consumer is not visible in this
 * part of the file.
 */
static unsigned long vmap_area_pcpu_hole;
268
269static struct vmap_area *__find_vmap_area(unsigned long addr)
270{
271 struct rb_node *n = vmap_area_root.rb_node;
272
273 while (n) {
274 struct vmap_area *va;
275
276 va = rb_entry(n, struct vmap_area, rb_node);
277 if (addr < va->va_start)
278 n = n->rb_left;
279 else if (addr > va->va_start)
280 n = n->rb_right;
281 else
282 return va;
283 }
284
285 return NULL;
286}
287
288static void __insert_vmap_area(struct vmap_area *va)
289{
290 struct rb_node **p = &vmap_area_root.rb_node;
291 struct rb_node *parent = NULL;
292 struct rb_node *tmp;
293
294 while (*p) {
295 struct vmap_area *tmp;
296
297 parent = *p;
298 tmp = rb_entry(parent, struct vmap_area, rb_node);
299 if (va->va_start < tmp->va_end)
300 p = &(*p)->rb_left;
301 else if (va->va_end > tmp->va_start)
302 p = &(*p)->rb_right;
303 else
304 BUG();
305 }
306
307 rb_link_node(&va->rb_node, parent, p);
308 rb_insert_color(&va->rb_node, &vmap_area_root);
309
310
311 tmp = rb_prev(&va->rb_node);
312 if (tmp) {
313 struct vmap_area *prev;
314 prev = rb_entry(tmp, struct vmap_area, rb_node);
315 list_add_rcu(&va->list, &prev->list);
316 } else
317 list_add_rcu(&va->list, &vmap_area_list);
318}
319
320static void purge_vmap_area_lazy(void);
321
322
323
324
325
326static struct vmap_area *alloc_vmap_area(unsigned long size,
327 unsigned long align,
328 unsigned long vstart, unsigned long vend,
329 int node, gfp_t gfp_mask)
330{
331 struct vmap_area *va;
332 struct rb_node *n;
333 unsigned long addr;
334 int purged = 0;
335
336 BUG_ON(!size);
337 BUG_ON(size & ~PAGE_MASK);
338
339 va = kmalloc_node(sizeof(struct vmap_area),
340 gfp_mask & GFP_RECLAIM_MASK, node);
341 if (unlikely(!va))
342 return ERR_PTR(-ENOMEM);
343
344retry:
345 addr = ALIGN(vstart, align);
346
347 spin_lock(&vmap_area_lock);
348 if (addr + size - 1 < addr)
349 goto overflow;
350
351
352 n = vmap_area_root.rb_node;
353 if (n) {
354 struct vmap_area *first = NULL;
355
356 do {
357 struct vmap_area *tmp;
358 tmp = rb_entry(n, struct vmap_area, rb_node);
359 if (tmp->va_end >= addr) {
360 if (!first && tmp->va_start < addr + size)
361 first = tmp;
362 n = n->rb_left;
363 } else {
364 first = tmp;
365 n = n->rb_right;
366 }
367 } while (n);
368
369 if (!first)
370 goto found;
371
372 if (first->va_end < addr) {
373 n = rb_next(&first->rb_node);
374 if (n)
375 first = rb_entry(n, struct vmap_area, rb_node);
376 else
377 goto found;
378 }
379
380 while (addr + size > first->va_start && addr + size <= vend) {
381 addr = ALIGN(first->va_end + PAGE_SIZE, align);
382 if (addr + size - 1 < addr)
383 goto overflow;
384
385 n = rb_next(&first->rb_node);
386 if (n)
387 first = rb_entry(n, struct vmap_area, rb_node);
388 else
389 goto found;
390 }
391 }
392found:
393 if (addr + size > vend) {
394overflow:
395 spin_unlock(&vmap_area_lock);
396 if (!purged) {
397 purge_vmap_area_lazy();
398 purged = 1;
399 goto retry;
400 }
401 if (printk_ratelimit())
402 printk(KERN_WARNING
403 "vmap allocation for size %lu failed: "
404 "use vmalloc=<size> to increase size.\n", size);
405 kfree(va);
406 return ERR_PTR(-EBUSY);
407 }
408
409 BUG_ON(addr & (align-1));
410
411 va->va_start = addr;
412 va->va_end = addr + size;
413 va->flags = 0;
414 __insert_vmap_area(va);
415 spin_unlock(&vmap_area_lock);
416
417 return va;
418}
419
420static void rcu_free_va(struct rcu_head *head)
421{
422 struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
423
424 kfree(va);
425}
426
427static void __free_vmap_area(struct vmap_area *va)
428{
429 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
430 rb_erase(&va->rb_node, &vmap_area_root);
431 RB_CLEAR_NODE(&va->rb_node);
432 list_del_rcu(&va->list);
433
434
435
436
437
438
439
440 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
441 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
442
443 call_rcu(&va->rcu_head, rcu_free_va);
444}
445
446
447
448
449static void free_vmap_area(struct vmap_area *va)
450{
451 spin_lock(&vmap_area_lock);
452 __free_vmap_area(va);
453 spin_unlock(&vmap_area_lock);
454}
455
456
457
458
459static void unmap_vmap_area(struct vmap_area *va)
460{
461 vunmap_page_range(va->va_start, va->va_end);
462}
463
/*
 * Debug aid: with CONFIG_DEBUG_PAGEALLOC the range [start, end) is
 * unmapped and its TLB entries flushed immediately.
 *
 * Without that option this function does nothing.
 */
static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501static unsigned long lazy_max_pages(void)
502{
503 unsigned int log;
504
505 log = fls(num_online_cpus());
506
507 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
508}
509
510static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
511
512
513static void purge_fragmented_blocks_allcpus(void);
514
515
516
517
518
519
520
521
522
523
524
525static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
526 int sync, int force_flush)
527{
528 static DEFINE_SPINLOCK(purge_lock);
529 LIST_HEAD(valist);
530 struct vmap_area *va;
531 struct vmap_area *n_va;
532 int nr = 0;
533
534
535
536
537
538
539 if (!sync && !force_flush) {
540 if (!spin_trylock(&purge_lock))
541 return;
542 } else
543 spin_lock(&purge_lock);
544
545 if (sync)
546 purge_fragmented_blocks_allcpus();
547
548 rcu_read_lock();
549 list_for_each_entry_rcu(va, &vmap_area_list, list) {
550 if (va->flags & VM_LAZY_FREE) {
551 if (va->va_start < *start)
552 *start = va->va_start;
553 if (va->va_end > *end)
554 *end = va->va_end;
555 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
556 unmap_vmap_area(va);
557 list_add_tail(&va->purge_list, &valist);
558 va->flags |= VM_LAZY_FREEING;
559 va->flags &= ~VM_LAZY_FREE;
560 }
561 }
562 rcu_read_unlock();
563
564 if (nr)
565 atomic_sub(nr, &vmap_lazy_nr);
566
567 if (nr || force_flush)
568 flush_tlb_kernel_range(*start, *end);
569
570 if (nr) {
571 spin_lock(&vmap_area_lock);
572 list_for_each_entry_safe(va, n_va, &valist, purge_list)
573 __free_vmap_area(va);
574 spin_unlock(&vmap_area_lock);
575 }
576 spin_unlock(&purge_lock);
577}
578
579
580
581
582
583static void try_purge_vmap_area_lazy(void)
584{
585 unsigned long start = ULONG_MAX, end = 0;
586
587 __purge_vmap_area_lazy(&start, &end, 0, 0);
588}
589
590
591
592
593static void purge_vmap_area_lazy(void)
594{
595 unsigned long start = ULONG_MAX, end = 0;
596
597 __purge_vmap_area_lazy(&start, &end, 1, 0);
598}
599
600
601
602
603
604static void free_unmap_vmap_area_noflush(struct vmap_area *va)
605{
606 va->flags |= VM_LAZY_FREE;
607 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
608 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
609 try_purge_vmap_area_lazy();
610}
611
612
613
614
615static void free_unmap_vmap_area(struct vmap_area *va)
616{
617 flush_cache_vunmap(va->va_start, va->va_end);
618 free_unmap_vmap_area_noflush(va);
619}
620
621static struct vmap_area *find_vmap_area(unsigned long addr)
622{
623 struct vmap_area *va;
624
625 spin_lock(&vmap_area_lock);
626 va = __find_vmap_area(addr);
627 spin_unlock(&vmap_area_lock);
628
629 return va;
630}
631
/*
 * Flush and lazily free the vmap area that starts at @addr.
 * It is a bug to pass an address no area starts at.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va = find_vmap_area(addr);

	BUG_ON(!va);
	free_unmap_vmap_area(va);
}
640
641
642
643
644
645
646
647
648
649
650
651
652
/*
 * Sizing of the per-cpu vmap blocks.
 *
 * VMALLOC_SPACE is a fixed estimate chosen by word size (128MB on 32-bit,
 * 128GB otherwise), not the real extent of the vmalloc area.
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
/* Largest request, in pages, that vb_alloc()/vb_free() accept. */
#define VMAP_MAX_ALLOC		BITS_PER_LONG
/* Bounds on the number of pages tracked by one vmap block. */
#define VMAP_BBMAP_BITS_MAX	1024
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
/* Note: these two evaluate their arguments more than once. */
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
/* Pages per block: VMALLOC_PAGES / NR_CPUS / 16, clamped to [MIN, MAX]. */
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

/* Size in bytes of the address range covered by one vmap block. */
#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
670
/* Set at the end of vmalloc_init(); vm_unmap_aliases() is a no-op before. */
static bool vmap_initialized __read_mostly = false;

/* Per-cpu queue of vmap blocks that still have free space. */
struct vmap_block_queue {
	spinlock_t lock;		/* protects updates to ->free */
	struct list_head free;		/* blocks, linked via vmap_block.free_list */
};

/*
 * A VMAP_BLOCK_SIZE chunk of address space handed out in small pieces.
 */
struct vmap_block {
	spinlock_t lock;		/* protects the counters and bitmaps */
	struct vmap_area *va;		/* address range backing this block */
	struct vmap_block_queue *vbq;	/* queue the block was created on */
	unsigned long free, dirty;	/* pages still allocatable / pages freed */
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);	/* pages handed out */
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);	/* pages given back */
	struct list_head free_list;	/* link in vbq->free (RCU list) */
	struct rcu_head rcu_head;	/* for deferred kfree via call_rcu() */
	struct list_head purge;		/* link on a local purge list */
};

/* Queue of vmap blocks for allocation, one per cpu. */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree mapping a block index (see addr_to_vb_idx()) to its
 * vmap_block.  Insertions and deletions take vmap_block_tree_lock;
 * vb_free() looks blocks up under rcu_read_lock().
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
700
701
702
703
704
705
706
707
708static unsigned long addr_to_vb_idx(unsigned long addr)
709{
710 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
711 addr /= VMAP_BLOCK_SIZE;
712 return addr;
713}
714
715static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
716{
717 struct vmap_block_queue *vbq;
718 struct vmap_block *vb;
719 struct vmap_area *va;
720 unsigned long vb_idx;
721 int node, err;
722
723 node = numa_node_id();
724
725 vb = kmalloc_node(sizeof(struct vmap_block),
726 gfp_mask & GFP_RECLAIM_MASK, node);
727 if (unlikely(!vb))
728 return ERR_PTR(-ENOMEM);
729
730 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
731 VMALLOC_START, VMALLOC_END,
732 node, gfp_mask);
733 if (unlikely(IS_ERR(va))) {
734 kfree(vb);
735 return ERR_PTR(PTR_ERR(va));
736 }
737
738 err = radix_tree_preload(gfp_mask);
739 if (unlikely(err)) {
740 kfree(vb);
741 free_vmap_area(va);
742 return ERR_PTR(err);
743 }
744
745 spin_lock_init(&vb->lock);
746 vb->va = va;
747 vb->free = VMAP_BBMAP_BITS;
748 vb->dirty = 0;
749 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
750 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
751 INIT_LIST_HEAD(&vb->free_list);
752
753 vb_idx = addr_to_vb_idx(va->va_start);
754 spin_lock(&vmap_block_tree_lock);
755 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
756 spin_unlock(&vmap_block_tree_lock);
757 BUG_ON(err);
758 radix_tree_preload_end();
759
760 vbq = &get_cpu_var(vmap_block_queue);
761 vb->vbq = vbq;
762 spin_lock(&vbq->lock);
763 list_add_rcu(&vb->free_list, &vbq->free);
764 spin_unlock(&vbq->lock);
765 put_cpu_var(vmap_block_queue);
766
767 return vb;
768}
769
770static void rcu_free_vb(struct rcu_head *head)
771{
772 struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
773
774 kfree(vb);
775}
776
777static void free_vmap_block(struct vmap_block *vb)
778{
779 struct vmap_block *tmp;
780 unsigned long vb_idx;
781
782 vb_idx = addr_to_vb_idx(vb->va->va_start);
783 spin_lock(&vmap_block_tree_lock);
784 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
785 spin_unlock(&vmap_block_tree_lock);
786 BUG_ON(tmp != vb);
787
788 free_unmap_vmap_area_noflush(vb->va);
789 call_rcu(&vb->rcu_head, rcu_free_vb);
790}
791
792static void purge_fragmented_blocks(int cpu)
793{
794 LIST_HEAD(purge);
795 struct vmap_block *vb;
796 struct vmap_block *n_vb;
797 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
798
799 rcu_read_lock();
800 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
801
802 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
803 continue;
804
805 spin_lock(&vb->lock);
806 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
807 vb->free = 0;
808 vb->dirty = VMAP_BBMAP_BITS;
809 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
810 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
811 spin_lock(&vbq->lock);
812 list_del_rcu(&vb->free_list);
813 spin_unlock(&vbq->lock);
814 spin_unlock(&vb->lock);
815 list_add_tail(&vb->purge, &purge);
816 } else
817 spin_unlock(&vb->lock);
818 }
819 rcu_read_unlock();
820
821 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
822 list_del(&vb->purge);
823 free_vmap_block(vb);
824 }
825}
826
/* Purge fragmented blocks from the queue of the cpu we are running on. */
static void purge_fragmented_blocks_thiscpu(void)
{
	int cpu = smp_processor_id();

	purge_fragmented_blocks(cpu);
}
831
832static void purge_fragmented_blocks_allcpus(void)
833{
834 int cpu;
835
836 for_each_possible_cpu(cpu)
837 purge_fragmented_blocks(cpu);
838}
839
840static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
841{
842 struct vmap_block_queue *vbq;
843 struct vmap_block *vb;
844 unsigned long addr = 0;
845 unsigned int order;
846 int purge = 0;
847
848 BUG_ON(size & ~PAGE_MASK);
849 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
850 order = get_order(size);
851
852again:
853 rcu_read_lock();
854 vbq = &get_cpu_var(vmap_block_queue);
855 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
856 int i;
857
858 spin_lock(&vb->lock);
859 if (vb->free < 1UL << order)
860 goto next;
861
862 i = bitmap_find_free_region(vb->alloc_map,
863 VMAP_BBMAP_BITS, order);
864
865 if (i < 0) {
866 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
867
868 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
869 purge = 1;
870 }
871 goto next;
872 }
873 addr = vb->va->va_start + (i << PAGE_SHIFT);
874 BUG_ON(addr_to_vb_idx(addr) !=
875 addr_to_vb_idx(vb->va->va_start));
876 vb->free -= 1UL << order;
877 if (vb->free == 0) {
878 spin_lock(&vbq->lock);
879 list_del_rcu(&vb->free_list);
880 spin_unlock(&vbq->lock);
881 }
882 spin_unlock(&vb->lock);
883 break;
884next:
885 spin_unlock(&vb->lock);
886 }
887
888 if (purge)
889 purge_fragmented_blocks_thiscpu();
890
891 put_cpu_var(vmap_block_queue);
892 rcu_read_unlock();
893
894 if (!addr) {
895 vb = new_vmap_block(gfp_mask);
896 if (IS_ERR(vb))
897 return vb;
898 goto again;
899 }
900
901 return (void *)addr;
902}
903
904static void vb_free(const void *addr, unsigned long size)
905{
906 unsigned long offset;
907 unsigned long vb_idx;
908 unsigned int order;
909 struct vmap_block *vb;
910
911 BUG_ON(size & ~PAGE_MASK);
912 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
913
914 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
915
916 order = get_order(size);
917
918 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
919
920 vb_idx = addr_to_vb_idx((unsigned long)addr);
921 rcu_read_lock();
922 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
923 rcu_read_unlock();
924 BUG_ON(!vb);
925
926 spin_lock(&vb->lock);
927 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
928
929 vb->dirty += 1UL << order;
930 if (vb->dirty == VMAP_BBMAP_BITS) {
931 BUG_ON(vb->free);
932 spin_unlock(&vb->lock);
933 free_vmap_block(vb);
934 } else
935 spin_unlock(&vb->lock);
936}
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951void vm_unmap_aliases(void)
952{
953 unsigned long start = ULONG_MAX, end = 0;
954 int cpu;
955 int flush = 0;
956
957 if (unlikely(!vmap_initialized))
958 return;
959
960 for_each_possible_cpu(cpu) {
961 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
962 struct vmap_block *vb;
963
964 rcu_read_lock();
965 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
966 int i;
967
968 spin_lock(&vb->lock);
969 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
970 while (i < VMAP_BBMAP_BITS) {
971 unsigned long s, e;
972 int j;
973 j = find_next_zero_bit(vb->dirty_map,
974 VMAP_BBMAP_BITS, i);
975
976 s = vb->va->va_start + (i << PAGE_SHIFT);
977 e = vb->va->va_start + (j << PAGE_SHIFT);
978 vunmap_page_range(s, e);
979 flush = 1;
980
981 if (s < start)
982 start = s;
983 if (e > end)
984 end = e;
985
986 i = j;
987 i = find_next_bit(vb->dirty_map,
988 VMAP_BBMAP_BITS, i);
989 }
990 spin_unlock(&vb->lock);
991 }
992 rcu_read_unlock();
993 }
994
995 __purge_vmap_area_lazy(&start, &end, 1, flush);
996}
997EXPORT_SYMBOL_GPL(vm_unmap_aliases);
998
999
1000
1001
1002
1003
1004void vm_unmap_ram(const void *mem, unsigned int count)
1005{
1006 unsigned long size = count << PAGE_SHIFT;
1007 unsigned long addr = (unsigned long)mem;
1008
1009 BUG_ON(!addr);
1010 BUG_ON(addr < VMALLOC_START);
1011 BUG_ON(addr > VMALLOC_END);
1012 BUG_ON(addr & (PAGE_SIZE-1));
1013
1014 debug_check_no_locks_freed(mem, size);
1015 vmap_debug_free_range(addr, addr+size);
1016
1017 if (likely(count <= VMAP_MAX_ALLOC))
1018 vb_free(mem, size);
1019 else
1020 free_unmap_vmap_area_addr(addr);
1021}
1022EXPORT_SYMBOL(vm_unmap_ram);
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1034{
1035 unsigned long size = count << PAGE_SHIFT;
1036 unsigned long addr;
1037 void *mem;
1038
1039 if (likely(count <= VMAP_MAX_ALLOC)) {
1040 mem = vb_alloc(size, GFP_KERNEL);
1041 if (IS_ERR(mem))
1042 return NULL;
1043 addr = (unsigned long)mem;
1044 } else {
1045 struct vmap_area *va;
1046 va = alloc_vmap_area(size, PAGE_SIZE,
1047 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1048 if (IS_ERR(va))
1049 return NULL;
1050
1051 addr = va->va_start;
1052 mem = (void *)addr;
1053 }
1054 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1055 vm_unmap_ram(mem, count);
1056 return NULL;
1057 }
1058 return mem;
1059}
1060EXPORT_SYMBOL(vm_map_ram);
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1075{
1076 static size_t vm_init_off __initdata;
1077 unsigned long addr;
1078
1079 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1080 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1081
1082 vm->addr = (void *)addr;
1083
1084 vm->next = vmlist;
1085 vmlist = vm;
1086}
1087
1088void __init vmalloc_init(void)
1089{
1090 struct vmap_area *va;
1091 struct vm_struct *tmp;
1092 int i;
1093
1094 for_each_possible_cpu(i) {
1095 struct vmap_block_queue *vbq;
1096
1097 vbq = &per_cpu(vmap_block_queue, i);
1098 spin_lock_init(&vbq->lock);
1099 INIT_LIST_HEAD(&vbq->free);
1100 }
1101
1102
1103 for (tmp = vmlist; tmp; tmp = tmp->next) {
1104 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1105 va->flags = tmp->flags | VM_VM_AREA;
1106 va->va_start = (unsigned long)tmp->addr;
1107 va->va_end = va->va_start + tmp->size;
1108 __insert_vmap_area(va);
1109 }
1110
1111 vmap_area_pcpu_hole = VMALLOC_END;
1112
1113 vmap_initialized = true;
1114}
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1136 pgprot_t prot, struct page **pages)
1137{
1138 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1139}
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
/*
 * Clear the kernel page-table entries for [@addr, @addr + @size).
 * No cache or TLB flush is performed.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size;

	vunmap_page_range(addr, end);
}
1159
1160
1161
1162
1163
1164
1165
1166
1167
/*
 * Unmap [@addr, @addr + @size) from the kernel page tables, with a cache
 * flush before the unmap and a TLB flush after it.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	flush_cache_vunmap(addr, addr + size);
	vunmap_page_range(addr, addr + size);
	flush_tlb_kernel_range(addr, addr + size);
}
1176
1177int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1178{
1179 unsigned long addr = (unsigned long)area->addr;
1180 unsigned long end = addr + area->size - PAGE_SIZE;
1181 int err;
1182
1183 err = vmap_page_range(addr, end, prot, *pages);
1184 if (err > 0) {
1185 *pages += err;
1186 err = 0;
1187 }
1188
1189 return err;
1190}
1191EXPORT_SYMBOL_GPL(map_vm_area);
1192
1193
1194DEFINE_RWLOCK(vmlist_lock);
1195struct vm_struct *vmlist;
1196
1197static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1198 unsigned long flags, void *caller)
1199{
1200 struct vm_struct *tmp, **p;
1201
1202 vm->flags = flags;
1203 vm->addr = (void *)va->va_start;
1204 vm->size = va->va_end - va->va_start;
1205 vm->caller = caller;
1206 va->private = vm;
1207 va->flags |= VM_VM_AREA;
1208
1209 write_lock(&vmlist_lock);
1210 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1211 if (tmp->addr >= vm->addr)
1212 break;
1213 }
1214 vm->next = *p;
1215 *p = vm;
1216 write_unlock(&vmlist_lock);
1217}
1218
1219static struct vm_struct *__get_vm_area_node(unsigned long size,
1220 unsigned long align, unsigned long flags, unsigned long start,
1221 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1222{
1223 static struct vmap_area *va;
1224 struct vm_struct *area;
1225
1226 BUG_ON(in_interrupt());
1227 if (flags & VM_IOREMAP) {
1228 int bit = fls(size);
1229
1230 if (bit > IOREMAP_MAX_ORDER)
1231 bit = IOREMAP_MAX_ORDER;
1232 else if (bit < PAGE_SHIFT)
1233 bit = PAGE_SHIFT;
1234
1235 align = 1ul << bit;
1236 }
1237
1238 size = PAGE_ALIGN(size);
1239 if (unlikely(!size))
1240 return NULL;
1241
1242 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1243 if (unlikely(!area))
1244 return NULL;
1245
1246
1247
1248
1249 size += PAGE_SIZE;
1250
1251 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1252 if (IS_ERR(va)) {
1253 kfree(area);
1254 return NULL;
1255 }
1256
1257 insert_vmalloc_vm(area, va, flags, caller);
1258 return area;
1259}
1260
1261struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1262 unsigned long start, unsigned long end)
1263{
1264 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1265 __builtin_return_address(0));
1266}
1267EXPORT_SYMBOL_GPL(__get_vm_area);
1268
1269struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1270 unsigned long start, unsigned long end,
1271 void *caller)
1272{
1273 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1274 caller);
1275}
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1287{
1288 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1289 -1, GFP_KERNEL, __builtin_return_address(0));
1290}
1291
1292struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1293 void *caller)
1294{
1295 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1296 -1, GFP_KERNEL, caller);
1297}
1298
1299struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
1300 int node, gfp_t gfp_mask)
1301{
1302 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1303 node, gfp_mask, __builtin_return_address(0));
1304}
1305
1306static struct vm_struct *find_vm_area(const void *addr)
1307{
1308 struct vmap_area *va;
1309
1310 va = find_vmap_area((unsigned long)addr);
1311 if (va && va->flags & VM_VM_AREA)
1312 return va->private;
1313
1314 return NULL;
1315}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325struct vm_struct *remove_vm_area(const void *addr)
1326{
1327 struct vmap_area *va;
1328
1329 va = find_vmap_area((unsigned long)addr);
1330 if (va && va->flags & VM_VM_AREA) {
1331 struct vm_struct *vm = va->private;
1332 struct vm_struct *tmp, **p;
1333
1334
1335
1336
1337
1338 write_lock(&vmlist_lock);
1339 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1340 ;
1341 *p = tmp->next;
1342 write_unlock(&vmlist_lock);
1343
1344 vmap_debug_free_range(va->va_start, va->va_end);
1345 free_unmap_vmap_area(va);
1346 vm->size -= PAGE_SIZE;
1347
1348 return vm;
1349 }
1350 return NULL;
1351}
1352
1353static void __vunmap(const void *addr, int deallocate_pages)
1354{
1355 struct vm_struct *area;
1356
1357 if (!addr)
1358 return;
1359
1360 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1361 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1362 return;
1363 }
1364
1365 area = remove_vm_area(addr);
1366 if (unlikely(!area)) {
1367 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1368 addr);
1369 return;
1370 }
1371
1372 debug_check_no_locks_freed(addr, area->size);
1373 debug_check_no_obj_freed(addr, area->size);
1374
1375 if (deallocate_pages) {
1376 int i;
1377
1378 for (i = 0; i < area->nr_pages; i++) {
1379 struct page *page = area->pages[i];
1380
1381 BUG_ON(!page);
1382 __free_page(page);
1383 }
1384
1385 if (area->flags & VM_VPAGES)
1386 vfree(area->pages);
1387 else
1388 kfree(area->pages);
1389 }
1390
1391 kfree(area);
1392 return;
1393}
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
/*
 * Release the vm area starting at @addr together with its backing pages.
 * A NULL @addr is a no-op as far as unmapping is concerned.
 *
 * Must not be called from interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	/* Tell kmemleak first, then do the actual teardown. */
	kmemleak_free(addr);
	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
/*
 * Release the virtual mapping starting at @addr without freeing the
 * pages behind it.
 *
 * May sleep; must not be called from interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();

	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442void *vmap(struct page **pages, unsigned int count,
1443 unsigned long flags, pgprot_t prot)
1444{
1445 struct vm_struct *area;
1446
1447 might_sleep();
1448
1449 if (count > totalram_pages)
1450 return NULL;
1451
1452 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1453 __builtin_return_address(0));
1454 if (!area)
1455 return NULL;
1456
1457 if (map_vm_area(area, prot, &pages)) {
1458 vunmap(area->addr);
1459 return NULL;
1460 }
1461
1462 return area->addr;
1463}
1464EXPORT_SYMBOL(vmap);
1465
1466static void *__vmalloc_node(unsigned long size, unsigned long align,
1467 gfp_t gfp_mask, pgprot_t prot,
1468 int node, void *caller);
1469static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1470 pgprot_t prot, int node, void *caller)
1471{
1472 struct page **pages;
1473 unsigned int nr_pages, array_size, i;
1474 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1475
1476 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1477 array_size = (nr_pages * sizeof(struct page *));
1478
1479 area->nr_pages = nr_pages;
1480
1481 if (array_size > PAGE_SIZE) {
1482 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1483 PAGE_KERNEL, node, caller);
1484 area->flags |= VM_VPAGES;
1485 } else {
1486 pages = kmalloc_node(array_size, nested_gfp, node);
1487 }
1488 area->pages = pages;
1489 area->caller = caller;
1490 if (!area->pages) {
1491 remove_vm_area(area->addr);
1492 kfree(area);
1493 return NULL;
1494 }
1495
1496 for (i = 0; i < area->nr_pages; i++) {
1497 struct page *page;
1498
1499 if (node < 0)
1500 page = alloc_page(gfp_mask);
1501 else
1502 page = alloc_pages_node(node, gfp_mask, 0);
1503
1504 if (unlikely(!page)) {
1505
1506 area->nr_pages = i;
1507 goto fail;
1508 }
1509 area->pages[i] = page;
1510 }
1511
1512 if (map_vm_area(area, prot, &pages))
1513 goto fail;
1514 return area->addr;
1515
1516fail:
1517 vfree(area->addr);
1518 return NULL;
1519}
1520
1521void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
1522{
1523 void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
1524 __builtin_return_address(0));
1525
1526
1527
1528
1529
1530
1531 kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
1532
1533 return addr;
1534}
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549static void *__vmalloc_node(unsigned long size, unsigned long align,
1550 gfp_t gfp_mask, pgprot_t prot,
1551 int node, void *caller)
1552{
1553 struct vm_struct *area;
1554 void *addr;
1555 unsigned long real_size = size;
1556
1557 size = PAGE_ALIGN(size);
1558 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1559 return NULL;
1560
1561 area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
1562 VMALLOC_END, node, gfp_mask, caller);
1563
1564 if (!area)
1565 return NULL;
1566
1567 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1568
1569
1570
1571
1572
1573
1574 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1575
1576 return addr;
1577}
1578
1579void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1580{
1581 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1582 __builtin_return_address(0));
1583}
1584EXPORT_SYMBOL(__vmalloc);
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595void *vmalloc(unsigned long size)
1596{
1597 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1598 -1, __builtin_return_address(0));
1599}
1600EXPORT_SYMBOL(vmalloc);
1601
1602
1603
1604
1605
1606
1607
1608
1609void *vmalloc_user(unsigned long size)
1610{
1611 struct vm_struct *area;
1612 void *ret;
1613
1614 ret = __vmalloc_node(size, SHMLBA,
1615 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1616 PAGE_KERNEL, -1, __builtin_return_address(0));
1617 if (ret) {
1618 area = find_vm_area(ret);
1619 area->flags |= VM_USERMAP;
1620 }
1621 return ret;
1622}
1623EXPORT_SYMBOL(vmalloc_user);
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636void *vmalloc_node(unsigned long size, int node)
1637{
1638 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1639 node, __builtin_return_address(0));
1640}
1641EXPORT_SYMBOL(vmalloc_node);
1642
1643#ifndef PAGE_KERNEL_EXEC
1644# define PAGE_KERNEL_EXEC PAGE_KERNEL
1645#endif
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659void *vmalloc_exec(unsigned long size)
1660{
1661 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1662 -1, __builtin_return_address(0));
1663}
1664
/*
 * GFP mask used by vmalloc_32() and vmalloc_32_user().  On 64-bit builds a
 * DMA32 (preferred) or DMA zone modifier is added when that zone is
 * configured; otherwise plain GFP_KERNEL is used.
 *
 * The expansions are parenthesized so the macro always behaves as a single
 * operand, whatever operator it is combined with at the use site.
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif
1672
1673
1674
1675
1676
1677
1678
1679
1680void *vmalloc_32(unsigned long size)
1681{
1682 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1683 -1, __builtin_return_address(0));
1684}
1685EXPORT_SYMBOL(vmalloc_32);
1686
1687
1688
1689
1690
1691
1692
1693
1694void *vmalloc_32_user(unsigned long size)
1695{
1696 struct vm_struct *area;
1697 void *ret;
1698
1699 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1700 -1, __builtin_return_address(0));
1701 if (ret) {
1702 area = find_vm_area(ret);
1703 area->flags |= VM_USERMAP;
1704 }
1705 return ret;
1706}
1707EXPORT_SYMBOL(vmalloc_32_user);
1708
1709
1710
1711
1712
1713
1714static int aligned_vread(char *buf, char *addr, unsigned long count)
1715{
1716 struct page *p;
1717 int copied = 0;
1718
1719 while (count) {
1720 unsigned long offset, length;
1721
1722 offset = (unsigned long)addr & ~PAGE_MASK;
1723 length = PAGE_SIZE - offset;
1724 if (length > count)
1725 length = count;
1726 p = vmalloc_to_page(addr);
1727
1728
1729
1730
1731
1732
1733
1734 if (p) {
1735
1736
1737
1738
1739 void *map = kmap_atomic(p, KM_USER0);
1740 memcpy(buf, map + offset, length);
1741 kunmap_atomic(map, KM_USER0);
1742 } else
1743 memset(buf, 0, length);
1744
1745 addr += length;
1746 buf += length;
1747 copied += length;
1748 count -= length;
1749 }
1750 return copied;
1751}
1752
1753static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1754{
1755 struct page *p;
1756 int copied = 0;
1757
1758 while (count) {
1759 unsigned long offset, length;
1760
1761 offset = (unsigned long)addr & ~PAGE_MASK;
1762 length = PAGE_SIZE - offset;
1763 if (length > count)
1764 length = count;
1765 p = vmalloc_to_page(addr);
1766
1767
1768
1769
1770
1771
1772
1773 if (p) {
1774
1775
1776
1777
1778 void *map = kmap_atomic(p, KM_USER0);
1779 memcpy(map + offset, buf, length);
1780 kunmap_atomic(map, KM_USER0);
1781 }
1782 addr += length;
1783 buf += length;
1784 copied += length;
1785 count -= length;
1786 }
1787 return copied;
1788}
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818long vread(char *buf, char *addr, unsigned long count)
1819{
1820 struct vm_struct *tmp;
1821 char *vaddr, *buf_start = buf;
1822 unsigned long buflen = count;
1823 unsigned long n;
1824
1825
1826 if ((unsigned long) addr + count < count)
1827 count = -(unsigned long) addr;
1828
1829 read_lock(&vmlist_lock);
1830 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1831 vaddr = (char *) tmp->addr;
1832 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1833 continue;
1834 while (addr < vaddr) {
1835 if (count == 0)
1836 goto finished;
1837 *buf = '\0';
1838 buf++;
1839 addr++;
1840 count--;
1841 }
1842 n = vaddr + tmp->size - PAGE_SIZE - addr;
1843 if (n > count)
1844 n = count;
1845 if (!(tmp->flags & VM_IOREMAP))
1846 aligned_vread(buf, addr, n);
1847 else
1848 memset(buf, 0, n);
1849 buf += n;
1850 addr += n;
1851 count -= n;
1852 }
1853finished:
1854 read_unlock(&vmlist_lock);
1855
1856 if (buf == buf_start)
1857 return 0;
1858
1859 if (buf != buf_start + buflen)
1860 memset(buf, 0, buflen - (buf - buf_start));
1861
1862 return buflen;
1863}
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895long vwrite(char *buf, char *addr, unsigned long count)
1896{
1897 struct vm_struct *tmp;
1898 char *vaddr;
1899 unsigned long n, buflen;
1900 int copied = 0;
1901
1902
1903 if ((unsigned long) addr + count < count)
1904 count = -(unsigned long) addr;
1905 buflen = count;
1906
1907 read_lock(&vmlist_lock);
1908 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1909 vaddr = (char *) tmp->addr;
1910 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1911 continue;
1912 while (addr < vaddr) {
1913 if (count == 0)
1914 goto finished;
1915 buf++;
1916 addr++;
1917 count--;
1918 }
1919 n = vaddr + tmp->size - PAGE_SIZE - addr;
1920 if (n > count)
1921 n = count;
1922 if (!(tmp->flags & VM_IOREMAP)) {
1923 aligned_vwrite(buf, addr, n);
1924 copied++;
1925 }
1926 buf += n;
1927 addr += n;
1928 count -= n;
1929 }
1930finished:
1931 read_unlock(&vmlist_lock);
1932 if (!copied)
1933 return 0;
1934 return buflen;
1935}
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
1952 unsigned long pgoff)
1953{
1954 struct vm_struct *area;
1955 unsigned long uaddr = vma->vm_start;
1956 unsigned long usize = vma->vm_end - vma->vm_start;
1957
1958 if ((PAGE_SIZE-1) & (unsigned long)addr)
1959 return -EINVAL;
1960
1961 area = find_vm_area(addr);
1962 if (!area)
1963 return -EINVAL;
1964
1965 if (!(area->flags & VM_USERMAP))
1966 return -EINVAL;
1967
1968 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
1969 return -EINVAL;
1970
1971 addr += pgoff << PAGE_SHIFT;
1972 do {
1973 struct page *page = vmalloc_to_page(addr);
1974 int ret;
1975
1976 ret = vm_insert_page(vma, uaddr, page);
1977 if (ret)
1978 return ret;
1979
1980 uaddr += PAGE_SIZE;
1981 addr += PAGE_SIZE;
1982 usize -= PAGE_SIZE;
1983 } while (usize > 0);
1984
1985
1986 vma->vm_flags |= VM_RESERVED;
1987
1988 return 0;
1989}
1990EXPORT_SYMBOL(remap_vmalloc_range);
1991
1992
1993
1994
1995
/*
 * Default, do-nothing vmalloc_sync_all().  Declared weak so that another
 * definition of the same symbol takes precedence at link time.
 */
__attribute__((weak)) void vmalloc_sync_all(void)
{
	/* Intentionally empty. */
}
1999
2000
2001static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2002{
2003
2004 return 0;
2005}
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019struct vm_struct *alloc_vm_area(size_t size)
2020{
2021 struct vm_struct *area;
2022
2023 area = get_vm_area_caller(size, VM_IOREMAP,
2024 __builtin_return_address(0));
2025 if (area == NULL)
2026 return NULL;
2027
2028
2029
2030
2031
2032 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2033 area->size, f, NULL)) {
2034 free_vm_area(area);
2035 return NULL;
2036 }
2037
2038
2039
2040 vmalloc_sync_all();
2041
2042 return area;
2043}
2044EXPORT_SYMBOL_GPL(alloc_vm_area);
2045
2046void free_vm_area(struct vm_struct *area)
2047{
2048 struct vm_struct *ret;
2049 ret = remove_vm_area(area->addr);
2050 BUG_ON(ret != area);
2051 kfree(area);
2052}
2053EXPORT_SYMBOL_GPL(free_vm_area);
2054
2055static struct vmap_area *node_to_va(struct rb_node *n)
2056{
2057 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2058}
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072static bool pvm_find_next_prev(unsigned long end,
2073 struct vmap_area **pnext,
2074 struct vmap_area **pprev)
2075{
2076 struct rb_node *n = vmap_area_root.rb_node;
2077 struct vmap_area *va = NULL;
2078
2079 while (n) {
2080 va = rb_entry(n, struct vmap_area, rb_node);
2081 if (end < va->va_end)
2082 n = n->rb_left;
2083 else if (end > va->va_end)
2084 n = n->rb_right;
2085 else
2086 break;
2087 }
2088
2089 if (!va)
2090 return false;
2091
2092 if (va->va_end > end) {
2093 *pnext = va;
2094 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2095 } else {
2096 *pprev = va;
2097 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2098 }
2099 return true;
2100}
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118static unsigned long pvm_determine_end(struct vmap_area **pnext,
2119 struct vmap_area **pprev,
2120 unsigned long align)
2121{
2122 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2123 unsigned long addr;
2124
2125 if (*pnext)
2126 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2127 else
2128 addr = vmalloc_end;
2129
2130 while (*pprev && (*pprev)->va_end > addr) {
2131 *pnext = *pprev;
2132 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2133 }
2134
2135 return addr;
2136}
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2164 const size_t *sizes, int nr_vms,
2165 size_t align, gfp_t gfp_mask)
2166{
2167 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2168 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2169 struct vmap_area **vas, *prev, *next;
2170 struct vm_struct **vms;
2171 int area, area2, last_area, term_area;
2172 unsigned long base, start, end, last_end;
2173 bool purged = false;
2174
2175 gfp_mask &= GFP_RECLAIM_MASK;
2176
2177
2178 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2179 for (last_area = 0, area = 0; area < nr_vms; area++) {
2180 start = offsets[area];
2181 end = start + sizes[area];
2182
2183
2184 BUG_ON(!IS_ALIGNED(offsets[area], align));
2185 BUG_ON(!IS_ALIGNED(sizes[area], align));
2186
2187
2188 if (start > offsets[last_area])
2189 last_area = area;
2190
2191 for (area2 = 0; area2 < nr_vms; area2++) {
2192 unsigned long start2 = offsets[area2];
2193 unsigned long end2 = start2 + sizes[area2];
2194
2195 if (area2 == area)
2196 continue;
2197
2198 BUG_ON(start2 >= start && start2 < end);
2199 BUG_ON(end2 <= end && end2 > start);
2200 }
2201 }
2202 last_end = offsets[last_area] + sizes[last_area];
2203
2204 if (vmalloc_end - vmalloc_start < last_end) {
2205 WARN_ON(true);
2206 return NULL;
2207 }
2208
2209 vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
2210 vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
2211 if (!vas || !vms)
2212 goto err_free;
2213
2214 for (area = 0; area < nr_vms; area++) {
2215 vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
2216 vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
2217 if (!vas[area] || !vms[area])
2218 goto err_free;
2219 }
2220retry:
2221 spin_lock(&vmap_area_lock);
2222
2223
2224 area = term_area = last_area;
2225 start = offsets[area];
2226 end = start + sizes[area];
2227
2228 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2229 base = vmalloc_end - last_end;
2230 goto found;
2231 }
2232 base = pvm_determine_end(&next, &prev, align) - end;
2233
2234 while (true) {
2235 BUG_ON(next && next->va_end <= base + end);
2236 BUG_ON(prev && prev->va_end > base + end);
2237
2238
2239
2240
2241
2242 if (base + last_end < vmalloc_start + last_end) {
2243 spin_unlock(&vmap_area_lock);
2244 if (!purged) {
2245 purge_vmap_area_lazy();
2246 purged = true;
2247 goto retry;
2248 }
2249 goto err_free;
2250 }
2251
2252
2253
2254
2255
2256 if (next && next->va_start < base + end) {
2257 base = pvm_determine_end(&next, &prev, align) - end;
2258 term_area = area;
2259 continue;
2260 }
2261
2262
2263
2264
2265
2266
2267 if (prev && prev->va_end > base + start) {
2268 next = prev;
2269 prev = node_to_va(rb_prev(&next->rb_node));
2270 base = pvm_determine_end(&next, &prev, align) - end;
2271 term_area = area;
2272 continue;
2273 }
2274
2275
2276
2277
2278
2279 area = (area + nr_vms - 1) % nr_vms;
2280 if (area == term_area)
2281 break;
2282 start = offsets[area];
2283 end = start + sizes[area];
2284 pvm_find_next_prev(base + end, &next, &prev);
2285 }
2286found:
2287
2288 for (area = 0; area < nr_vms; area++) {
2289 struct vmap_area *va = vas[area];
2290
2291 va->va_start = base + offsets[area];
2292 va->va_end = va->va_start + sizes[area];
2293 __insert_vmap_area(va);
2294 }
2295
2296 vmap_area_pcpu_hole = base + offsets[last_area];
2297
2298 spin_unlock(&vmap_area_lock);
2299
2300
2301 for (area = 0; area < nr_vms; area++)
2302 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2303 pcpu_get_vm_areas);
2304
2305 kfree(vas);
2306 return vms;
2307
2308err_free:
2309 for (area = 0; area < nr_vms; area++) {
2310 if (vas)
2311 kfree(vas[area]);
2312 if (vms)
2313 kfree(vms[area]);
2314 }
2315 kfree(vas);
2316 kfree(vms);
2317 return NULL;
2318}
2319
2320
2321
2322
2323
2324
2325
2326
/*
 * pcpu_free_vm_areas - release an array of vm areas
 * @vms:    array of @nr_vms areas, itself freed as well
 * @nr_vms: number of entries in @vms
 *
 * Each area is released with free_vm_area(), in index order.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx = 0;

	while (idx < nr_vms) {
		free_vm_area(vms[idx]);
		idx++;
	}

	kfree(vms);
}
2335
2336#ifdef CONFIG_PROC_FS
2337static void *s_start(struct seq_file *m, loff_t *pos)
2338{
2339 loff_t n = *pos;
2340 struct vm_struct *v;
2341
2342 read_lock(&vmlist_lock);
2343 v = vmlist;
2344 while (n > 0 && v) {
2345 n--;
2346 v = v->next;
2347 }
2348 if (!n)
2349 return v;
2350
2351 return NULL;
2352
2353}
2354
2355static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2356{
2357 struct vm_struct *v = p;
2358
2359 ++*pos;
2360 return v->next;
2361}
2362
2363static void s_stop(struct seq_file *m, void *p)
2364{
2365 read_unlock(&vmlist_lock);
2366}
2367
2368static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2369{
2370 if (NUMA_BUILD) {
2371 unsigned int nr, *counters = m->private;
2372
2373 if (!counters)
2374 return;
2375
2376 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2377
2378 for (nr = 0; nr < v->nr_pages; nr++)
2379 counters[page_to_nid(v->pages[nr])]++;
2380
2381 for_each_node_state(nr, N_HIGH_MEMORY)
2382 if (counters[nr])
2383 seq_printf(m, " N%u=%u", nr, counters[nr]);
2384 }
2385}
2386
2387static int s_show(struct seq_file *m, void *p)
2388{
2389 struct vm_struct *v = p;
2390
2391 seq_printf(m, "0x%p-0x%p %7ld",
2392 v->addr, v->addr + v->size, v->size);
2393
2394 if (v->caller) {
2395 char buff[KSYM_SYMBOL_LEN];
2396
2397 seq_putc(m, ' ');
2398 sprint_symbol(buff, (unsigned long)v->caller);
2399 seq_puts(m, buff);
2400 }
2401
2402 if (v->nr_pages)
2403 seq_printf(m, " pages=%d", v->nr_pages);
2404
2405 if (v->phys_addr)
2406 seq_printf(m, " phys=%lx", v->phys_addr);
2407
2408 if (v->flags & VM_IOREMAP)
2409 seq_printf(m, " ioremap");
2410
2411 if (v->flags & VM_ALLOC)
2412 seq_printf(m, " vmalloc");
2413
2414 if (v->flags & VM_MAP)
2415 seq_printf(m, " vmap");
2416
2417 if (v->flags & VM_USERMAP)
2418 seq_printf(m, " user");
2419
2420 if (v->flags & VM_VPAGES)
2421 seq_printf(m, " vpages");
2422
2423 show_numa_info(m, v);
2424 seq_putc(m, '\n');
2425 return 0;
2426}
2427
2428static const struct seq_operations vmalloc_op = {
2429 .start = s_start,
2430 .next = s_next,
2431 .stop = s_stop,
2432 .show = s_show,
2433};
2434
2435static int vmalloc_open(struct inode *inode, struct file *file)
2436{
2437 unsigned int *ptr = NULL;
2438 int ret;
2439
2440 if (NUMA_BUILD)
2441 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2442 ret = seq_open(file, &vmalloc_op);
2443 if (!ret) {
2444 struct seq_file *m = file->private_data;
2445 m->private = ptr;
2446 } else
2447 kfree(ptr);
2448 return ret;
2449}
2450
2451static const struct file_operations proc_vmalloc_operations = {
2452 .open = vmalloc_open,
2453 .read = seq_read,
2454 .llseek = seq_lseek,
2455 .release = seq_release_private,
2456};
2457
2458static int __init proc_vmalloc_init(void)
2459{
2460 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2461 return 0;
2462}
2463module_init(proc_vmalloc_init);
2464#endif
2465
2466