1
2
3
4
5
6
7
8
9
10
11#include <linux/vmalloc.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/highmem.h>
15#include <linux/sched.h>
16#include <linux/slab.h>
17#include <linux/spinlock.h>
18#include <linux/interrupt.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/debugobjects.h>
22#include <linux/kallsyms.h>
23#include <linux/list.h>
24#include <linux/rbtree.h>
25#include <linux/radix-tree.h>
26#include <linux/rcupdate.h>
27#include <linux/pfn.h>
28#include <linux/kmemleak.h>
29#include <asm/atomic.h>
30#include <asm/uaccess.h>
31#include <asm/tlbflush.h>
32#include <asm/shmparam.h>
33
34
35
36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
37{
38 pte_t *pte;
39
40 pte = pte_offset_kernel(pmd, addr);
41 do {
42 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
43 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
44 } while (pte++, addr += PAGE_SIZE, addr != end);
45}
46
47static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
48{
49 pmd_t *pmd;
50 unsigned long next;
51
52 pmd = pmd_offset(pud, addr);
53 do {
54 next = pmd_addr_end(addr, end);
55 if (pmd_none_or_clear_bad(pmd))
56 continue;
57 vunmap_pte_range(pmd, addr, next);
58 } while (pmd++, addr = next, addr != end);
59}
60
61static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
62{
63 pud_t *pud;
64 unsigned long next;
65
66 pud = pud_offset(pgd, addr);
67 do {
68 next = pud_addr_end(addr, end);
69 if (pud_none_or_clear_bad(pud))
70 continue;
71 vunmap_pmd_range(pud, addr, next);
72 } while (pud++, addr = next, addr != end);
73}
74
75static void vunmap_page_range(unsigned long addr, unsigned long end)
76{
77 pgd_t *pgd;
78 unsigned long next;
79
80 BUG_ON(addr >= end);
81 pgd = pgd_offset_k(addr);
82 do {
83 next = pgd_addr_end(addr, end);
84 if (pgd_none_or_clear_bad(pgd))
85 continue;
86 vunmap_pud_range(pgd, addr, next);
87 } while (pgd++, addr = next, addr != end);
88}
89
90static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
91 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
92{
93 pte_t *pte;
94
95
96
97
98
99
100 pte = pte_alloc_kernel(pmd, addr);
101 if (!pte)
102 return -ENOMEM;
103 do {
104 struct page *page = pages[*nr];
105
106 if (WARN_ON(!pte_none(*pte)))
107 return -EBUSY;
108 if (WARN_ON(!page))
109 return -ENOMEM;
110 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
111 (*nr)++;
112 } while (pte++, addr += PAGE_SIZE, addr != end);
113 return 0;
114}
115
116static int vmap_pmd_range(pud_t *pud, unsigned long addr,
117 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
118{
119 pmd_t *pmd;
120 unsigned long next;
121
122 pmd = pmd_alloc(&init_mm, pud, addr);
123 if (!pmd)
124 return -ENOMEM;
125 do {
126 next = pmd_addr_end(addr, end);
127 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
128 return -ENOMEM;
129 } while (pmd++, addr = next, addr != end);
130 return 0;
131}
132
133static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
134 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
135{
136 pud_t *pud;
137 unsigned long next;
138
139 pud = pud_alloc(&init_mm, pgd, addr);
140 if (!pud)
141 return -ENOMEM;
142 do {
143 next = pud_addr_end(addr, end);
144 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
145 return -ENOMEM;
146 } while (pud++, addr = next, addr != end);
147 return 0;
148}
149
150
151
152
153
154
155
156static int vmap_page_range_noflush(unsigned long start, unsigned long end,
157 pgprot_t prot, struct page **pages)
158{
159 pgd_t *pgd;
160 unsigned long next;
161 unsigned long addr = start;
162 int err = 0;
163 int nr = 0;
164
165 BUG_ON(addr >= end);
166 pgd = pgd_offset_k(addr);
167 do {
168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err)
171 return err;
172 } while (pgd++, addr = next, addr != end);
173
174 return nr;
175}
176
177static int vmap_page_range(unsigned long start, unsigned long end,
178 pgprot_t prot, struct page **pages)
179{
180 int ret;
181
182 ret = vmap_page_range_noflush(start, end, prot, pages);
183 flush_cache_vmap(start, end);
184 return ret;
185}
186
/*
 * Return non-zero if @x lies in the vmalloc range or, when the architecture
 * defines a separate module area (CONFIG_MODULES && MODULES_VADDR), inside
 * [MODULES_VADDR, MODULES_END).
 */
int is_vmalloc_or_module_addr(const void *x)
{
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long candidate = (unsigned long)x;

	if (candidate >= MODULES_VADDR && candidate < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}
201
202
203
204
205struct page *vmalloc_to_page(const void *vmalloc_addr)
206{
207 unsigned long addr = (unsigned long) vmalloc_addr;
208 struct page *page = NULL;
209 pgd_t *pgd = pgd_offset_k(addr);
210
211
212
213
214
215 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
216
217 if (!pgd_none(*pgd)) {
218 pud_t *pud = pud_offset(pgd, addr);
219 if (!pud_none(*pud)) {
220 pmd_t *pmd = pmd_offset(pud, addr);
221 if (!pmd_none(*pmd)) {
222 pte_t *ptep, pte;
223
224 ptep = pte_offset_map(pmd, addr);
225 pte = *ptep;
226 if (pte_present(pte))
227 page = pte_page(pte);
228 pte_unmap(ptep);
229 }
230 }
231 }
232 return page;
233}
234EXPORT_SYMBOL(vmalloc_to_page);
235
236
237
238
/*
 * Map a vmalloc virtual address to the page frame number of its backing
 * page. The result of vmalloc_to_page() is passed to page_to_pfn()
 * without a NULL check.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	struct page *page = vmalloc_to_page(vmalloc_addr);

	return page_to_pfn(page);
}
EXPORT_SYMBOL(vmalloc_to_pfn);
244
245
246
247
/* Bits for vmap_area.flags */
#define VM_LAZY_FREE	0x01	/* set by free_vmap_area_noflush(): awaiting purge */
#define VM_LAZY_FREEING	0x02	/* set by __purge_vmap_area_lazy(): being freed */
#define VM_VM_AREA	0x04	/* ->private points to a struct vm_struct */

/* One reserved range [va_start, va_end) of kernel virtual address space. */
struct vmap_area {
	unsigned long va_start;		/* first address of the range */
	unsigned long va_end;		/* one past the last address */
	unsigned long flags;		/* VM_LAZY_FREE / VM_LAZY_FREEING / VM_VM_AREA */
	struct rb_node rb_node;		/* node in vmap_area_root */
	struct list_head list;		/* entry in vmap_area_list (RCU list ops) */
	struct list_head purge_list;	/* private list used while purging */
	void *private;			/* vm_struct when VM_VM_AREA is set */
	struct rcu_head rcu_head;	/* deferred kfree via call_rcu() */
};

/* Protects vmap_area_root and updates of vmap_area_list. */
static DEFINE_SPINLOCK(vmap_area_lock);
static struct rb_root vmap_area_root = RB_ROOT;
/* All areas; new entries are linked after their rbtree predecessor. */
static LIST_HEAD(vmap_area_list);
/*
 * Initialised to VMALLOC_END and raised in __free_vmap_area(); consumers
 * are not visible in this part of the file.
 */
static unsigned long vmap_area_pcpu_hole;
267
268static struct vmap_area *__find_vmap_area(unsigned long addr)
269{
270 struct rb_node *n = vmap_area_root.rb_node;
271
272 while (n) {
273 struct vmap_area *va;
274
275 va = rb_entry(n, struct vmap_area, rb_node);
276 if (addr < va->va_start)
277 n = n->rb_left;
278 else if (addr > va->va_start)
279 n = n->rb_right;
280 else
281 return va;
282 }
283
284 return NULL;
285}
286
287static void __insert_vmap_area(struct vmap_area *va)
288{
289 struct rb_node **p = &vmap_area_root.rb_node;
290 struct rb_node *parent = NULL;
291 struct rb_node *tmp;
292
293 while (*p) {
294 struct vmap_area *tmp_va;
295
296 parent = *p;
297 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
298 if (va->va_start < tmp_va->va_end)
299 p = &(*p)->rb_left;
300 else if (va->va_end > tmp_va->va_start)
301 p = &(*p)->rb_right;
302 else
303 BUG();
304 }
305
306 rb_link_node(&va->rb_node, parent, p);
307 rb_insert_color(&va->rb_node, &vmap_area_root);
308
309
310 tmp = rb_prev(&va->rb_node);
311 if (tmp) {
312 struct vmap_area *prev;
313 prev = rb_entry(tmp, struct vmap_area, rb_node);
314 list_add_rcu(&va->list, &prev->list);
315 } else
316 list_add_rcu(&va->list, &vmap_area_list);
317}
318
319static void purge_vmap_area_lazy(void);
320
321
322
323
324
325static struct vmap_area *alloc_vmap_area(unsigned long size,
326 unsigned long align,
327 unsigned long vstart, unsigned long vend,
328 int node, gfp_t gfp_mask)
329{
330 struct vmap_area *va;
331 struct rb_node *n;
332 unsigned long addr;
333 int purged = 0;
334
335 BUG_ON(!size);
336 BUG_ON(size & ~PAGE_MASK);
337
338 va = kmalloc_node(sizeof(struct vmap_area),
339 gfp_mask & GFP_RECLAIM_MASK, node);
340 if (unlikely(!va))
341 return ERR_PTR(-ENOMEM);
342
343retry:
344 addr = ALIGN(vstart, align);
345
346 spin_lock(&vmap_area_lock);
347 if (addr + size - 1 < addr)
348 goto overflow;
349
350
351 n = vmap_area_root.rb_node;
352 if (n) {
353 struct vmap_area *first = NULL;
354
355 do {
356 struct vmap_area *tmp;
357 tmp = rb_entry(n, struct vmap_area, rb_node);
358 if (tmp->va_end >= addr) {
359 if (!first && tmp->va_start < addr + size)
360 first = tmp;
361 n = n->rb_left;
362 } else {
363 first = tmp;
364 n = n->rb_right;
365 }
366 } while (n);
367
368 if (!first)
369 goto found;
370
371 if (first->va_end < addr) {
372 n = rb_next(&first->rb_node);
373 if (n)
374 first = rb_entry(n, struct vmap_area, rb_node);
375 else
376 goto found;
377 }
378
379 while (addr + size > first->va_start && addr + size <= vend) {
380 addr = ALIGN(first->va_end + PAGE_SIZE, align);
381 if (addr + size - 1 < addr)
382 goto overflow;
383
384 n = rb_next(&first->rb_node);
385 if (n)
386 first = rb_entry(n, struct vmap_area, rb_node);
387 else
388 goto found;
389 }
390 }
391found:
392 if (addr + size > vend) {
393overflow:
394 spin_unlock(&vmap_area_lock);
395 if (!purged) {
396 purge_vmap_area_lazy();
397 purged = 1;
398 goto retry;
399 }
400 if (printk_ratelimit())
401 printk(KERN_WARNING
402 "vmap allocation for size %lu failed: "
403 "use vmalloc=<size> to increase size.\n", size);
404 kfree(va);
405 return ERR_PTR(-EBUSY);
406 }
407
408 BUG_ON(addr & (align-1));
409
410 va->va_start = addr;
411 va->va_end = addr + size;
412 va->flags = 0;
413 __insert_vmap_area(va);
414 spin_unlock(&vmap_area_lock);
415
416 return va;
417}
418
419static void rcu_free_va(struct rcu_head *head)
420{
421 struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
422
423 kfree(va);
424}
425
426static void __free_vmap_area(struct vmap_area *va)
427{
428 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
429 rb_erase(&va->rb_node, &vmap_area_root);
430 RB_CLEAR_NODE(&va->rb_node);
431 list_del_rcu(&va->list);
432
433
434
435
436
437
438
439 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
440 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
441
442 call_rcu(&va->rcu_head, rcu_free_va);
443}
444
445
446
447
/*
 * Free a vmap area: locked wrapper around __free_vmap_area().
 * No page-table or TLB work is done here.
 */
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}
454
455
456
457
458static void unmap_vmap_area(struct vmap_area *va)
459{
460 vunmap_page_range(va->va_start, va->va_end);
461}
462
/*
 * Debug aid: with CONFIG_DEBUG_PAGEALLOC, unmap [start, end) and flush the
 * kernel TLB for it immediately instead of leaving that to the lazy path.
 * Without that option this function does nothing.
 */
static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500static unsigned long lazy_max_pages(void)
501{
502 unsigned int log;
503
504 log = fls(num_online_cpus());
505
506 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
507}
508
509static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
510
511
512static void purge_fragmented_blocks_allcpus(void);
513
514
515
516
517
518void set_iounmap_nonlazy(void)
519{
520 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
521}
522
523
524
525
526
527
528
529
530
531
532
533static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
534 int sync, int force_flush)
535{
536 static DEFINE_SPINLOCK(purge_lock);
537 LIST_HEAD(valist);
538 struct vmap_area *va;
539 struct vmap_area *n_va;
540 int nr = 0;
541
542
543
544
545
546
547 if (!sync && !force_flush) {
548 if (!spin_trylock(&purge_lock))
549 return;
550 } else
551 spin_lock(&purge_lock);
552
553 if (sync)
554 purge_fragmented_blocks_allcpus();
555
556 rcu_read_lock();
557 list_for_each_entry_rcu(va, &vmap_area_list, list) {
558 if (va->flags & VM_LAZY_FREE) {
559 if (va->va_start < *start)
560 *start = va->va_start;
561 if (va->va_end > *end)
562 *end = va->va_end;
563 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
564 list_add_tail(&va->purge_list, &valist);
565 va->flags |= VM_LAZY_FREEING;
566 va->flags &= ~VM_LAZY_FREE;
567 }
568 }
569 rcu_read_unlock();
570
571 if (nr)
572 atomic_sub(nr, &vmap_lazy_nr);
573
574 if (nr || force_flush)
575 flush_tlb_kernel_range(*start, *end);
576
577 if (nr) {
578 spin_lock(&vmap_area_lock);
579 list_for_each_entry_safe(va, n_va, &valist, purge_list)
580 __free_vmap_area(va);
581 spin_unlock(&vmap_area_lock);
582 }
583 spin_unlock(&purge_lock);
584}
585
586
587
588
589
590static void try_purge_vmap_area_lazy(void)
591{
592 unsigned long start = ULONG_MAX, end = 0;
593
594 __purge_vmap_area_lazy(&start, &end, 0, 0);
595}
596
597
598
599
600static void purge_vmap_area_lazy(void)
601{
602 unsigned long start = ULONG_MAX, end = 0;
603
604 __purge_vmap_area_lazy(&start, &end, 1, 0);
605}
606
607
608
609
610
611
612static void free_vmap_area_noflush(struct vmap_area *va)
613{
614 va->flags |= VM_LAZY_FREE;
615 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
616 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
617 try_purge_vmap_area_lazy();
618}
619
620
621
622
623
/*
 * Clear the page tables for @va right away, then queue the area for lazy
 * freeing. The caller is responsible for any cache flush.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);
	free_vmap_area_noflush(va);
}
629
630
631
632
/*
 * Flush the cache for @va's range, unmap it and queue it for lazy freeing.
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	free_unmap_vmap_area_noflush(va);
}
638
639static struct vmap_area *find_vmap_area(unsigned long addr)
640{
641 struct vmap_area *va;
642
643 spin_lock(&vmap_area_lock);
644 va = __find_vmap_area(addr);
645 spin_unlock(&vmap_area_lock);
646
647 return va;
648}
649
/*
 * Unmap and lazily free the area that starts at @addr.
 * BUGs if no such area is registered.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va = find_vmap_area(addr);

	BUG_ON(!va);
	free_unmap_vmap_area(va);
}
658
659
660
661
662
663
664
665
666
667
668
669
670
/*
 * Sizing of the per-CPU vmap block allocator.
 *
 * VMALLOC_SPACE is a nominal figure used only for sizing here:
 * 128MB on 32-bit, 128GB on 64-bit.
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
/* Largest request, in pages, that vm_map_ram() serves from a vmap block. */
#define VMAP_MAX_ALLOC		BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX	1024	/* upper clamp for pages per block */
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)	/* lower clamp */
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
/* Pages per block: VMALLOC_PAGES / NR_CPUS / 16, clamped to [MIN, MAX]. */
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

/* Size in bytes of one vmap block; also used as its alignment. */
#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)

/* Set at the end of vmalloc_init(); vm_unmap_aliases() is a no-op before. */
static bool vmap_initialized __read_mostly = false;
690
/* Per-CPU list of vmap blocks that still have free space. */
struct vmap_block_queue {
	spinlock_t lock;		/* protects modifications of ->free */
	struct list_head free;		/* blocks, linked via vmap_block.free_list */
};

/* A VMAP_BLOCK_SIZE chunk of address space carved up page by page. */
struct vmap_block {
	spinlock_t lock;		/* protects counters and bitmaps */
	struct vmap_area *va;		/* address range backing this block */
	struct vmap_block_queue *vbq;	/* queue the block was added to */
	unsigned long free, dirty;	/* pages never handed out / pages freed */
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);	/* pages handed out */
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);	/* pages freed again */
	struct list_head free_list;	/* entry in vmap_block_queue.free */
	struct rcu_head rcu_head;	/* deferred kfree via call_rcu() */
	struct list_head purge;		/* private list in purge_fragmented_blocks() */
};

/* Queue of free vmap blocks, one per CPU; initialised in vmalloc_init(). */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree mapping a block index (see addr_to_vb_idx()) to its
 * vmap_block, so vb_free() can find the block for an address.
 * Insertions and deletions take vmap_block_tree_lock.
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
718
719
720
721
722
723
724
725
726static unsigned long addr_to_vb_idx(unsigned long addr)
727{
728 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
729 addr /= VMAP_BLOCK_SIZE;
730 return addr;
731}
732
733static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
734{
735 struct vmap_block_queue *vbq;
736 struct vmap_block *vb;
737 struct vmap_area *va;
738 unsigned long vb_idx;
739 int node, err;
740
741 node = numa_node_id();
742
743 vb = kmalloc_node(sizeof(struct vmap_block),
744 gfp_mask & GFP_RECLAIM_MASK, node);
745 if (unlikely(!vb))
746 return ERR_PTR(-ENOMEM);
747
748 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
749 VMALLOC_START, VMALLOC_END,
750 node, gfp_mask);
751 if (unlikely(IS_ERR(va))) {
752 kfree(vb);
753 return ERR_CAST(va);
754 }
755
756 err = radix_tree_preload(gfp_mask);
757 if (unlikely(err)) {
758 kfree(vb);
759 free_vmap_area(va);
760 return ERR_PTR(err);
761 }
762
763 spin_lock_init(&vb->lock);
764 vb->va = va;
765 vb->free = VMAP_BBMAP_BITS;
766 vb->dirty = 0;
767 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
768 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
769 INIT_LIST_HEAD(&vb->free_list);
770
771 vb_idx = addr_to_vb_idx(va->va_start);
772 spin_lock(&vmap_block_tree_lock);
773 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
774 spin_unlock(&vmap_block_tree_lock);
775 BUG_ON(err);
776 radix_tree_preload_end();
777
778 vbq = &get_cpu_var(vmap_block_queue);
779 vb->vbq = vbq;
780 spin_lock(&vbq->lock);
781 list_add_rcu(&vb->free_list, &vbq->free);
782 spin_unlock(&vbq->lock);
783 put_cpu_var(vmap_block_queue);
784
785 return vb;
786}
787
788static void rcu_free_vb(struct rcu_head *head)
789{
790 struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
791
792 kfree(vb);
793}
794
795static void free_vmap_block(struct vmap_block *vb)
796{
797 struct vmap_block *tmp;
798 unsigned long vb_idx;
799
800 vb_idx = addr_to_vb_idx(vb->va->va_start);
801 spin_lock(&vmap_block_tree_lock);
802 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
803 spin_unlock(&vmap_block_tree_lock);
804 BUG_ON(tmp != vb);
805
806 free_vmap_area_noflush(vb->va);
807 call_rcu(&vb->rcu_head, rcu_free_vb);
808}
809
810static void purge_fragmented_blocks(int cpu)
811{
812 LIST_HEAD(purge);
813 struct vmap_block *vb;
814 struct vmap_block *n_vb;
815 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
816
817 rcu_read_lock();
818 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
819
820 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
821 continue;
822
823 spin_lock(&vb->lock);
824 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
825 vb->free = 0;
826 vb->dirty = VMAP_BBMAP_BITS;
827 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
828 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
829 spin_lock(&vbq->lock);
830 list_del_rcu(&vb->free_list);
831 spin_unlock(&vbq->lock);
832 spin_unlock(&vb->lock);
833 list_add_tail(&vb->purge, &purge);
834 } else
835 spin_unlock(&vb->lock);
836 }
837 rcu_read_unlock();
838
839 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
840 list_del(&vb->purge);
841 free_vmap_block(vb);
842 }
843}
844
/* Purge fragmented blocks on the queue of the CPU we are running on. */
static void purge_fragmented_blocks_thiscpu(void)
{
	int this_cpu = smp_processor_id();

	purge_fragmented_blocks(this_cpu);
}
849
/* Purge fragmented blocks on the queue of every possible CPU. */
static void purge_fragmented_blocks_allcpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		purge_fragmented_blocks(cpu);
}
857
858static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
859{
860 struct vmap_block_queue *vbq;
861 struct vmap_block *vb;
862 unsigned long addr = 0;
863 unsigned int order;
864 int purge = 0;
865
866 BUG_ON(size & ~PAGE_MASK);
867 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
868 order = get_order(size);
869
870again:
871 rcu_read_lock();
872 vbq = &get_cpu_var(vmap_block_queue);
873 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
874 int i;
875
876 spin_lock(&vb->lock);
877 if (vb->free < 1UL << order)
878 goto next;
879
880 i = bitmap_find_free_region(vb->alloc_map,
881 VMAP_BBMAP_BITS, order);
882
883 if (i < 0) {
884 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
885
886 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
887 purge = 1;
888 }
889 goto next;
890 }
891 addr = vb->va->va_start + (i << PAGE_SHIFT);
892 BUG_ON(addr_to_vb_idx(addr) !=
893 addr_to_vb_idx(vb->va->va_start));
894 vb->free -= 1UL << order;
895 if (vb->free == 0) {
896 spin_lock(&vbq->lock);
897 list_del_rcu(&vb->free_list);
898 spin_unlock(&vbq->lock);
899 }
900 spin_unlock(&vb->lock);
901 break;
902next:
903 spin_unlock(&vb->lock);
904 }
905
906 if (purge)
907 purge_fragmented_blocks_thiscpu();
908
909 put_cpu_var(vmap_block_queue);
910 rcu_read_unlock();
911
912 if (!addr) {
913 vb = new_vmap_block(gfp_mask);
914 if (IS_ERR(vb))
915 return vb;
916 goto again;
917 }
918
919 return (void *)addr;
920}
921
922static void vb_free(const void *addr, unsigned long size)
923{
924 unsigned long offset;
925 unsigned long vb_idx;
926 unsigned int order;
927 struct vmap_block *vb;
928
929 BUG_ON(size & ~PAGE_MASK);
930 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
931
932 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
933
934 order = get_order(size);
935
936 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
937
938 vb_idx = addr_to_vb_idx((unsigned long)addr);
939 rcu_read_lock();
940 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
941 rcu_read_unlock();
942 BUG_ON(!vb);
943
944 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
945
946 spin_lock(&vb->lock);
947 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
948
949 vb->dirty += 1UL << order;
950 if (vb->dirty == VMAP_BBMAP_BITS) {
951 BUG_ON(vb->free);
952 spin_unlock(&vb->lock);
953 free_vmap_block(vb);
954 } else
955 spin_unlock(&vb->lock);
956}
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971void vm_unmap_aliases(void)
972{
973 unsigned long start = ULONG_MAX, end = 0;
974 int cpu;
975 int flush = 0;
976
977 if (unlikely(!vmap_initialized))
978 return;
979
980 for_each_possible_cpu(cpu) {
981 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
982 struct vmap_block *vb;
983
984 rcu_read_lock();
985 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
986 int i;
987
988 spin_lock(&vb->lock);
989 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
990 while (i < VMAP_BBMAP_BITS) {
991 unsigned long s, e;
992 int j;
993 j = find_next_zero_bit(vb->dirty_map,
994 VMAP_BBMAP_BITS, i);
995
996 s = vb->va->va_start + (i << PAGE_SHIFT);
997 e = vb->va->va_start + (j << PAGE_SHIFT);
998 flush = 1;
999
1000 if (s < start)
1001 start = s;
1002 if (e > end)
1003 end = e;
1004
1005 i = j;
1006 i = find_next_bit(vb->dirty_map,
1007 VMAP_BBMAP_BITS, i);
1008 }
1009 spin_unlock(&vb->lock);
1010 }
1011 rcu_read_unlock();
1012 }
1013
1014 __purge_vmap_area_lazy(&start, &end, 1, flush);
1015}
1016EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1017
1018
1019
1020
1021
1022
1023void vm_unmap_ram(const void *mem, unsigned int count)
1024{
1025 unsigned long size = count << PAGE_SHIFT;
1026 unsigned long addr = (unsigned long)mem;
1027
1028 BUG_ON(!addr);
1029 BUG_ON(addr < VMALLOC_START);
1030 BUG_ON(addr > VMALLOC_END);
1031 BUG_ON(addr & (PAGE_SIZE-1));
1032
1033 debug_check_no_locks_freed(mem, size);
1034 vmap_debug_free_range(addr, addr+size);
1035
1036 if (likely(count <= VMAP_MAX_ALLOC))
1037 vb_free(mem, size);
1038 else
1039 free_unmap_vmap_area_addr(addr);
1040}
1041EXPORT_SYMBOL(vm_unmap_ram);
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1053{
1054 unsigned long size = count << PAGE_SHIFT;
1055 unsigned long addr;
1056 void *mem;
1057
1058 if (likely(count <= VMAP_MAX_ALLOC)) {
1059 mem = vb_alloc(size, GFP_KERNEL);
1060 if (IS_ERR(mem))
1061 return NULL;
1062 addr = (unsigned long)mem;
1063 } else {
1064 struct vmap_area *va;
1065 va = alloc_vmap_area(size, PAGE_SIZE,
1066 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1067 if (IS_ERR(va))
1068 return NULL;
1069
1070 addr = va->va_start;
1071 mem = (void *)addr;
1072 }
1073 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1074 vm_unmap_ram(mem, count);
1075 return NULL;
1076 }
1077 return mem;
1078}
1079EXPORT_SYMBOL(vm_map_ram);
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1094{
1095 static size_t vm_init_off __initdata;
1096 unsigned long addr;
1097
1098 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1099 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1100
1101 vm->addr = (void *)addr;
1102
1103 vm->next = vmlist;
1104 vmlist = vm;
1105}
1106
1107void __init vmalloc_init(void)
1108{
1109 struct vmap_area *va;
1110 struct vm_struct *tmp;
1111 int i;
1112
1113 for_each_possible_cpu(i) {
1114 struct vmap_block_queue *vbq;
1115
1116 vbq = &per_cpu(vmap_block_queue, i);
1117 spin_lock_init(&vbq->lock);
1118 INIT_LIST_HEAD(&vbq->free);
1119 }
1120
1121
1122 for (tmp = vmlist; tmp; tmp = tmp->next) {
1123 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1124 va->flags = tmp->flags | VM_VM_AREA;
1125 va->va_start = (unsigned long)tmp->addr;
1126 va->va_end = va->va_start + tmp->size;
1127 __insert_vmap_area(va);
1128 }
1129
1130 vmap_area_pcpu_hole = VMALLOC_END;
1131
1132 vmap_initialized = true;
1133}
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1155 pgprot_t prot, struct page **pages)
1156{
1157 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1158}
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
/*
 * Clear the kernel page table entries for [addr, addr + size) without any
 * cache or TLB flushing; the caller is responsible for both.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	vunmap_page_range(addr, addr + size);
}
1178
1179
1180
1181
1182
1183
1184
1185
1186
/*
 * Unmap the kernel virtual range [addr, addr + size): flush the cache
 * first, clear the page tables, then flush the TLB.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	unsigned long limit = addr + size;

	flush_cache_vunmap(addr, limit);
	vunmap_page_range(addr, limit);
	flush_tlb_kernel_range(addr, limit);
}
1195
1196int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1197{
1198 unsigned long addr = (unsigned long)area->addr;
1199 unsigned long end = addr + area->size - PAGE_SIZE;
1200 int err;
1201
1202 err = vmap_page_range(addr, end, prot, *pages);
1203 if (err > 0) {
1204 *pages += err;
1205 err = 0;
1206 }
1207
1208 return err;
1209}
1210EXPORT_SYMBOL_GPL(map_vm_area);
1211
1212
/*
 * Address-ordered, singly linked list of vm_structs (see
 * insert_vmalloc_vm()); modifications take vmlist_lock for writing.
 */
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;
1215
1216static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1217 unsigned long flags, void *caller)
1218{
1219 struct vm_struct *tmp, **p;
1220
1221 vm->flags = flags;
1222 vm->addr = (void *)va->va_start;
1223 vm->size = va->va_end - va->va_start;
1224 vm->caller = caller;
1225 va->private = vm;
1226 va->flags |= VM_VM_AREA;
1227
1228 write_lock(&vmlist_lock);
1229 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1230 if (tmp->addr >= vm->addr)
1231 break;
1232 }
1233 vm->next = *p;
1234 *p = vm;
1235 write_unlock(&vmlist_lock);
1236}
1237
1238static struct vm_struct *__get_vm_area_node(unsigned long size,
1239 unsigned long align, unsigned long flags, unsigned long start,
1240 unsigned long end, int node, gfp_t gfp_mask, void *caller)
1241{
1242 static struct vmap_area *va;
1243 struct vm_struct *area;
1244
1245 BUG_ON(in_interrupt());
1246 if (flags & VM_IOREMAP) {
1247 int bit = fls(size);
1248
1249 if (bit > IOREMAP_MAX_ORDER)
1250 bit = IOREMAP_MAX_ORDER;
1251 else if (bit < PAGE_SHIFT)
1252 bit = PAGE_SHIFT;
1253
1254 align = 1ul << bit;
1255 }
1256
1257 size = PAGE_ALIGN(size);
1258 if (unlikely(!size))
1259 return NULL;
1260
1261 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1262 if (unlikely(!area))
1263 return NULL;
1264
1265
1266
1267
1268 size += PAGE_SIZE;
1269
1270 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1271 if (IS_ERR(va)) {
1272 kfree(area);
1273 return NULL;
1274 }
1275
1276 insert_vmalloc_vm(area, va, flags, caller);
1277 return area;
1278}
1279
1280struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1281 unsigned long start, unsigned long end)
1282{
1283 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1284 __builtin_return_address(0));
1285}
1286EXPORT_SYMBOL_GPL(__get_vm_area);
1287
/*
 * Like __get_vm_area(), but records the explicitly supplied @caller in the
 * area instead of this function's own return address.
 */
struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				       unsigned long start, unsigned long end,
				       void *caller)
{
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  caller);
}
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1306{
1307 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1308 -1, GFP_KERNEL, __builtin_return_address(0));
1309}
1310
/*
 * Like get_vm_area(), but records the explicitly supplied @caller in the
 * area.
 */
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				void *caller)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
						-1, GFP_KERNEL, caller);
}
1317
1318struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
1319 int node, gfp_t gfp_mask)
1320{
1321 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1322 node, gfp_mask, __builtin_return_address(0));
1323}
1324
1325static struct vm_struct *find_vm_area(const void *addr)
1326{
1327 struct vmap_area *va;
1328
1329 va = find_vmap_area((unsigned long)addr);
1330 if (va && va->flags & VM_VM_AREA)
1331 return va->private;
1332
1333 return NULL;
1334}
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344struct vm_struct *remove_vm_area(const void *addr)
1345{
1346 struct vmap_area *va;
1347
1348 va = find_vmap_area((unsigned long)addr);
1349 if (va && va->flags & VM_VM_AREA) {
1350 struct vm_struct *vm = va->private;
1351 struct vm_struct *tmp, **p;
1352
1353
1354
1355
1356
1357 write_lock(&vmlist_lock);
1358 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1359 ;
1360 *p = tmp->next;
1361 write_unlock(&vmlist_lock);
1362
1363 vmap_debug_free_range(va->va_start, va->va_end);
1364 free_unmap_vmap_area(va);
1365 vm->size -= PAGE_SIZE;
1366
1367 return vm;
1368 }
1369 return NULL;
1370}
1371
1372static void __vunmap(const void *addr, int deallocate_pages)
1373{
1374 struct vm_struct *area;
1375
1376 if (!addr)
1377 return;
1378
1379 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1380 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1381 return;
1382 }
1383
1384 area = remove_vm_area(addr);
1385 if (unlikely(!area)) {
1386 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1387 addr);
1388 return;
1389 }
1390
1391 debug_check_no_locks_freed(addr, area->size);
1392 debug_check_no_obj_freed(addr, area->size);
1393
1394 if (deallocate_pages) {
1395 int i;
1396
1397 for (i = 0; i < area->nr_pages; i++) {
1398 struct page *page = area->pages[i];
1399
1400 BUG_ON(!page);
1401 __free_page(page);
1402 }
1403
1404 if (area->flags & VM_VPAGES)
1405 vfree(area->pages);
1406 else
1407 kfree(area->pages);
1408 }
1409
1410 kfree(area);
1411 return;
1412}
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
/*
 * Release memory obtained from the vmalloc family: unmaps the area at
 * @addr and frees its backing pages. A NULL @addr is ignored by
 * __vunmap(). Must not be called from interrupt context (BUGs).
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
/*
 * Release a virtual mapping created by vmap(): unmaps the area at @addr
 * but leaves the backing pages alone. Must not be called from interrupt
 * context (BUGs) and may sleep.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461void *vmap(struct page **pages, unsigned int count,
1462 unsigned long flags, pgprot_t prot)
1463{
1464 struct vm_struct *area;
1465
1466 might_sleep();
1467
1468 if (count > totalram_pages)
1469 return NULL;
1470
1471 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1472 __builtin_return_address(0));
1473 if (!area)
1474 return NULL;
1475
1476 if (map_vm_area(area, prot, &pages)) {
1477 vunmap(area->addr);
1478 return NULL;
1479 }
1480
1481 return area->addr;
1482}
1483EXPORT_SYMBOL(vmap);
1484
1485static void *__vmalloc_node(unsigned long size, unsigned long align,
1486 gfp_t gfp_mask, pgprot_t prot,
1487 int node, void *caller);
1488static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1489 pgprot_t prot, int node, void *caller)
1490{
1491 struct page **pages;
1492 unsigned int nr_pages, array_size, i;
1493 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1494
1495 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1496 array_size = (nr_pages * sizeof(struct page *));
1497
1498 area->nr_pages = nr_pages;
1499
1500 if (array_size > PAGE_SIZE) {
1501 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1502 PAGE_KERNEL, node, caller);
1503 area->flags |= VM_VPAGES;
1504 } else {
1505 pages = kmalloc_node(array_size, nested_gfp, node);
1506 }
1507 area->pages = pages;
1508 area->caller = caller;
1509 if (!area->pages) {
1510 remove_vm_area(area->addr);
1511 kfree(area);
1512 return NULL;
1513 }
1514
1515 for (i = 0; i < area->nr_pages; i++) {
1516 struct page *page;
1517
1518 if (node < 0)
1519 page = alloc_page(gfp_mask);
1520 else
1521 page = alloc_pages_node(node, gfp_mask, 0);
1522
1523 if (unlikely(!page)) {
1524
1525 area->nr_pages = i;
1526 goto fail;
1527 }
1528 area->pages[i] = page;
1529 }
1530
1531 if (map_vm_area(area, prot, &pages))
1532 goto fail;
1533 return area->addr;
1534
1535fail:
1536 vfree(area->addr);
1537 return NULL;
1538}
1539
1540void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
1541{
1542 void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
1543 __builtin_return_address(0));
1544
1545
1546
1547
1548
1549
1550 kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
1551
1552 return addr;
1553}
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568static void *__vmalloc_node(unsigned long size, unsigned long align,
1569 gfp_t gfp_mask, pgprot_t prot,
1570 int node, void *caller)
1571{
1572 struct vm_struct *area;
1573 void *addr;
1574 unsigned long real_size = size;
1575
1576 size = PAGE_ALIGN(size);
1577 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1578 return NULL;
1579
1580 area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
1581 VMALLOC_END, node, gfp_mask, caller);
1582
1583 if (!area)
1584 return NULL;
1585
1586 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1587
1588
1589
1590
1591
1592
1593 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1594
1595 return addr;
1596}
1597
1598void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1599{
1600 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1601 __builtin_return_address(0));
1602}
1603EXPORT_SYMBOL(__vmalloc);
1604
1605static inline void *__vmalloc_node_flags(unsigned long size,
1606 int node, gfp_t flags)
1607{
1608 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1609 node, __builtin_return_address(0));
1610}
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621void *vmalloc(unsigned long size)
1622{
1623 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1624}
1625EXPORT_SYMBOL(vmalloc);
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637void *vzalloc(unsigned long size)
1638{
1639 return __vmalloc_node_flags(size, -1,
1640 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1641}
1642EXPORT_SYMBOL(vzalloc);
1643
1644
1645
1646
1647
1648
1649
1650
1651void *vmalloc_user(unsigned long size)
1652{
1653 struct vm_struct *area;
1654 void *ret;
1655
1656 ret = __vmalloc_node(size, SHMLBA,
1657 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1658 PAGE_KERNEL, -1, __builtin_return_address(0));
1659 if (ret) {
1660 area = find_vm_area(ret);
1661 area->flags |= VM_USERMAP;
1662 }
1663 return ret;
1664}
1665EXPORT_SYMBOL(vmalloc_user);
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678void *vmalloc_node(unsigned long size, int node)
1679{
1680 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1681 node, __builtin_return_address(0));
1682}
1683EXPORT_SYMBOL(vmalloc_node);
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697void *vzalloc_node(unsigned long size, int node)
1698{
1699 return __vmalloc_node_flags(size, node,
1700 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1701}
1702EXPORT_SYMBOL(vzalloc_node);
1703
1704#ifndef PAGE_KERNEL_EXEC
1705# define PAGE_KERNEL_EXEC PAGE_KERNEL
1706#endif
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720void *vmalloc_exec(unsigned long size)
1721{
1722 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1723 -1, __builtin_return_address(0));
1724}
1725
/*
 * GFP mask used by vmalloc_32() and vmalloc_32_user().  On 64-bit builds a
 * DMA32 (or, failing that, DMA) zone modifier is OR-ed into GFP_KERNEL;
 * everywhere else plain GFP_KERNEL is used.
 *
 * The multi-token expansions are parenthesized so the macro behaves as a
 * single operand wherever it is used (e.g. next to '&' or '==').
 */
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif
1733
1734
1735
1736
1737
1738
1739
1740
1741void *vmalloc_32(unsigned long size)
1742{
1743 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1744 -1, __builtin_return_address(0));
1745}
1746EXPORT_SYMBOL(vmalloc_32);
1747
1748
1749
1750
1751
1752
1753
1754
1755void *vmalloc_32_user(unsigned long size)
1756{
1757 struct vm_struct *area;
1758 void *ret;
1759
1760 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1761 -1, __builtin_return_address(0));
1762 if (ret) {
1763 area = find_vm_area(ret);
1764 area->flags |= VM_USERMAP;
1765 }
1766 return ret;
1767}
1768EXPORT_SYMBOL(vmalloc_32_user);
1769
1770
1771
1772
1773
1774
1775static int aligned_vread(char *buf, char *addr, unsigned long count)
1776{
1777 struct page *p;
1778 int copied = 0;
1779
1780 while (count) {
1781 unsigned long offset, length;
1782
1783 offset = (unsigned long)addr & ~PAGE_MASK;
1784 length = PAGE_SIZE - offset;
1785 if (length > count)
1786 length = count;
1787 p = vmalloc_to_page(addr);
1788
1789
1790
1791
1792
1793
1794
1795 if (p) {
1796
1797
1798
1799
1800 void *map = kmap_atomic(p, KM_USER0);
1801 memcpy(buf, map + offset, length);
1802 kunmap_atomic(map, KM_USER0);
1803 } else
1804 memset(buf, 0, length);
1805
1806 addr += length;
1807 buf += length;
1808 copied += length;
1809 count -= length;
1810 }
1811 return copied;
1812}
1813
1814static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1815{
1816 struct page *p;
1817 int copied = 0;
1818
1819 while (count) {
1820 unsigned long offset, length;
1821
1822 offset = (unsigned long)addr & ~PAGE_MASK;
1823 length = PAGE_SIZE - offset;
1824 if (length > count)
1825 length = count;
1826 p = vmalloc_to_page(addr);
1827
1828
1829
1830
1831
1832
1833
1834 if (p) {
1835
1836
1837
1838
1839 void *map = kmap_atomic(p, KM_USER0);
1840 memcpy(map + offset, buf, length);
1841 kunmap_atomic(map, KM_USER0);
1842 }
1843 addr += length;
1844 buf += length;
1845 copied += length;
1846 count -= length;
1847 }
1848 return copied;
1849}
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879long vread(char *buf, char *addr, unsigned long count)
1880{
1881 struct vm_struct *tmp;
1882 char *vaddr, *buf_start = buf;
1883 unsigned long buflen = count;
1884 unsigned long n;
1885
1886
1887 if ((unsigned long) addr + count < count)
1888 count = -(unsigned long) addr;
1889
1890 read_lock(&vmlist_lock);
1891 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1892 vaddr = (char *) tmp->addr;
1893 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1894 continue;
1895 while (addr < vaddr) {
1896 if (count == 0)
1897 goto finished;
1898 *buf = '\0';
1899 buf++;
1900 addr++;
1901 count--;
1902 }
1903 n = vaddr + tmp->size - PAGE_SIZE - addr;
1904 if (n > count)
1905 n = count;
1906 if (!(tmp->flags & VM_IOREMAP))
1907 aligned_vread(buf, addr, n);
1908 else
1909 memset(buf, 0, n);
1910 buf += n;
1911 addr += n;
1912 count -= n;
1913 }
1914finished:
1915 read_unlock(&vmlist_lock);
1916
1917 if (buf == buf_start)
1918 return 0;
1919
1920 if (buf != buf_start + buflen)
1921 memset(buf, 0, buflen - (buf - buf_start));
1922
1923 return buflen;
1924}
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956long vwrite(char *buf, char *addr, unsigned long count)
1957{
1958 struct vm_struct *tmp;
1959 char *vaddr;
1960 unsigned long n, buflen;
1961 int copied = 0;
1962
1963
1964 if ((unsigned long) addr + count < count)
1965 count = -(unsigned long) addr;
1966 buflen = count;
1967
1968 read_lock(&vmlist_lock);
1969 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1970 vaddr = (char *) tmp->addr;
1971 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1972 continue;
1973 while (addr < vaddr) {
1974 if (count == 0)
1975 goto finished;
1976 buf++;
1977 addr++;
1978 count--;
1979 }
1980 n = vaddr + tmp->size - PAGE_SIZE - addr;
1981 if (n > count)
1982 n = count;
1983 if (!(tmp->flags & VM_IOREMAP)) {
1984 aligned_vwrite(buf, addr, n);
1985 copied++;
1986 }
1987 buf += n;
1988 addr += n;
1989 count -= n;
1990 }
1991finished:
1992 read_unlock(&vmlist_lock);
1993 if (!copied)
1994 return 0;
1995 return buflen;
1996}
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2013 unsigned long pgoff)
2014{
2015 struct vm_struct *area;
2016 unsigned long uaddr = vma->vm_start;
2017 unsigned long usize = vma->vm_end - vma->vm_start;
2018
2019 if ((PAGE_SIZE-1) & (unsigned long)addr)
2020 return -EINVAL;
2021
2022 area = find_vm_area(addr);
2023 if (!area)
2024 return -EINVAL;
2025
2026 if (!(area->flags & VM_USERMAP))
2027 return -EINVAL;
2028
2029 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2030 return -EINVAL;
2031
2032 addr += pgoff << PAGE_SHIFT;
2033 do {
2034 struct page *page = vmalloc_to_page(addr);
2035 int ret;
2036
2037 ret = vm_insert_page(vma, uaddr, page);
2038 if (ret)
2039 return ret;
2040
2041 uaddr += PAGE_SIZE;
2042 addr += PAGE_SIZE;
2043 usize -= PAGE_SIZE;
2044 } while (usize > 0);
2045
2046
2047 vma->vm_flags |= VM_RESERVED;
2048
2049 return 0;
2050}
2051EXPORT_SYMBOL(remap_vmalloc_range);
2052
2053
2054
2055
2056
/*
 * Weak, empty default for vmalloc_sync_all().  Being declared weak, it is
 * replaced at link time by any non-weak definition of the same symbol
 * (presumably supplied by architectures that need one -- confirm).
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
}
2060
2061
/*
 * No-op callback handed to apply_to_page_range() by alloc_vm_area().  It
 * ignores all of its arguments and always reports success, so only the
 * side effects of the walk itself matter.
 */
static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{

	return 0;
}
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080struct vm_struct *alloc_vm_area(size_t size)
2081{
2082 struct vm_struct *area;
2083
2084 area = get_vm_area_caller(size, VM_IOREMAP,
2085 __builtin_return_address(0));
2086 if (area == NULL)
2087 return NULL;
2088
2089
2090
2091
2092
2093 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2094 area->size, f, NULL)) {
2095 free_vm_area(area);
2096 return NULL;
2097 }
2098
2099
2100
2101 vmalloc_sync_all();
2102
2103 return area;
2104}
2105EXPORT_SYMBOL_GPL(alloc_vm_area);
2106
2107void free_vm_area(struct vm_struct *area)
2108{
2109 struct vm_struct *ret;
2110 ret = remove_vm_area(area->addr);
2111 BUG_ON(ret != area);
2112 kfree(area);
2113}
2114EXPORT_SYMBOL_GPL(free_vm_area);
2115
2116#ifdef CONFIG_SMP
2117static struct vmap_area *node_to_va(struct rb_node *n)
2118{
2119 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2120}
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134static bool pvm_find_next_prev(unsigned long end,
2135 struct vmap_area **pnext,
2136 struct vmap_area **pprev)
2137{
2138 struct rb_node *n = vmap_area_root.rb_node;
2139 struct vmap_area *va = NULL;
2140
2141 while (n) {
2142 va = rb_entry(n, struct vmap_area, rb_node);
2143 if (end < va->va_end)
2144 n = n->rb_left;
2145 else if (end > va->va_end)
2146 n = n->rb_right;
2147 else
2148 break;
2149 }
2150
2151 if (!va)
2152 return false;
2153
2154 if (va->va_end > end) {
2155 *pnext = va;
2156 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2157 } else {
2158 *pprev = va;
2159 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2160 }
2161 return true;
2162}
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180static unsigned long pvm_determine_end(struct vmap_area **pnext,
2181 struct vmap_area **pprev,
2182 unsigned long align)
2183{
2184 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2185 unsigned long addr;
2186
2187 if (*pnext)
2188 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2189 else
2190 addr = vmalloc_end;
2191
2192 while (*pprev && (*pprev)->va_end > addr) {
2193 *pnext = *pprev;
2194 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2195 }
2196
2197 return addr;
2198}
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
/**
 * pcpu_get_vm_areas - reserve a group of vmalloc areas at fixed offsets
 * @offsets:  array of @nr_vms offsets, all relative to one common base
 * @sizes:    array of @nr_vms area sizes
 * @nr_vms:   number of areas
 * @align:    alignment; must be a power of two and a multiple of PAGE_SIZE
 * @gfp_mask: allocation flags (reduced to the GFP_RECLAIM_MASK bits)
 *
 * Searches the vmalloc range from the top downwards for a base address such
 * that base + offsets[i] .. base + offsets[i] + sizes[i] is free for every
 * i, then inserts all areas at once under vmap_area_lock.
 *
 * Returns a kzalloc()ed array of @nr_vms vm_struct pointers on success, or
 * NULL if the request cannot fit, an allocation fails, or no suitable base
 * is found even after one purge_vmap_area_lazy() retry.  Misaligned or
 * overlapping input trips a BUG_ON().
 */
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align, gfp_t gfp_mask)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *prev, *next;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, end, last_end;
	bool purged = false;

	gfp_mask &= GFP_RECLAIM_MASK;

	/* validate the parameters and find the area with the highest offset */
	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

		/* every offset and size must honour the alignment */
		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

		/* remember the area that starts highest */
		if (start > offsets[last_area])
			last_area = area;

		/* no other area may start or end inside this one */
		for (area2 = 0; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			if (area2 == area)
				continue;

			BUG_ON(start2 >= start && start2 < end);
			BUG_ON(end2 <= end && end2 > start);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	/* the whole group must fit into the aligned vmalloc range */
	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	/* pointer arrays first, then one vmap_area and vm_struct per area */
	vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
	vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
	if (!vas || !vms)
		goto err_free;

	for (area = 0; area < nr_vms; area++) {
		vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
		vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&vmap_area_lock);

	/* the scan runs from the top and begins with the highest area */
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	/* empty tree: place the group right below vmalloc_end */
	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
		base = vmalloc_end - last_end;
		goto found;
	}
	base = pvm_determine_end(&next, &prev, align) - end;

	while (true) {
		BUG_ON(next && next->va_end <= base + end);
		BUG_ON(prev && prev->va_end > base + end);

		/*
		 * Out of room below vmalloc_start.  last_end is added to both
		 * sides so the comparison still holds if base has wrapped
		 * around below zero.  Purge lazily freed areas once and
		 * retry before giving up.
		 */
		if (base + last_end < vmalloc_start + last_end) {
			spin_unlock(&vmap_area_lock);
			if (!purged) {
				purge_vmap_area_lazy();
				purged = true;
				goto retry;
			}
			goto err_free;
		}

		/*
		 * The current area would run into next: move base down so the
		 * area ends at the newly determined end, and restart the
		 * round with this area as the terminal one.
		 */
		if (next && next->va_start < base + end) {
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * The current area would run into prev: step both cursors one
		 * area down, recompute base, and restart the round with this
		 * area as the terminal one.
		 */
		if (prev && prev->va_end > base + start) {
			next = prev;
			prev = node_to_va(rb_prev(&next->rb_node));
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * This area fits.  Move on to the previous index (wrapping
		 * around); once that is the terminal area, every area has
		 * been checked against the current base.
		 */
		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;
		start = offsets[area];
		end = start + sizes[area];
		pvm_find_next_prev(base + end, &next, &prev);
	}
found:
	/* base is settled: fill in and insert every vmap_area */
	for (area = 0; area < nr_vms; area++) {
		struct vmap_area *va = vas[area];

		va->va_start = base + offsets[area];
		va->va_end = va->va_start + sizes[area];
		__insert_vmap_area(va);
	}

	/* the next search starts from the bottom of the highest area */
	vmap_area_pcpu_hole = base + offsets[last_area];

	spin_unlock(&vmap_area_lock);

	/* attach a vm_struct to each inserted vmap_area */
	for (area = 0; area < nr_vms; area++)
		insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
				  pcpu_get_vm_areas);

	/* only the temporary pointer array is freed; the areas stay in use */
	kfree(vas);
	return vms;

err_free:
	/* either array may be NULL if its own allocation failed */
	for (area = 0; area < nr_vms; area++) {
		if (vas)
			kfree(vas[area]);
		if (vms)
			kfree(vms[area]);
	}
	kfree(vas);
	kfree(vms);
	return NULL;
}
2381
2382
2383
2384
2385
2386
2387
2388
/**
 * pcpu_free_vm_areas - release areas obtained from pcpu_get_vm_areas()
 * @vms:    array of vm_struct pointers
 * @nr_vms: number of entries in @vms
 *
 * Every entry is passed to free_vm_area(), then the array itself is
 * kfree()d.
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int idx = 0;

	while (idx < nr_vms) {
		free_vm_area(vms[idx]);
		idx++;
	}
	kfree(vms);
}
2397#endif
2398
2399#ifdef CONFIG_PROC_FS
2400static void *s_start(struct seq_file *m, loff_t *pos)
2401 __acquires(&vmlist_lock)
2402{
2403 loff_t n = *pos;
2404 struct vm_struct *v;
2405
2406 read_lock(&vmlist_lock);
2407 v = vmlist;
2408 while (n > 0 && v) {
2409 n--;
2410 v = v->next;
2411 }
2412 if (!n)
2413 return v;
2414
2415 return NULL;
2416
2417}
2418
2419static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2420{
2421 struct vm_struct *v = p;
2422
2423 ++*pos;
2424 return v->next;
2425}
2426
/* seq_file ->stop: drop the vmlist_lock read lock taken in s_start(). */
static void s_stop(struct seq_file *m, void *p)
	__releases(&vmlist_lock)
{
	read_unlock(&vmlist_lock);
}
2432
2433static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2434{
2435 if (NUMA_BUILD) {
2436 unsigned int nr, *counters = m->private;
2437
2438 if (!counters)
2439 return;
2440
2441 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2442
2443 for (nr = 0; nr < v->nr_pages; nr++)
2444 counters[page_to_nid(v->pages[nr])]++;
2445
2446 for_each_node_state(nr, N_HIGH_MEMORY)
2447 if (counters[nr])
2448 seq_printf(m, " N%u=%u", nr, counters[nr]);
2449 }
2450}
2451
2452static int s_show(struct seq_file *m, void *p)
2453{
2454 struct vm_struct *v = p;
2455
2456 seq_printf(m, "0x%p-0x%p %7ld",
2457 v->addr, v->addr + v->size, v->size);
2458
2459 if (v->caller) {
2460 char buff[KSYM_SYMBOL_LEN];
2461
2462 seq_putc(m, ' ');
2463 sprint_symbol(buff, (unsigned long)v->caller);
2464 seq_puts(m, buff);
2465 }
2466
2467 if (v->nr_pages)
2468 seq_printf(m, " pages=%d", v->nr_pages);
2469
2470 if (v->phys_addr)
2471 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2472
2473 if (v->flags & VM_IOREMAP)
2474 seq_printf(m, " ioremap");
2475
2476 if (v->flags & VM_ALLOC)
2477 seq_printf(m, " vmalloc");
2478
2479 if (v->flags & VM_MAP)
2480 seq_printf(m, " vmap");
2481
2482 if (v->flags & VM_USERMAP)
2483 seq_printf(m, " user");
2484
2485 if (v->flags & VM_VPAGES)
2486 seq_printf(m, " vpages");
2487
2488 show_numa_info(m, v);
2489 seq_putc(m, '\n');
2490 return 0;
2491}
2492
/* seq_file iterator over vmlist; passed to seq_open() by vmalloc_open(). */
static const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
2499
2500static int vmalloc_open(struct inode *inode, struct file *file)
2501{
2502 unsigned int *ptr = NULL;
2503 int ret;
2504
2505 if (NUMA_BUILD) {
2506 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2507 if (ptr == NULL)
2508 return -ENOMEM;
2509 }
2510 ret = seq_open(file, &vmalloc_op);
2511 if (!ret) {
2512 struct seq_file *m = file->private_data;
2513 m->private = ptr;
2514 } else
2515 kfree(ptr);
2516 return ret;
2517}
2518
/*
 * File operations of the vmallocinfo proc entry: opened through
 * vmalloc_open(), read and seeked via the generic seq_file helpers, and
 * released with seq_release_private.
 */
static const struct file_operations proc_vmalloc_operations = {
	.open = vmalloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
2525
/*
 * Register the "vmallocinfo" proc entry with mode S_IRUSR (readable by its
 * owner only).  The result of proc_create() is not checked; the initcall
 * always reports success.
 */
static int __init proc_vmalloc_init(void)
{
	proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
	return 0;
}
module_init(proc_vmalloc_init);
2532#endif
2533
2534