1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include <linux/mm.h>
91#include <linux/poison.h>
92#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
97#include <linux/cpuset.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/notifier.h>
101#include <linux/kallsyms.h>
102#include <linux/cpu.h>
103#include <linux/sysctl.h>
104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h>
107#include <linux/string.h>
108#include <linux/uaccess.h>
109#include <linux/nodemask.h>
110#include <linux/kmemleak.h>
111#include <linux/mempolicy.h>
112#include <linux/mutex.h>
113#include <linux/fault-inject.h>
114#include <linux/rtmutex.h>
115#include <linux/reciprocal_div.h>
116#include <linux/debugobjects.h>
117#include <linux/kmemcheck.h>
118
119#include <asm/cacheflush.h>
120#include <asm/tlbflush.h>
121#include <asm/page.h>
122
123
124
125
126
127
128
129
130
131
132
/*
 * Debug configuration switches.
 * CONFIG_DEBUG_SLAB turns DEBUG, STATS and FORCED_DEBUG on together;
 * without it all three are 0.
 */
#ifdef CONFIG_DEBUG_SLAB
#define DEBUG 1
#define STATS 1
#define FORCED_DEBUG 1
#else
#define DEBUG 0
#define STATS 0
#define FORCED_DEBUG 0
#endif
142
143
/* Size in bytes of one pointer-sized word. */
#define BYTES_PER_WORD sizeof(void *)
/* Larger of the word size and the alignment of unsigned long long. */
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
146
/*
 * Fallback when the architecture does not define ARCH_KMALLOC_MINALIGN:
 * use the alignment of unsigned long long. It is passed as the alignment
 * argument when the kmalloc caches are created in kmem_cache_init().
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

/* Fallback when the architecture does not define ARCH_SLAB_MINALIGN. */
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN 0
#endif

/*
 * Fallback flags for the kmalloc caches created in kmem_cache_init():
 * hardware cache-line alignment unless the architecture overrides it.
 */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
174
175
/*
 * Mask of SLAB_* flags.  The DEBUG variant additionally contains
 * SLAB_RED_ZONE, SLAB_POISON and SLAB_STORE_USER.
 * NOTE(review): presumably the set of flags accepted at cache creation;
 * the user of this mask is outside this chunk - confirm.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/*
 * Buffer-control index type.  The three largest representable values are
 * reserved as markers (END, FREE, ACTIVE); SLAB_LIMIT is the value just
 * below them and is used to cap the number of objects per slab.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	((kmem_bufctl_t)(~0U) - 0)
#define BUFCTL_FREE	((kmem_bufctl_t)(~0U) - 1)
#define BUFCTL_ACTIVE	((kmem_bufctl_t)(~0U) - 2)
#define SLAB_LIMIT	((kmem_bufctl_t)(~0U) - 3)
216
217
218
219
220
221
222
223
/*
 * Per-slab management record.
 */
struct slab {
	struct list_head list;		/* list linkage; presumably onto a kmem_list3 slab list - confirm */
	unsigned long colouroff;	/* NOTE(review): looks like the colour offset of this slab - confirm */
	void *s_mem;			/* base address used by index_to_obj()/obj_to_index() */
	unsigned int inuse;		/* NOTE(review): presumably count of allocated objects - confirm */
	kmem_bufctl_t free;		/* NOTE(review): presumably index of the first free object - confirm */
	unsigned short nodeid;		/* node id; compared with numa_node_id() in cache_free_alien() */
};
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/*
 * RCU callback record carrying a cache pointer and an address.
 * NOTE(review): presumably used to free a slab's pages after an RCU grace
 * period; the consumer is outside this chunk - confirm.
 */
struct slab_rcu {
	struct rcu_head head;
	struct kmem_cache *cachep;
	void *addr;
};
254
255
256
257
258
259
260
261
262
263
264
265
266
/*
 * Array of object pointers with bookkeeping.  Used for the per-cpu caches
 * (kmem_cache->array[]), the per-node shared cache and the alien caches.
 */
struct array_cache {
	unsigned int avail;		/* number of pointers currently stored in entry[] */
	unsigned int limit;		/* capacity of entry[] */
	unsigned int batchcount;	/* batch size given at allocation time */
	unsigned int touched;		/* set to 1 by transfer_objects() when objects are added */
	spinlock_t lock;
	void *entry[];			/* object pointers; storage follows the header */
};
279
280
281
282
283
/*
 * Statically sized array_cache used during bootstrap: the header followed
 * by storage for BOOT_CPUCACHE_ENTRIES entries.
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
289
290
291
292
/*
 * Per-node state of a cache: three slab lists plus the shared and alien
 * array caches, protected by list_lock.
 */
struct kmem_list3 {
	struct list_head slabs_partial;
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;	/* passed to drain_freelist() as the amount to free */
	unsigned int free_limit;	/* set in cpuup_prepare() from cpu count, batchcount and num */
	unsigned int colour_next;
	spinlock_t list_lock;
	struct array_cache *shared;	/* per-node shared array cache, may be NULL */
	struct array_cache **alien;	/* per-node array of alien caches, may be NULL */
	unsigned long next_reap;	/* jiffies value; initialised to jiffies + REAPTIMEOUT_LIST3 + offset */
	int free_touched;
};
306
307
308
309
/*
 * Static kmem_list3 instances used while bootstrapping.  The array holds
 * three groups of MAX_NUMNODES entries; CACHE_CACHE, SIZE_AC and SIZE_L3
 * are the base indices of the groups used for cache_cache, the INDEX_AC
 * kmalloc cache and the INDEX_L3 kmalloc cache in kmem_cache_init().
 */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_AC MAX_NUMNODES
#define	SIZE_L3 (2 * MAX_NUMNODES)
315
/* Forward declarations of helpers defined later in the file. */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
322
323
324
325
326
/*
 * Map a compile-time constant size to its index in the kmalloc size table.
 *
 * For a constant @size the CACHE() expansion of <linux/kmalloc_sizes.h>
 * becomes a chain of comparisons that returns the position of the first
 * table size that is >= @size.  If @size is not a compile-time constant,
 * or is larger than every table entry, __bad_size() is called; it is only
 * declared here, never defined in this chunk.
 * NOTE(review): presumably an intentionally undefined symbol so misuse
 * fails at link time - confirm.
 */
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <=x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}
346
/* Starts at 1; kmem_cache_init() clears it once the first kmalloc caches exist. */
static int slab_early_init = 1;

/* kmalloc table indices of the caches large enough for these two structures. */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))
351
352static void kmem_list3_init(struct kmem_list3 *parent)
353{
354 INIT_LIST_HEAD(&parent->slabs_full);
355 INIT_LIST_HEAD(&parent->slabs_partial);
356 INIT_LIST_HEAD(&parent->slabs_free);
357 parent->shared = NULL;
358 parent->alien = NULL;
359 parent->colour_next = 0;
360 spin_lock_init(&parent->list_lock);
361 parent->free_objects = 0;
362 parent->free_touched = 0;
363}
364
/*
 * MAKE_LIST - initialise @listp and splice onto it the list named @slab
 * (a member name of struct kmem_list3) from @cachep's current per-node
 * structure for @nodeid.
 *
 * @cachep and @nodeid are parenthesised so that arbitrary expressions can
 * be passed safely.  @slab is a member name and cannot be parenthesised.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&((cachep)->nodelists[(nodeid)]->slab), listp); \
	} while (0)

/*
 * MAKE_ALL_LISTS - apply MAKE_LIST to the full, partial and free slab
 * lists of @ptr, taking the contents from @cachep's node @nodeid.
 */
#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
		MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, (nodeid)); \
		MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, (nodeid)); \
		MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, (nodeid)); \
	} while (0)
377
/*
 * Internal cache flag bit and its test macro.  cache_estimate() reserves
 * no in-slab management space when this flag is set.
 */
#define CFLGS_OFF_SLAB		(0x80000000UL)
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

/* NOTE(review): presumably an upper bound on refill batch size; the user is outside this chunk - confirm. */
#define BATCHREFILL_LIMIT	16
382
383
384
385
386
387
388
/*
 * Reap intervals in jiffies (2 and 4 seconds).  REAPTIMEOUT_LIST3 is used
 * in this chunk to initialise kmem_list3.next_reap.
 */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)
391
/*
 * Statistics hooks.  With STATS enabled they update counters stored in the
 * cache (@x); otherwise every hook expands to an empty statement so call
 * sites need no #ifdefs.
 *
 * STATS_SET_FREEABLE parenthesises its @i argument so that an expression
 * argument cannot change the meaning of the comparison or assignment.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
/* Track the highest value num_active has reached. */
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
/* Raise max_freeable to @i if @i is larger. */
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
433
#if DEBUG

/* Offset of the object within its buffer, as recorded in the cache. */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

/* Object size as recorded in the cache. */
static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

/*
 * Address of the first red zone word: the unsigned long long located
 * immediately before the object offset.  BUGs if the cache was not
 * created with SLAB_RED_ZONE.
 */
static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	void *zone;

	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

	zone = objp + obj_offset(cachep) - sizeof(unsigned long long);
	return (unsigned long long *)zone;
}

/*
 * Address of the second red zone word: the last unsigned long long of the
 * buffer, moved down by REDZONE_ALIGN when SLAB_STORE_USER is also set.
 * BUGs if the cache was not created with SLAB_RED_ZONE.
 */
static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	void *zone;

	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));

	zone = objp + cachep->buffer_size - sizeof(unsigned long long);
	if (cachep->flags & SLAB_STORE_USER)
		zone -= REDZONE_ALIGN;
	return (unsigned long long *)zone;
}

/*
 * Address of the user word: the last pointer-sized word of the buffer.
 * BUGs if the cache was not created with SLAB_STORE_USER.
 */
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	void *word;

	BUG_ON(!(cachep->flags & SLAB_STORE_USER));

	word = objp + cachep->buffer_size - BYTES_PER_WORD;
	return (void **)word;
}

#else

/* Non-debug builds: no offset, size equals buffer size, accessors must never run. */
#define obj_offset(x)			0
#define obj_size(cachep)		(cachep->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif
492
#ifdef CONFIG_TRACING
/* Exported accessor returning @cachep's buffer_size; built only with CONFIG_TRACING. */
size_t slab_buffer_size(struct kmem_cache *cachep)
{
	return cachep->buffer_size;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
500
501
502
503
/*
 * Page-order threshold.  Starts at the LO value; kmem_cache_init() raises
 * it to the HI value when totalram_pages exceeds 32MB worth of pages.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
507
508
509
510
511
512
/* Record @cache in @page by storing it in the page's lru.next field. */
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
	page->lru.next = (struct list_head *)cache;
}
517
/*
 * Return the cache stored in the compound head of @page (see
 * page_set_cache()).  BUGs if that page is not marked as a slab page.
 */
static inline struct kmem_cache *page_get_cache(struct page *page)
{
	page = compound_head(page);
	BUG_ON(!PageSlab(page));
	return (struct kmem_cache *)page->lru.next;
}
524
/* Record @slab in @page by storing it in the page's lru.prev field. */
static inline void page_set_slab(struct page *page, struct slab *slab)
{
	page->lru.prev = (struct list_head *)slab;
}
529
/*
 * Return the slab stored in @page (see page_set_slab()).
 * BUGs if @page is not marked as a slab page.
 */
static inline struct slab *page_get_slab(struct page *page)
{
	BUG_ON(!PageSlab(page));
	return (struct slab *)page->lru.prev;
}
535
/* Look up the cache owning @obj via the head page of its address. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}
541
/* Look up the slab descriptor of @obj via the head page of its address. */
static inline struct slab *virt_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}
547
548static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
549 unsigned int idx)
550{
551 return slab->s_mem + cache->buffer_size * idx;
552}
553
554
555
556
557
558
559
560static inline unsigned int obj_to_index(const struct kmem_cache *cache,
561 const struct slab *slab, void *obj)
562{
563 u32 offset = (obj - slab->s_mem);
564 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
565}
566
567
568
569
/*
 * Table of general-purpose (kmalloc) cache sizes, one entry per CACHE(x)
 * in <linux/kmalloc_sizes.h>, terminated by an entry of size ULONG_MAX.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
577
578
/* Names of a general cache and of its DMA counterpart. */
struct cache_names {
	char *name;
	char *name_dma;
};

/*
 * Name table parallel to malloc_sizes[]: "size-<x>" and "size-<x>(DMA)"
 * for each CACHE(x), terminated by a NULL entry.  Init data only.
 */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
590
/*
 * Bootstrap per-cpu array caches (avail 0, limit BOOT_CPUCACHE_ENTRIES,
 * batchcount 1, touched 0).  initarray_cache is installed for cache_cache
 * in kmem_cache_init(); initarray_generic is expected there to be the
 * per-cpu cache of the INDEX_AC kmalloc cache.
 */
static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
595
596
/*
 * Statically allocated cache named "kmem_cache".  Its remaining fields
 * (buffer_size, colour, num, ...) are filled in by kmem_cache_init().
 */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
604
/* Dummy non-NULL value returned by the non-NUMA alloc_alien_cache() stub. */
#define BAD_ALIEN_MAGIC 0x01020304ul
606
607
608
609
610
/*
 * Bootstrap progress of the allocator, in increasing order.  This chunk
 * sets EARLY at the end of kmem_cache_init() and FULL in
 * kmem_cache_init_late().
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,
	FULL
} g_cpucache_up;
618
619
620
621
/* Return non-zero once bootstrap has reached at least the EARLY state. */
int slab_is_available(void)
{
	return g_cpucache_up >= EARLY;
}
626
#ifdef CONFIG_LOCKDEP

/* Lock classes for the list_lock and alien-cache locks of on-slab kmalloc caches. */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

/*
 * Assign the lock classes above to the locks of node @q for every kmalloc
 * cache that is not off-slab.  Does nothing until bootstrap is FULL.
 */
static void init_node_lock_keys(int q)
{
	struct cache_sizes *s;

	if (g_cpucache_up != FULL)
		return;

	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
		struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
		struct array_cache **alc;
		int r;

		if (!l3 || OFF_SLAB(s->cs_cachep))
			continue;
		lockdep_set_class(&l3->list_lock, &on_slab_l3_key);

		/* Skip caches without alien arrays (NULL or the non-NUMA dummy). */
		alc = l3->alien;
		if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
			continue;

		for_each_node(r) {
			if (alc[r])
				lockdep_set_class(&alc[r]->lock,
						  &on_slab_alc_key);
		}
	}
}

/* Run init_node_lock_keys() for every node. */
static inline void init_lock_keys(void)
{
	int node;

	for_each_node(node)
		init_node_lock_keys(node);
}
#else
/* Without lockdep there is nothing to annotate. */
static void init_node_lock_keys(int q)
{
}

static inline void init_lock_keys(void)
{
}
#endif
693
694
695
696
/*
 * List of all caches.  cpuup_callback() and kmem_cache_init_late() walk it
 * while holding cache_chain_mutex.
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/* Per-cpu delayed work item running cache_reap(); see start_cpu_timer(). */
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
701
/* Return @cachep's array cache for the cpu this code is running on. */
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
	return cachep->array[smp_processor_id()];
}
706
707static inline struct kmem_cache *__find_general_cachep(size_t size,
708 gfp_t gfpflags)
709{
710 struct cache_sizes *csizep = malloc_sizes;
711
712#if DEBUG
713
714
715
716
717 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
718#endif
719 if (!size)
720 return ZERO_SIZE_PTR;
721
722 while (size > csizep->cs_size)
723 csizep++;
724
725
726
727
728
729
730#ifdef CONFIG_ZONE_DMA
731 if (unlikely(gfpflags & GFP_DMA))
732 return csizep->cs_dmacachep;
733#endif
734 return csizep->cs_cachep;
735}
736
/* Out-of-line wrapper around __find_general_cachep(). */
static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}
741
742static size_t slab_mgmt_size(size_t nr_objs, size_t align)
743{
744 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
745}
746
747
748
749
750static void cache_estimate(unsigned long gfporder, size_t buffer_size,
751 size_t align, int flags, size_t *left_over,
752 unsigned int *num)
753{
754 int nr_objs;
755 size_t mgmt_size;
756 size_t slab_size = PAGE_SIZE << gfporder;
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773 if (flags & CFLGS_OFF_SLAB) {
774 mgmt_size = 0;
775 nr_objs = slab_size / buffer_size;
776
777 if (nr_objs > SLAB_LIMIT)
778 nr_objs = SLAB_LIMIT;
779 } else {
780
781
782
783
784
785
786
787
788 nr_objs = (slab_size - sizeof(struct slab)) /
789 (buffer_size + sizeof(kmem_bufctl_t));
790
791
792
793
794
795 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
796 > slab_size)
797 nr_objs--;
798
799 if (nr_objs > SLAB_LIMIT)
800 nr_objs = SLAB_LIMIT;
801
802 mgmt_size = slab_mgmt_size(nr_objs, align);
803 }
804 *num = nr_objs;
805 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
806}
807
/* Report a slab error, tagging it with the name of the calling function. */
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

/*
 * Print an error line naming @function, the cache and @msg at KERN_ERR
 * level, then dump the current stack.
 */
static void __slab_error(const char *function, struct kmem_cache *cachep,
			char *msg)
{
	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
	       function, cachep->name, msg);
	dump_stack();
}
817
818
819
820
821
822
823
824
825
/*
 * Whether alien caches are allocated (default on).  Cleared by the
 * "noaliencache" boot parameter, and by kmem_cache_init() when only one
 * node is possible.
 */
static int use_alien_caches __read_mostly = 1;
/* Handler for the "noaliencache" boot parameter; the argument string is ignored. */
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);
833
#ifdef CONFIG_NUMA
/* Per-cpu node id used by reap_alien() to pick which alien cache to drain. */
static DEFINE_PER_CPU(unsigned long, slab_reap_node);

/*
 * Set @cpu's reap node to the online node following the cpu's own node,
 * wrapping to the first online node at the end of the map.
 */
static void init_reap_node(int cpu)
{
	int node = next_node(cpu_to_node(cpu), node_online_map);

	per_cpu(slab_reap_node, cpu) = (node == MAX_NUMNODES) ?
				first_node(node_online_map) : node;
}

/* Advance the current cpu's reap node to the next online node, wrapping around. */
static void next_reap_node(void)
{
	int current_node = __get_cpu_var(slab_reap_node);
	int following = next_node(current_node, node_online_map);

	if (unlikely(following >= MAX_NUMNODES))
		following = first_node(node_online_map);
	__get_cpu_var(slab_reap_node) = following;
}

#else
/* Without NUMA there is no reap node to maintain. */
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif
868
869
870
871
872
873
874
875
876static void __cpuinit start_cpu_timer(int cpu)
877{
878 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
879
880
881
882
883
884
885 if (keventd_up() && reap_work->work.func == NULL) {
886 init_reap_node(cpu);
887 INIT_DELAYED_WORK(reap_work, cache_reap);
888 schedule_delayed_work_on(cpu, reap_work,
889 __round_jiffies_relative(HZ, cpu));
890 }
891}
892
893static struct array_cache *alloc_arraycache(int node, int entries,
894 int batchcount, gfp_t gfp)
895{
896 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
897 struct array_cache *nc = NULL;
898
899 nc = kmalloc_node(memsize, gfp, node);
900
901
902
903
904
905
906
907 kmemleak_no_scan(nc);
908 if (nc) {
909 nc->avail = 0;
910 nc->limit = entries;
911 nc->batchcount = batchcount;
912 nc->touched = 0;
913 spin_lock_init(&nc->lock);
914 }
915 return nc;
916}
917
918
919
920
921
922
923
924static int transfer_objects(struct array_cache *to,
925 struct array_cache *from, unsigned int max)
926{
927
928 int nr = min(min(from->avail, max), to->limit - to->avail);
929
930 if (!nr)
931 return 0;
932
933 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
934 sizeof(void *) *nr);
935
936 from->avail -= nr;
937 to->avail += nr;
938 to->touched = 1;
939 return nr;
940}
941
#ifndef CONFIG_NUMA

/* Non-NUMA build: alien caches do not exist, so everything is a stub. */
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

/* Returns a dummy non-NULL value so callers' failure checks do not trigger. */
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	return (struct array_cache **)BAD_ALIEN_MAGIC;
}

/* Nothing was allocated, so nothing is freed. */
static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

/* Always 0: the object was not handled here. */
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	return 0;
}

/* Always NULL. */
static inline void *alternate_node_alloc(struct kmem_cache *cachep,
		gfp_t flags)
{
	return NULL;
}

/* Always NULL. */
static inline void *____cache_alloc_node(struct kmem_cache *cachep,
		 gfp_t flags, int nodeid)
{
	return NULL;
}
#else	/* CONFIG_NUMA */

/* Forward declarations; the definitions are outside this chunk. */
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
977
978static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
979{
980 struct array_cache **ac_ptr;
981 int memsize = sizeof(void *) * nr_node_ids;
982 int i;
983
984 if (limit > 1)
985 limit = 12;
986 ac_ptr = kmalloc_node(memsize, gfp, node);
987 if (ac_ptr) {
988 for_each_node(i) {
989 if (i == node || !node_online(i)) {
990 ac_ptr[i] = NULL;
991 continue;
992 }
993 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
994 if (!ac_ptr[i]) {
995 for (i--; i >= 0; i--)
996 kfree(ac_ptr[i]);
997 kfree(ac_ptr);
998 return NULL;
999 }
1000 }
1001 }
1002 return ac_ptr;
1003}
1004
1005static void free_alien_cache(struct array_cache **ac_ptr)
1006{
1007 int i;
1008
1009 if (!ac_ptr)
1010 return;
1011 for_each_node(i)
1012 kfree(ac_ptr[i]);
1013 kfree(ac_ptr);
1014}
1015
1016static void __drain_alien_cache(struct kmem_cache *cachep,
1017 struct array_cache *ac, int node)
1018{
1019 struct kmem_list3 *rl3 = cachep->nodelists[node];
1020
1021 if (ac->avail) {
1022 spin_lock(&rl3->list_lock);
1023
1024
1025
1026
1027
1028 if (rl3->shared)
1029 transfer_objects(rl3->shared, ac, ac->limit);
1030
1031 free_block(cachep, ac->entry, ac->avail, node);
1032 ac->avail = 0;
1033 spin_unlock(&rl3->list_lock);
1034 }
1035}
1036
1037
1038
1039
1040static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1041{
1042 int node = __get_cpu_var(slab_reap_node);
1043
1044 if (l3->alien) {
1045 struct array_cache *ac = l3->alien[node];
1046
1047 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1048 __drain_alien_cache(cachep, ac, node);
1049 spin_unlock_irq(&ac->lock);
1050 }
1051 }
1052}
1053
1054static void drain_alien_cache(struct kmem_cache *cachep,
1055 struct array_cache **alien)
1056{
1057 int i = 0;
1058 struct array_cache *ac;
1059 unsigned long flags;
1060
1061 for_each_online_node(i) {
1062 ac = alien[i];
1063 if (ac) {
1064 spin_lock_irqsave(&ac->lock, flags);
1065 __drain_alien_cache(cachep, ac, i);
1066 spin_unlock_irqrestore(&ac->lock, flags);
1067 }
1068 }
1069}
1070
1071static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1072{
1073 struct slab *slabp = virt_to_slab(objp);
1074 int nodeid = slabp->nodeid;
1075 struct kmem_list3 *l3;
1076 struct array_cache *alien = NULL;
1077 int node;
1078
1079 node = numa_node_id();
1080
1081
1082
1083
1084
1085 if (likely(slabp->nodeid == node))
1086 return 0;
1087
1088 l3 = cachep->nodelists[node];
1089 STATS_INC_NODEFREES(cachep);
1090 if (l3->alien && l3->alien[nodeid]) {
1091 alien = l3->alien[nodeid];
1092 spin_lock(&alien->lock);
1093 if (unlikely(alien->avail == alien->limit)) {
1094 STATS_INC_ACOVERFLOW(cachep);
1095 __drain_alien_cache(cachep, alien, nodeid);
1096 }
1097 alien->entry[alien->avail++] = objp;
1098 spin_unlock(&alien->lock);
1099 } else {
1100 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1101 free_block(cachep, &objp, 1, nodeid);
1102 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1103 }
1104 return 1;
1105}
1106#endif
1107
/*
 * Release the per-cpu slab state of @cpu for every cache on cache_chain.
 *
 * Called from cpuup_callback() with cache_chain_mutex held, and from
 * cpuup_prepare() on its failure path.  For each cache the cpu's array
 * cache is detached and its objects freed; if the cpumask of the cpu's
 * node is empty, the node's shared and alien caches are torn down as well.
 * Finally the free lists of the node are drained for every cache.
 */
static void __cpuinit cpuup_canceled(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_node(cpu);
	const struct cpumask *mask = cpumask_of_node(node);

	list_for_each_entry(cachep, &cache_chain, next) {
		struct array_cache *nc;
		struct array_cache *shared;
		struct array_cache **alien;

		/* Detach the cpu's array cache; it is freed at the label below. */
		nc = cachep->array[cpu];
		cachep->array[cpu] = NULL;
		l3 = cachep->nodelists[node];

		if (!l3)
			goto free_array_cache;

		spin_lock_irq(&l3->list_lock);

		/* One cpu less contributes to the node's free limit. */
		l3->free_limit -= cachep->batchcount;
		if (nc)
			free_block(cachep, nc->entry, nc->avail, node);

		/* Node still has cpus in its mask: keep shared/alien caches. */
		if (!cpumask_empty(mask)) {
			spin_unlock_irq(&l3->list_lock);
			goto free_array_cache;
		}

		shared = l3->shared;
		if (shared) {
			free_block(cachep, shared->entry,
				   shared->avail, node);
			l3->shared = NULL;
		}

		alien = l3->alien;
		l3->alien = NULL;

		spin_unlock_irq(&l3->list_lock);

		/* The detached structures are freed outside the lock. */
		kfree(shared);
		if (alien) {
			drain_alien_cache(cachep, alien);
			free_alien_cache(alien);
		}
free_array_cache:
		kfree(nc);
	}

	/* Second pass: drain all free objects of this node for each cache. */
	list_for_each_entry(cachep, &cache_chain, next) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;
		drain_freelist(cachep, l3, l3->free_objects);
	}
}
1172
/*
 * Set up the per-cpu slab state of @cpu for every cache on cache_chain.
 *
 * Called from cpuup_callback() with cache_chain_mutex held.  The first
 * pass makes sure each cache has a kmem_list3 for the cpu's node and
 * recomputes its free_limit; the second pass allocates the cpu's array
 * cache and, where missing, the node's shared and alien caches.
 *
 * Returns 0 on success.  On allocation failure everything done for @cpu
 * is undone through cpuup_canceled() and -ENOMEM is returned.
 */
static int __cpuinit cpuup_prepare(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_node(cpu);
	const int memsize = sizeof(struct kmem_list3);

	list_for_each_entry(cachep, &cache_chain, next) {
		/* Allocate the node structure only if the node has none yet. */
		if (!cachep->nodelists[node]) {
			l3 = kmalloc_node(memsize, GFP_KERNEL, node);
			if (!l3)
				goto bad;
			kmem_list3_init(l3);
			/* Offset the first reap by a per-cache amount. */
			l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
			    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

			cachep->nodelists[node] = l3;
		}

		spin_lock_irq(&cachep->nodelists[node]->list_lock);
		cachep->nodelists[node]->free_limit =
			(1 + nr_cpus_node(node)) *
			cachep->batchcount + cachep->num;
		spin_unlock_irq(&cachep->nodelists[node]->list_lock);
	}

	list_for_each_entry(cachep, &cache_chain, next) {
		struct array_cache *nc;
		struct array_cache *shared = NULL;
		struct array_cache **alien = NULL;

		nc = alloc_arraycache(node, cachep->limit,
					cachep->batchcount, GFP_KERNEL);
		if (!nc)
			goto bad;
		if (cachep->shared) {
			shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, GFP_KERNEL);
			if (!shared) {
				kfree(nc);
				goto bad;
			}
		}
		if (use_alien_caches) {
			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
			if (!alien) {
				kfree(shared);
				kfree(nc);
				goto bad;
			}
		}
		cachep->array[cpu] = nc;
		l3 = cachep->nodelists[node];
		BUG_ON(!l3);

		spin_lock_irq(&l3->list_lock);
		if (!l3->shared) {
			/* Hand over ownership; NULL it so it is not freed below. */
			l3->shared = shared;
			shared = NULL;
		}
#ifdef CONFIG_NUMA
		if (!l3->alien) {
			l3->alien = alien;
			alien = NULL;
		}
#endif
		spin_unlock_irq(&l3->list_lock);
		/* Free whatever the node already had and did not take over. */
		kfree(shared);
		free_alien_cache(alien);
	}
	init_node_lock_keys(node);

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}
1276
/*
 * CPU hotplug notifier callback.
 *
 * @hcpu carries the cpu number.  Returns NOTIFY_BAD if preparing the cpu
 * failed and NOTIFY_OK otherwise; actions not listed are ignored.
 */
static int __cpuinit cpuup_callback(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&cache_chain_mutex);
		err = cpuup_prepare(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/* Stop the reap work for this cpu. */
		cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
		/* Clear the handler so start_cpu_timer() can start it again. */
		per_cpu(slab_reap_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* fall through: a dead cpu is cleaned up like a cancelled bring-up */
#endif
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		mutex_lock(&cache_chain_mutex);
		cpuup_canceled(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	}
	return err ? NOTIFY_BAD : NOTIFY_OK;
}
1332
1333static struct notifier_block __cpuinitdata cpucache_notifier = {
1334 &cpuup_callback, NULL, 0
1335};
1336
1337
1338
1339
1340static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1341 int nodeid)
1342{
1343 struct kmem_list3 *ptr;
1344
1345 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1346 BUG_ON(!ptr);
1347
1348 memcpy(ptr, list, sizeof(struct kmem_list3));
1349
1350
1351
1352 spin_lock_init(&ptr->list_lock);
1353
1354 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1355 cachep->nodelists[nodeid] = ptr;
1356}
1357
1358
1359
1360
1361
1362static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1363{
1364 int node;
1365
1366 for_each_online_node(node) {
1367 cachep->nodelists[node] = &initkmem_list3[index + node];
1368 cachep->nodelists[node]->next_reap = jiffies +
1369 REAPTIMEOUT_LIST3 +
1370 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1371 }
1372}
1373
1374
1375
1376
1377
1378void __init kmem_cache_init(void)
1379{
1380 size_t left_over;
1381 struct cache_sizes *sizes;
1382 struct cache_names *names;
1383 int i;
1384 int order;
1385 int node;
1386
1387 if (num_possible_nodes() == 1)
1388 use_alien_caches = 0;
1389
1390 for (i = 0; i < NUM_INIT_LISTS; i++) {
1391 kmem_list3_init(&initkmem_list3[i]);
1392 if (i < MAX_NUMNODES)
1393 cache_cache.nodelists[i] = NULL;
1394 }
1395 set_up_list3s(&cache_cache, CACHE_CACHE);
1396
1397
1398
1399
1400
1401 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1402 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424 node = numa_node_id();
1425
1426
1427 INIT_LIST_HEAD(&cache_chain);
1428 list_add(&cache_cache.next, &cache_chain);
1429 cache_cache.colour_off = cache_line_size();
1430 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1431 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1432
1433
1434
1435
1436
1437 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1438 nr_node_ids * sizeof(struct kmem_list3 *);
1439#if DEBUG
1440 cache_cache.obj_size = cache_cache.buffer_size;
1441#endif
1442 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1443 cache_line_size());
1444 cache_cache.reciprocal_buffer_size =
1445 reciprocal_value(cache_cache.buffer_size);
1446
1447 for (order = 0; order < MAX_ORDER; order++) {
1448 cache_estimate(order, cache_cache.buffer_size,
1449 cache_line_size(), 0, &left_over, &cache_cache.num);
1450 if (cache_cache.num)
1451 break;
1452 }
1453 BUG_ON(!cache_cache.num);
1454 cache_cache.gfporder = order;
1455 cache_cache.colour = left_over / cache_cache.colour_off;
1456 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1457 sizeof(struct slab), cache_line_size());
1458
1459
1460 sizes = malloc_sizes;
1461 names = cache_names;
1462
1463
1464
1465
1466
1467
1468
1469 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1470 sizes[INDEX_AC].cs_size,
1471 ARCH_KMALLOC_MINALIGN,
1472 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1473 NULL);
1474
1475 if (INDEX_AC != INDEX_L3) {
1476 sizes[INDEX_L3].cs_cachep =
1477 kmem_cache_create(names[INDEX_L3].name,
1478 sizes[INDEX_L3].cs_size,
1479 ARCH_KMALLOC_MINALIGN,
1480 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1481 NULL);
1482 }
1483
1484 slab_early_init = 0;
1485
1486 while (sizes->cs_size != ULONG_MAX) {
1487
1488
1489
1490
1491
1492
1493
1494 if (!sizes->cs_cachep) {
1495 sizes->cs_cachep = kmem_cache_create(names->name,
1496 sizes->cs_size,
1497 ARCH_KMALLOC_MINALIGN,
1498 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1499 NULL);
1500 }
1501#ifdef CONFIG_ZONE_DMA
1502 sizes->cs_dmacachep = kmem_cache_create(
1503 names->name_dma,
1504 sizes->cs_size,
1505 ARCH_KMALLOC_MINALIGN,
1506 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1507 SLAB_PANIC,
1508 NULL);
1509#endif
1510 sizes++;
1511 names++;
1512 }
1513
1514 {
1515 struct array_cache *ptr;
1516
1517 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1518
1519 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1520 memcpy(ptr, cpu_cache_get(&cache_cache),
1521 sizeof(struct arraycache_init));
1522
1523
1524
1525 spin_lock_init(&ptr->lock);
1526
1527 cache_cache.array[smp_processor_id()] = ptr;
1528
1529 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1530
1531 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1532 != &initarray_generic.cache);
1533 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1534 sizeof(struct arraycache_init));
1535
1536
1537
1538 spin_lock_init(&ptr->lock);
1539
1540 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1541 ptr;
1542 }
1543
1544 {
1545 int nid;
1546
1547 for_each_online_node(nid) {
1548 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1549
1550 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1551 &initkmem_list3[SIZE_AC + nid], nid);
1552
1553 if (INDEX_AC != INDEX_L3) {
1554 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1555 &initkmem_list3[SIZE_L3 + nid], nid);
1556 }
1557 }
1558 }
1559
1560 g_cpucache_up = EARLY;
1561}
1562
/*
 * Second stage of slab initialisation: enable the cpu caches of every
 * cache on the chain (a failure is fatal), mark bootstrap as FULL, set up
 * the lockdep keys and register the cpu hotplug notifier.
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	/* Resize the cpu caches of all caches created so far. */
	mutex_lock(&cache_chain_mutex);
	list_for_each_entry(cachep, &cache_chain, next)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&cache_chain_mutex);

	g_cpucache_up = FULL;

	/* init_node_lock_keys() only acts once g_cpucache_up is FULL. */
	init_lock_keys();

	register_cpu_notifier(&cpucache_notifier);
}
1591
/* Initcall: start the reap work on every cpu that is online. Always returns 0. */
static int __init cpucache_init(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
	return 0;
}
__initcall(cpucache_init);
1604
1605
1606
1607
1608
1609
1610
1611
1612static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1613{
1614 struct page *page;
1615 int nr_pages;
1616 int i;
1617
1618#ifndef CONFIG_MMU
1619
1620
1621
1622
1623 flags |= __GFP_COMP;
1624#endif
1625
1626 flags |= cachep->gfpflags;
1627 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1628 flags |= __GFP_RECLAIMABLE;
1629
1630 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1631 if (!page)
1632 return NULL;
1633
1634 nr_pages = (1 << cachep->gfporder);
1635 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1636 add_zone_page_state(page_zone(page),
1637 NR_SLAB_RECLAIMABLE, nr_pages);
1638 else
1639 add_zone_page_state(page_zone(page),
1640 NR_SLAB_UNRECLAIMABLE, nr_pages);
1641 for (i = 0; i < nr_pages; i++)
1642 __SetPageSlab(page + i);
1643
1644 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1645 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1646
1647 if (cachep->ctor)
1648 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1649 else
1650 kmemcheck_mark_unallocated_pages(page, nr_pages);
1651 }
1652
1653 return page_address(page);
1654}
1655
1656
1657
1658
1659static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1660{
1661 unsigned long i = (1 << cachep->gfporder);
1662 struct page *page = virt_to_page(addr);
1663 const unsigned long nr_freed = i;
1664
1665 kmemcheck_free_shadow(page, cachep->gfporder);
1666
1667 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1668 sub_zone_page_state(page_zone(page),
1669 NR_SLAB_RECLAIMABLE, nr_freed);
1670 else
1671 sub_zone_page_state(page_zone(page),
1672 NR_SLAB_UNRECLAIMABLE, nr_freed);
1673 while (i--) {
1674 BUG_ON(!PageSlab(page));
1675 __ClearPageSlab(page);
1676 page++;
1677 }
1678 if (current->reclaim_state)
1679 current->reclaim_state->reclaimed_slab += nr_freed;
1680 free_pages((unsigned long)addr, cachep->gfporder);
1681}
1682
1683static void kmem_rcu_free(struct rcu_head *head)
1684{
1685 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1686 struct kmem_cache *cachep = slab_rcu->cachep;
1687
1688 kmem_freepages(cachep, slab_rcu->addr);
1689 if (OFF_SLAB(cachep))
1690 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1691}
1692
1693#if DEBUG
1694
1695#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record a small trace inside the object's own payload.
 *
 * Written at @addr + obj_offset(cachep), one unsigned long each: the
 * marker 0x12345678, @caller, the current cpu id, then every value found
 * on the stack (walking up from &caller until kstack_end()) that
 * kernel_text_address() accepts, and finally the marker 0x87654321.
 * Objects smaller than five unsigned longs are left untouched.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
			    unsigned long caller)
{
	int size = obj_size(cachep);

	/* Skip the debug header in front of the payload. */
	addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

	if (size < 5 * sizeof(unsigned long))
		return;

	*addr++ = 0x12345678;
	*addr++ = caller;
	*addr++ = smp_processor_id();
	size -= 3 * sizeof(unsigned long);
	{
		unsigned long *sptr = &caller;
		unsigned long svalue;

		while (!kstack_end(sptr)) {
			svalue = *sptr++;
			if (kernel_text_address(svalue)) {
				*addr++ = svalue;
				size -= sizeof(unsigned long);
				/* Keep one word free for the end marker. */
				if (size <= sizeof(unsigned long))
					break;
			}
		}

	}
	*addr++ = 0x87654321;
}
1727#endif
1728
1729static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1730{
1731 int size = obj_size(cachep);
1732 addr = &((char *)addr)[obj_offset(cachep)];
1733
1734 memset(addr, val, size);
1735 *(unsigned char *)(addr + size - 1) = POISON_END;
1736}
1737
1738static void dump_line(char *data, int offset, int limit)
1739{
1740 int i;
1741 unsigned char error = 0;
1742 int bad_count = 0;
1743
1744 printk(KERN_ERR "%03x:", offset);
1745 for (i = 0; i < limit; i++) {
1746 if (data[offset + i] != POISON_FREE) {
1747 error = data[offset + i];
1748 bad_count++;
1749 }
1750 printk(" %02x", (unsigned char)data[offset + i]);
1751 }
1752 printk("\n");
1753
1754 if (bad_count == 1) {
1755 error ^= POISON_FREE;
1756 if (!(error & (error - 1))) {
1757 printk(KERN_ERR "Single bit error detected. Probably "
1758 "bad RAM.\n");
1759#ifdef CONFIG_X86
1760 printk(KERN_ERR "Run memtest86+ or a similar memory "
1761 "test tool.\n");
1762#else
1763 printk(KERN_ERR "Run a memory test tool.\n");
1764#endif
1765 }
1766 }
1767}
1768#endif
1769
1770#if DEBUG
1771
1772static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1773{
1774 int i, size;
1775 char *realobj;
1776
1777 if (cachep->flags & SLAB_RED_ZONE) {
1778 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1779 *dbg_redzone1(cachep, objp),
1780 *dbg_redzone2(cachep, objp));
1781 }
1782
1783 if (cachep->flags & SLAB_STORE_USER) {
1784 printk(KERN_ERR "Last user: [<%p>]",
1785 *dbg_userword(cachep, objp));
1786 print_symbol("(%s)",
1787 (unsigned long)*dbg_userword(cachep, objp));
1788 printk("\n");
1789 }
1790 realobj = (char *)objp + obj_offset(cachep);
1791 size = obj_size(cachep);
1792 for (i = 0; i < size && lines; i += 16, lines--) {
1793 int limit;
1794 limit = 16;
1795 if (i + limit > size)
1796 limit = size - i;
1797 dump_line(realobj, i, limit);
1798 }
1799}
1800
1801static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1802{
1803 char *realobj;
1804 int size, i;
1805 int lines = 0;
1806
1807 realobj = (char *)objp + obj_offset(cachep);
1808 size = obj_size(cachep);
1809
1810 for (i = 0; i < size; i++) {
1811 char exp = POISON_FREE;
1812 if (i == size - 1)
1813 exp = POISON_END;
1814 if (realobj[i] != exp) {
1815 int limit;
1816
1817
1818 if (lines == 0) {
1819 printk(KERN_ERR
1820 "Slab corruption: %s start=%p, len=%d\n",
1821 cachep->name, realobj, size);
1822 print_objinfo(cachep, objp, 0);
1823 }
1824
1825 i = (i / 16) * 16;
1826 limit = 16;
1827 if (i + limit > size)
1828 limit = size - i;
1829 dump_line(realobj, i, limit);
1830 i += 16;
1831 lines++;
1832
1833 if (lines > 5)
1834 break;
1835 }
1836 }
1837 if (lines != 0) {
1838
1839
1840
1841 struct slab *slabp = virt_to_slab(objp);
1842 unsigned int objnr;
1843
1844 objnr = obj_to_index(cachep, slabp, objp);
1845 if (objnr) {
1846 objp = index_to_obj(cachep, slabp, objnr - 1);
1847 realobj = (char *)objp + obj_offset(cachep);
1848 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1849 realobj, size);
1850 print_objinfo(cachep, objp, 2);
1851 }
1852 if (objnr + 1 < cachep->num) {
1853 objp = index_to_obj(cachep, slabp, objnr + 1);
1854 realobj = (char *)objp + obj_offset(cachep);
1855 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1856 realobj, size);
1857 print_objinfo(cachep, objp, 2);
1858 }
1859 }
1860}
1861#endif
1862
/*
 * slab_destroy_debugcheck - last sanity pass over a slab being destroyed.
 *
 * DEBUG builds visit every object of @slabp: poisoned objects are either
 * mapped back in (page-sized off-slab objects under
 * CONFIG_DEBUG_PAGEALLOC) or have their poison pattern verified, and
 * red-zoned objects must still carry RED_INACTIVE in both zones.
 * Non-DEBUG builds compile this to an empty function.
 */
#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
	int i;
	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, slabp, i);

		if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
			/* Same condition as used when the pages were unmapped. */
			if (cachep->buffer_size % PAGE_SIZE == 0 &&
					OFF_SLAB(cachep))
				kernel_map_pages(virt_to_page(objp),
					cachep->buffer_size / PAGE_SIZE, 1);
			else
				check_poison_obj(cachep, objp);
#else
			check_poison_obj(cachep, objp);
#endif
		}
		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "start of a freed object "
					   "was overwritten");
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "end of a freed object "
					   "was overwritten");
		}
	}
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1908{
1909 void *addr = slabp->s_mem - slabp->colouroff;
1910
1911 slab_destroy_debugcheck(cachep, slabp);
1912 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1913 struct slab_rcu *slab_rcu;
1914
1915 slab_rcu = (struct slab_rcu *)slabp;
1916 slab_rcu->cachep = cachep;
1917 slab_rcu->addr = addr;
1918 call_rcu(&slab_rcu->head, kmem_rcu_free);
1919 } else {
1920 kmem_freepages(cachep, addr);
1921 if (OFF_SLAB(cachep))
1922 kmem_cache_free(cachep->slabp_cache, slabp);
1923 }
1924}
1925
1926static void __kmem_cache_destroy(struct kmem_cache *cachep)
1927{
1928 int i;
1929 struct kmem_list3 *l3;
1930
1931 for_each_online_cpu(i)
1932 kfree(cachep->array[i]);
1933
1934
1935 for_each_online_node(i) {
1936 l3 = cachep->nodelists[i];
1937 if (l3) {
1938 kfree(l3->shared);
1939 free_alien_cache(l3->alien);
1940 kfree(l3);
1941 }
1942 }
1943 kmem_cache_free(&cache_cache, cachep);
1944}
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
/*
 * Pick the page order for the slabs of @cachep.
 *
 * Orders are tried from 0 up to KMALLOC_MAX_ORDER.  For each order that
 * fits at least one object, cachep->num and cachep->gfporder are updated;
 * the search stops at the first order that satisfies one of the stop
 * conditions below.  Returns the unused bytes per slab for the chosen
 * order (0 if no order was accepted, in which case cachep->num is left
 * unchanged).
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
			size_t size, size_t align, unsigned long flags)
{
	unsigned long offslab_limit;
	size_t left_over = 0;
	int gfporder;

	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
		unsigned int num;
		size_t remainder;

		cache_estimate(gfporder, size, align, flags, &remainder, &num);
		/* Not even one object fits at this order: try a bigger one. */
		if (!num)
			continue;

		if (flags & CFLGS_OFF_SLAB) {
			/*
			 * Cap the object count for off-slab management:
			 * a struct slab plus one kmem_bufctl_t per object
			 * must not exceed the size of one object.
			 */
			offslab_limit = size - sizeof(struct slab);
			offslab_limit /= sizeof(kmem_bufctl_t);

			if (num > offslab_limit)
				break;
		}

		/* This order is usable; remember it. */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/* Reclaim-accounted caches take the first usable order. */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/* Do not go beyond the configured break order. */
		if (gfporder >= slab_break_gfp_order)
			break;

		/* Accept once at most 1/8 of the slab would be wasted. */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
			break;
	}
	return left_over;
}
2016
/*
 * Attach a per-cpu array cache and node lists to a freshly created cache.
 *
 * What happens depends on the bootstrap state in g_cpucache_up:
 *   FULL        - delegate to enable_cpucache() and return its result.
 *   NONE        - use the static initarray_generic and set_up_list3s()
 *                 with SIZE_AC, then advance to PARTIAL_AC or PARTIAL_L3.
 *   PARTIAL_AC  - kmalloc the array cache, set_up_list3s() with SIZE_L3,
 *                 then advance to PARTIAL_L3.
 *   otherwise   - kmalloc the array cache and a kmem_list3 per online node.
 * All non-FULL paths finish by initialising the boot-time array cache
 * limits and return 0.
 */
static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	if (g_cpucache_up == FULL)
		return enable_cpucache(cachep, gfp);

	if (g_cpucache_up == NONE) {
		/* Nothing can be kmalloc'ed yet: borrow the static array. */
		cachep->array[smp_processor_id()] = &initarray_generic.cache;

		set_up_list3s(cachep, SIZE_AC);
		if (INDEX_AC == INDEX_L3)
			g_cpucache_up = PARTIAL_L3;
		else
			g_cpucache_up = PARTIAL_AC;
	} else {
		/*
		 * NOTE(review): the kmalloc() result is not checked here but
		 * is presumably what cpu_cache_get() dereferences below --
		 * confirm this cannot fail during bootstrap.
		 */
		cachep->array[smp_processor_id()] =
			kmalloc(sizeof(struct arraycache_init), gfp);

		if (g_cpucache_up == PARTIAL_AC) {
			set_up_list3s(cachep, SIZE_L3);
			g_cpucache_up = PARTIAL_L3;
		} else {
			int node;
			for_each_online_node(node) {
				cachep->nodelists[node] =
				    kmalloc_node(sizeof(struct kmem_list3),
						gfp, node);
				BUG_ON(!cachep->nodelists[node]);
				kmem_list3_init(cachep->nodelists[node]);
			}
		}
	}
	/* Stagger the first reap by an offset derived from the cache address. */
	cachep->nodelists[numa_node_id()]->next_reap =
			jiffies + REAPTIMEOUT_LIST3 +
			((unsigned long)cachep) % REAPTIMEOUT_LIST3;

	/* Boot-time defaults for the array cache. */
	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
}
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
/**
 * kmem_cache_create - create a new object cache
 * @name: name of the cache; must be non-NULL and is stored by pointer,
 *        not copied
 * @size: size in bytes of the objects in the cache
 * @align: minimum alignment requested for the objects
 * @flags: SLAB_* creation flags; anything outside CREATE_MASK is a BUG
 * @ctor: optional constructor, stored in the cache
 *
 * Must not be called from interrupt context.  Returns the new cache, or
 * NULL on failure (duplicate name, allocation failure, or no usable slab
 * order).  With SLAB_PANIC set in @flags a failure panics instead.
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
	unsigned long flags, void (*ctor)(void *))
{
	size_t left_over, slab_size, ralign;
	struct kmem_cache *cachep = NULL, *pc;
	gfp_t gfp;

	/* Reject obviously invalid requests outright. */
	if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
	    size > KMALLOC_MAX_SIZE) {
		printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
				name);
		BUG();
	}

	/*
	 * Locking is only taken once the allocator is up; the matching
	 * release at the end of the function uses the same test.
	 */
	if (slab_is_available()) {
		get_online_cpus();
		mutex_lock(&cache_chain_mutex);
	}

	/* Refuse a name that is already on the cache chain. */
	list_for_each_entry(pc, &cache_chain, next) {
		char tmp;
		int res;

		/*
		 * Probe the name pointer first so that a stale name is
		 * reported instead of faulting in strcmp().
		 */
		res = probe_kernel_address(pc->name, tmp);
		if (res) {
			printk(KERN_ERR
			       "SLAB: cache with size %d has lost its name\n",
			       pc->buffer_size);
			continue;
		}

		if (!strcmp(pc->name, name)) {
			printk(KERN_ERR
			       "kmem_cache_create: duplicate cache %s\n", name);
			dump_stack();
			goto oops;
		}
	}

#if DEBUG
	WARN_ON(strchr(name, ' '));	/* Warn about spaces in cache names. */
#if FORCED_DEBUG
	/*
	 * Force red zoning and user tracking unless the extra bytes would
	 * push a large object (>= 4096) across a power-of-two boundary.
	 */
	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
						2 * sizeof(unsigned long long)))
		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
	if (!(flags & SLAB_DESTROY_BY_RCU))
		flags |= SLAB_POISON;
#endif
	/* Poisoning and RCU-deferred destruction are mutually exclusive. */
	if (flags & SLAB_DESTROY_BY_RCU)
		BUG_ON(flags & SLAB_POISON);
#endif

	/* Only flags in CREATE_MASK are accepted. */
	BUG_ON(flags & ~CREATE_MASK);

	/* Round the object size up to a multiple of the word size. */
	if (size & (BYTES_PER_WORD - 1)) {
		size += (BYTES_PER_WORD - 1);
		size &= ~(BYTES_PER_WORD - 1);
	}

	/* Step 1: alignment derived from the flags. */
	if (flags & SLAB_HWCACHE_ALIGN) {
		/*
		 * Start from the cache line size and halve it while the
		 * object still fits into half of it.
		 */
		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
	} else {
		ralign = BYTES_PER_WORD;
	}

	/* User tracking overrides the above with word alignment. */
	if (flags & SLAB_STORE_USER)
		ralign = BYTES_PER_WORD;

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* Round the size up to REDZONE_ALIGN as well. */
		size += REDZONE_ALIGN - 1;
		size &= ~(REDZONE_ALIGN - 1);
	}

	/* Step 2: architecture minimum. */
	if (ralign < ARCH_SLAB_MINALIGN) {
		ralign = ARCH_SLAB_MINALIGN;
	}
	/* Step 3: caller-requested minimum. */
	if (ralign < align) {
		ralign = align;
	}
	/* Alignment above that of unsigned long long disables debug zones. */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
	/* Step 4: store the final alignment. */
	align = ralign;

	if (slab_is_available())
		gfp = GFP_KERNEL;
	else
		gfp = GFP_NOWAIT;

	/* Allocate the (zeroed) cache descriptor. */
	cachep = kmem_cache_zalloc(&cache_cache, gfp);
	if (!cachep)
		goto oops;

#if DEBUG
	cachep->obj_size = size;

	/*
	 * Account for the debug fields surrounding the payload; size grows
	 * while obj_size keeps the payload size.
	 */
	if (flags & SLAB_RED_ZONE) {
		/* One red zone word before and one after the payload. */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
	}
	if (flags & SLAB_STORE_USER) {
		/* Room for the user word, padded when red zones are on. */
		if (flags & SLAB_RED_ZONE)
			size += REDZONE_ALIGN;
		else
			size += BYTES_PER_WORD;
	}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
	/* Pad suitable objects to a full page, payload at the page end. */
	if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
	    && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
		cachep->obj_offset += PAGE_SIZE - size;
		size = PAGE_SIZE;
	}
#endif
#endif

	/*
	 * Objects of at least PAGE_SIZE/8 get off-slab management, except
	 * during early init and for SLAB_NOLEAKTRACE caches.
	 */
	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
	    !(flags & SLAB_NOLEAKTRACE))
		/* Tentatively off-slab; may be reverted further down. */
		flags |= CFLGS_OFF_SLAB;

	size = ALIGN(size, align);

	left_over = calculate_slab_order(cachep, size, align, flags);

	/* No page order could hold even one object. */
	if (!cachep->num) {
		printk(KERN_ERR
		       "kmem_cache_create: couldn't create cache %s.\n", name);
		kmem_cache_free(&cache_cache, cachep);
		cachep = NULL;
		goto oops;
	}
	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
			  + sizeof(struct slab), align);

	/*
	 * If the leftover space can hold the management data, keep it
	 * on-slab after all.
	 */
	if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
		flags &= ~CFLGS_OFF_SLAB;
		left_over -= slab_size;
	}

	if (flags & CFLGS_OFF_SLAB) {
		/* Off-slab management data needs no alignment padding. */
		slab_size =
		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

#ifdef CONFIG_PAGE_POISONING
		/*
		 * Page-sized poisoned off-slab objects drop red zoning and
		 * user tracking.
		 */
		if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
	}

	/* Colour offset: cache line size, or the alignment if larger. */
	cachep->colour_off = cache_line_size();
	if (cachep->colour_off < align)
		cachep->colour_off = align;
	cachep->colour = left_over / cachep->colour_off;
	cachep->slab_size = slab_size;
	cachep->flags = flags;
	cachep->gfpflags = 0;
	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
		cachep->gfpflags |= GFP_DMA;
	cachep->buffer_size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);

	if (flags & CFLGS_OFF_SLAB) {
		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
		/* A general cache for the management data must exist. */
		BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
	}
	cachep->ctor = ctor;
	cachep->name = name;

	if (setup_cpu_cache(cachep, gfp)) {
		__kmem_cache_destroy(cachep);
		cachep = NULL;
		goto oops;
	}

	/* Setup complete: link the cache into the chain. */
	list_add(&cachep->next, &cache_chain);
oops:
	if (!cachep && (flags & SLAB_PANIC))
		panic("kmem_cache_create(): failed to create slab `%s'\n",
		      name);
	if (slab_is_available()) {
		mutex_unlock(&cache_chain_mutex);
		put_online_cpus();
	}
	return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
2373
/*
 * Context assertions used throughout the allocator.  In DEBUG builds
 * they BUG() when the interrupt state or the list_lock ownership is not
 * what the caller requires; otherwise they expand to nothing.
 */
#if DEBUG
/* Assert that local interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* Assert that local interrupts are enabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/* On SMP: assert irqs are off and the local node's list_lock is held. */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
#endif
}

/* On SMP: assert irqs are off and the list_lock of @node is held. */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[node]->list_lock);
#endif
}

#else
#define check_irq_off()	do { } while(0)
#define check_irq_on()	do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
2407
2408static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2409 struct array_cache *ac,
2410 int force, int node);
2411
2412static void do_drain(void *arg)
2413{
2414 struct kmem_cache *cachep = arg;
2415 struct array_cache *ac;
2416 int node = numa_node_id();
2417
2418 check_irq_off();
2419 ac = cpu_cache_get(cachep);
2420 spin_lock(&cachep->nodelists[node]->list_lock);
2421 free_block(cachep, ac->entry, ac->avail, node);
2422 spin_unlock(&cachep->nodelists[node]->list_lock);
2423 ac->avail = 0;
2424}
2425
2426static void drain_cpu_caches(struct kmem_cache *cachep)
2427{
2428 struct kmem_list3 *l3;
2429 int node;
2430
2431 on_each_cpu(do_drain, cachep, 1);
2432 check_irq_on();
2433 for_each_online_node(node) {
2434 l3 = cachep->nodelists[node];
2435 if (l3 && l3->alien)
2436 drain_alien_cache(cachep, l3->alien);
2437 }
2438
2439 for_each_online_node(node) {
2440 l3 = cachep->nodelists[node];
2441 if (l3)
2442 drain_array(cachep, l3, l3->shared, 1, node);
2443 }
2444}
2445
2446
2447
2448
2449
2450
2451
/*
 * Destroy up to @tofree completely free slabs from @l3's slabs_free list.
 *
 * The list_lock is taken and dropped around each removal, so the slab
 * itself is destroyed without the lock held.  Returns the number of
 * slabs actually freed.
 */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree)
{
	struct list_head *p;
	int nr_freed;
	struct slab *slabp;

	nr_freed = 0;
	/* The unlocked list_empty() test is only a hint; see recheck below. */
	while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {

		spin_lock_irq(&l3->list_lock);
		p = l3->slabs_free.prev;
		/* Recheck under the lock: the list may have emptied meanwhile. */
		if (p == &l3->slabs_free) {
			spin_unlock_irq(&l3->list_lock);
			goto out;
		}

		slabp = list_entry(p, struct slab, list);
#if DEBUG
		BUG_ON(slabp->inuse);
#endif
		list_del(&slabp->list);
		/*
		 * Adjust the free object count while still holding the
		 * lock, before the slab goes away.
		 */
		l3->free_objects -= cache->num;
		spin_unlock_irq(&l3->list_lock);
		slab_destroy(cache, slabp);
		nr_freed++;
	}
out:
	return nr_freed;
}
2486
2487
2488static int __cache_shrink(struct kmem_cache *cachep)
2489{
2490 int ret = 0, i = 0;
2491 struct kmem_list3 *l3;
2492
2493 drain_cpu_caches(cachep);
2494
2495 check_irq_on();
2496 for_each_online_node(i) {
2497 l3 = cachep->nodelists[i];
2498 if (!l3)
2499 continue;
2500
2501 drain_freelist(cachep, l3, l3->free_objects);
2502
2503 ret += !list_empty(&l3->slabs_full) ||
2504 !list_empty(&l3->slabs_partial);
2505 }
2506 return (ret ? 1 : 0);
2507}
2508
2509
2510
2511
2512
2513
2514
2515
2516int kmem_cache_shrink(struct kmem_cache *cachep)
2517{
2518 int ret;
2519 BUG_ON(!cachep || in_interrupt());
2520
2521 get_online_cpus();
2522 mutex_lock(&cache_chain_mutex);
2523 ret = __cache_shrink(cachep);
2524 mutex_unlock(&cache_chain_mutex);
2525 put_online_cpus();
2526 return ret;
2527}
2528EXPORT_SYMBOL(kmem_cache_shrink);
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546void kmem_cache_destroy(struct kmem_cache *cachep)
2547{
2548 BUG_ON(!cachep || in_interrupt());
2549
2550
2551 get_online_cpus();
2552 mutex_lock(&cache_chain_mutex);
2553
2554
2555
2556 list_del(&cachep->next);
2557 if (__cache_shrink(cachep)) {
2558 slab_error(cachep, "Can't free all objects");
2559 list_add(&cachep->next, &cache_chain);
2560 mutex_unlock(&cache_chain_mutex);
2561 put_online_cpus();
2562 return;
2563 }
2564
2565 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2566 rcu_barrier();
2567
2568 __kmem_cache_destroy(cachep);
2569 mutex_unlock(&cache_chain_mutex);
2570 put_online_cpus();
2571}
2572EXPORT_SYMBOL(kmem_cache_destroy);
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2586 int colour_off, gfp_t local_flags,
2587 int nodeid)
2588{
2589 struct slab *slabp;
2590
2591 if (OFF_SLAB(cachep)) {
2592
2593 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2594 local_flags, nodeid);
2595
2596
2597
2598
2599
2600
2601 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2602 local_flags);
2603 if (!slabp)
2604 return NULL;
2605 } else {
2606 slabp = objp + colour_off;
2607 colour_off += cachep->slab_size;
2608 }
2609 slabp->inuse = 0;
2610 slabp->colouroff = colour_off;
2611 slabp->s_mem = objp + colour_off;
2612 slabp->nodeid = nodeid;
2613 slabp->free = 0;
2614 return slabp;
2615}
2616
2617static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2618{
2619 return (kmem_bufctl_t *) (slabp + 1);
2620}
2621
/*
 * Initialise every object of a freshly allocated slab and chain all of
 * them into the slab's free list (bufctl[i] = i + 1, last = BUFCTL_END).
 *
 * In DEBUG builds the objects are also poisoned, their user word and red
 * zones are initialised, and the red zones are re-checked after the
 * constructor ran.
 */
static void cache_init_objs(struct kmem_cache *cachep,
			    struct slab *slabp)
{
	int i;

	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, slabp, i);
#if DEBUG
		/* Poison first: the stores below overwrite debug fields only. */
		if (cachep->flags & SLAB_POISON)
			poison_obj(cachep, objp, POISON_FREE);
		if (cachep->flags & SLAB_STORE_USER)
			*dbg_userword(cachep, objp) = NULL;

		if (cachep->flags & SLAB_RED_ZONE) {
			*dbg_redzone1(cachep, objp) = RED_INACTIVE;
			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
		}
		/*
		 * Poisoned caches skip the constructor here; for them it
		 * is run at allocation time, after the poison check.
		 */
		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
			cachep->ctor(objp + obj_offset(cachep));

		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "constructor overwrote the"
					   " end of an object");
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "constructor overwrote the"
					   " start of an object");
		}
		/* Page-sized poisoned off-slab objects get unmapped. */
		if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
			    OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 0);
#else
		if (cachep->ctor)
			cachep->ctor(objp);
#endif
		slab_bufctl(slabp)[i] = i + 1;
	}
	/* Terminate the free list; relies on cachep->num being non-zero. */
	slab_bufctl(slabp)[i - 1] = BUFCTL_END;
}
2668
2669static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2670{
2671 if (CONFIG_ZONE_DMA_FLAG) {
2672 if (flags & GFP_DMA)
2673 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2674 else
2675 BUG_ON(cachep->gfpflags & GFP_DMA);
2676 }
2677}
2678
2679static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2680 int nodeid)
2681{
2682 void *objp = index_to_obj(cachep, slabp, slabp->free);
2683 kmem_bufctl_t next;
2684
2685 slabp->inuse++;
2686 next = slab_bufctl(slabp)[slabp->free];
2687#if DEBUG
2688 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2689 WARN_ON(slabp->nodeid != nodeid);
2690#endif
2691 slabp->free = next;
2692
2693 return objp;
2694}
2695
2696static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2697 void *objp, int nodeid)
2698{
2699 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2700
2701#if DEBUG
2702
2703 WARN_ON(slabp->nodeid != nodeid);
2704
2705 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2706 printk(KERN_ERR "slab: double free detected in cache "
2707 "'%s', objp %p\n", cachep->name, objp);
2708 BUG();
2709 }
2710#endif
2711 slab_bufctl(slabp)[objnr] = slabp->free;
2712 slabp->free = objnr;
2713 slabp->inuse--;
2714}
2715
2716
2717
2718
2719
2720
2721static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2722 void *addr)
2723{
2724 int nr_pages;
2725 struct page *page;
2726
2727 page = virt_to_page(addr);
2728
2729 nr_pages = 1;
2730 if (likely(!PageCompound(page)))
2731 nr_pages <<= cache->gfporder;
2732
2733 do {
2734 page_set_cache(page, cache);
2735 page_set_slab(page, slab);
2736 page++;
2737 } while (--nr_pages);
2738}
2739
2740
2741
2742
2743
/*
 * Grow @cachep by one slab on node @nodeid.
 *
 * @objp may carry already allocated pages for the slab; when it is NULL
 * the pages are obtained with kmem_getpages().  Called with interrupts
 * disabled; they are enabled around the allocations when @flags allow
 * waiting and disabled again before returning.
 *
 * Returns 1 if a slab was added to the node's free list, 0 on failure.
 */
static int cache_grow(struct kmem_cache *cachep,
		gfp_t flags, int nodeid, void *objp)
{
	struct slab *slabp;
	size_t offset;
	gfp_t local_flags;
	struct kmem_list3 *l3;

	/* Reject flags slab callers must never pass, keep the usable ones. */
	BUG_ON(flags & GFP_SLAB_BUG_MASK);
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	/* Take the l3 list lock to change the colour_next on this node */
	check_irq_off();
	l3 = cachep->nodelists[nodeid];
	spin_lock(&l3->list_lock);

	/* Pick this slab's colour and advance the node's colour, wrapping. */
	offset = l3->colour_next;
	l3->colour_next++;
	if (l3->colour_next >= cachep->colour)
		l3->colour_next = 0;
	spin_unlock(&l3->list_lock);

	/* Convert the colour index into a byte offset. */
	offset *= cachep->colour_off;

	if (local_flags & __GFP_WAIT)
		local_irq_enable();

	/* Check the DMA bit of the request against the cache. */
	kmem_flagcheck(cachep, flags);

	/* Get memory for the objects unless the caller supplied it. */
	if (!objp)
		objp = kmem_getpages(cachep, local_flags, nodeid);
	if (!objp)
		goto failed;

	/* Get slab management. */
	slabp = alloc_slabmgmt(cachep, objp, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
	if (!slabp)
		goto opps1;

	slab_map_pages(cachep, slabp, objp);

	cache_init_objs(cachep, slabp);

	if (local_flags & __GFP_WAIT)
		local_irq_disable();
	check_irq_off();
	spin_lock(&l3->list_lock);

	/* Make the new slab available on the node's free list. */
	list_add_tail(&slabp->list, &(l3->slabs_free));
	STATS_INC_GROWN(cachep);
	l3->free_objects += cachep->num;
	spin_unlock(&l3->list_lock);
	return 1;
opps1:
	/* Management allocation failed: give the pages back. */
	kmem_freepages(cachep, objp);
failed:
	if (local_flags & __GFP_WAIT)
		local_irq_disable();
	return 0;
}
2821
2822#if DEBUG
2823
2824
2825
2826
2827
2828
2829static void kfree_debugcheck(const void *objp)
2830{
2831 if (!virt_addr_valid(objp)) {
2832 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2833 (unsigned long)objp);
2834 BUG();
2835 }
2836}
2837
2838static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2839{
2840 unsigned long long redzone1, redzone2;
2841
2842 redzone1 = *dbg_redzone1(cache, obj);
2843 redzone2 = *dbg_redzone2(cache, obj);
2844
2845
2846
2847
2848 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2849 return;
2850
2851 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2852 slab_error(cache, "double free detected");
2853 else
2854 slab_error(cache, "memory outside object was overwritten");
2855
2856 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2857 obj, redzone1, redzone2);
2858}
2859
/*
 * DEBUG-only checks performed when @objp is freed to @cachep.
 *
 * @objp is the pointer handed out to the user; it is converted back to
 * the start of the object (including debug fields).  Red zones are
 * verified and reset, the user word is set to @caller, the object index
 * is validated and the payload is poisoned (or unmapped).  Returns the
 * adjusted object pointer.
 */
static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
				   void *caller)
{
	struct page *page;
	unsigned int objnr;
	struct slab *slabp;

	/* The object must belong to the cache it is being freed to. */
	BUG_ON(virt_to_cache(objp) != cachep);

	/* Step back from the payload to the start of the object. */
	objp -= obj_offset(cachep);
	kfree_debugcheck(objp);
	page = virt_to_head_page(objp);

	slabp = page_get_slab(page);

	if (cachep->flags & SLAB_RED_ZONE) {
		verify_redzone_free(cachep, objp);
		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	objnr = obj_to_index(cachep, slabp, objp);

	/* The pointer must be the exact start of a valid object slot. */
	BUG_ON(objnr >= cachep->num);
	BUG_ON(objp != index_to_obj(cachep, slabp, objnr));

#ifdef CONFIG_DEBUG_SLAB_LEAK
	slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
#endif
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		/* Page-sized off-slab objects: store a trace, then unmap. */
		if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
			store_stackinfo(cachep, objp, (unsigned long)caller);
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 0);
		} else {
			poison_obj(cachep, objp, POISON_FREE);
		}
#else
		poison_obj(cachep, objp, POISON_FREE);
#endif
	}
	return objp;
}
2906
/*
 * DEBUG-only consistency check of a slab's free list.
 *
 * Walks the bufctl chain from slabp->free to BUFCTL_END and verifies
 * that every index is in range, that the chain is no longer than
 * cachep->num, and that its length equals num - inuse.  On any
 * inconsistency the slab descriptor plus bufctl array are hex-dumped
 * and the kernel BUGs.
 */
static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
{
	kmem_bufctl_t i;
	int entries = 0;

	/* Check slab's freelist to see if this obj is there. */
	for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
		entries++;
		/* Too many entries (a loop) or an out-of-range index. */
		if (entries > cachep->num || i >= cachep->num)
			goto bad;
	}
	if (entries != cachep->num - slabp->inuse) {
bad:		/* Also reached by the goto above. */
		printk(KERN_ERR "slab: Internal list corruption detected in "
				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
			cachep->name, cachep->num, slabp, slabp->inuse);
		for (i = 0;
		     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
		     i++) {
			if (i % 16 == 0)
				printk("\n%03x:", i);
			printk(" %02x", ((unsigned char *)slabp)[i]);
		}
		printk("\n");
		BUG();
	}
}
2934#else
2935#define kfree_debugcheck(x) do { } while(0)
2936#define cache_free_debugcheck(x,objp,z) (objp)
2937#define check_slabp(x,y) do { } while(0)
2938#endif
2939
/*
 * Refill the current cpu's (empty) array cache of @cachep and return one
 * object from it.
 *
 * Objects are taken, in this order, from the node's shared array, from
 * partial slabs and from free slabs.  If nothing could be obtained the
 * cache is grown by one slab and the whole procedure is retried.
 * Returns NULL only if growing failed and the array is still empty.
 * Must be called with interrupts disabled.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;
	int node;

retry:
	check_irq_off();
	node = numa_node_id();
	ac = cpu_cache_get(cachep);
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * The array was not touched recently: cap the refill at
		 * BATCHREFILL_LIMIT objects.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	l3 = cachep->nodelists[node];

	/* Refill is only legal for an empty array on a node with lists. */
	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/* See if we can refill from the shared array */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
		goto alloc_done;

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;
		/* Get slab alloc is to come from: partial first, then free. */
		entry = l3->slabs_partial.next;
		if (entry == &l3->slabs_partial) {
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * Slabs on the partial and free lists must have at least
		 * one free object.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
							    node);
		}
		check_slabp(cachep, slabp);

		/* move slabp to correct slabp list: */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &l3->slabs_full);
		else
			list_add(&slabp->list, &l3->slabs_partial);
	}

must_grow:
	/* Objects moved from slabs into the array are no longer "free". */
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		if (!x && ac->avail == 0)	/* no objects in sight? abort */
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */
			goto retry;
	}
	ac->touched = 1;
	return ac->entry[--ac->avail];
}
3030
3031static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3032 gfp_t flags)
3033{
3034 might_sleep_if(flags & __GFP_WAIT);
3035#if DEBUG
3036 kmem_flagcheck(cachep, flags);
3037#endif
3038}
3039
#if DEBUG
/*
 * Debug processing applied to a freshly allocated object.
 *
 * @cachep: cache the object was taken from
 * @flags:  allocation flags (not used by the checks below)
 * @objp:   raw object pointer as handed out by the slab lists, or NULL
 * @caller: address recorded as the allocating call site
 *
 * For a non-NULL object this verifies/replaces the poison pattern, stores
 * the caller, validates and arms both red zones, marks the object active
 * for the leak detector and finally advances the pointer by
 * obj_offset(cachep).  Returns the adjusted pointer (NULL is passed
 * through untouched).
 */
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
				gfp_t flags, void *objp, void *caller)
{
	if (!objp)
		return objp;
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		/*
		 * Page-multiple, off-slab objects are mapped back in
		 * instead of having their poison pattern checked.
		 */
		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 1);
		else
			check_poison_obj(cachep, objp);
#else
		check_poison_obj(cachep, objp);
#endif
		poison_obj(cachep, objp, POISON_INUSE);
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	if (cachep->flags & SLAB_RED_ZONE) {
		/* Both guard words must still read RED_INACTIVE. */
		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
			slab_error(cachep, "double free, or memory outside"
						" object was overwritten");
			printk(KERN_ERR
				"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
				objp, *dbg_redzone1(cachep, objp),
				*dbg_redzone2(cachep, objp));
		}
		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
	}
#ifdef CONFIG_DEBUG_SLAB_LEAK
	{
		struct slab *slabp;
		unsigned objnr;

		/* Flag the object's bufctl slot as in use. */
		slabp = page_get_slab(virt_to_head_page(objp));
		objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
	}
#endif
	objp += obj_offset(cachep);
	/* Poisoned caches run the constructor at allocation time. */
	if (cachep->ctor && cachep->flags & SLAB_POISON)
		cachep->ctor(objp);
#if ARCH_SLAB_MINALIGN
	/*
	 * Test the alignment on the full pointer width: casting to u32
	 * truncates the pointer on 64-bit builds.  The constant is cast to
	 * int so that it always matches the %d conversion.
	 */
	if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) {
		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
		       objp, (int)ARCH_SLAB_MINALIGN);
	}
#endif
	return objp;
}
#else
/* Non-debug builds: the object pointer is returned unchanged. */
#define cache_alloc_debugcheck_after(a,b,objp,d)	(objp)
#endif
3098
3099static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3100{
3101 if (cachep == &cache_cache)
3102 return false;
3103
3104 return should_failslab(obj_size(cachep), flags);
3105}
3106
/*
 * Allocate one object through the current CPU's array cache.
 *
 * Must be called with interrupts disabled (check_irq_off()).  If the
 * array cache holds objects the most recently added one is popped;
 * otherwise cache_alloc_refill() is asked for an object.  Returns the
 * object or NULL.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;

	check_irq_off();

	ac = cpu_cache_get(cachep);
	if (likely(ac->avail)) {
		STATS_INC_ALLOCHIT(cachep);
		ac->touched = 1;
		objp = ac->entry[--ac->avail];
	} else {
		STATS_INC_ALLOCMISS(cachep);
		objp = cache_alloc_refill(cachep, flags);
		/*
		 * Re-read the per-cpu array pointer: cache_alloc_refill()
		 * re-fetches it itself after growing the cache, so the
		 * value read above is presumably not guaranteed to be
		 * current any more.
		 */
		ac = cpu_cache_get(cachep);
	}
	/*
	 * The slot the object was popped from (entry[avail]) still holds
	 * its address; have kmemleak erase that stale reference.
	 */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);
	return objp;
}
3137
3138#ifdef CONFIG_NUMA
3139
3140
3141
3142
3143
3144
3145static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3146{
3147 int nid_alloc, nid_here;
3148
3149 if (in_interrupt() || (flags & __GFP_THISNODE))
3150 return NULL;
3151 nid_alloc = nid_here = numa_node_id();
3152 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3153 nid_alloc = cpuset_mem_spread_node();
3154 else if (current->mempolicy)
3155 nid_alloc = slab_node(current->mempolicy);
3156 if (nid_alloc != nid_here)
3157 return ____cache_alloc_node(cachep, flags, nid_alloc);
3158 return NULL;
3159}
3160
3161
3162
3163
3164
3165
3166
3167
3168
/*
 * Last-resort allocation that may take an object from any permitted node.
 *
 * Returns NULL immediately for __GFP_THISNODE requests.  Otherwise the
 * zonelist chosen by the task's memory policy is walked looking for a
 * node that already has free objects; failing that, fresh pages are
 * requested and the cache is grown on whichever node they came from.
 * Returns the object or NULL.
 */
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
	struct zonelist *zonelist;
	gfp_t local_flags;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *obj = NULL;
	int nid;

	if (flags & __GFP_THISNODE)
		return NULL;

	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
	/* Only the constraint and reclaim bits are passed to the page allocator. */
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

retry:
	/*
	 * Look through the allowed zones for a node whose lists exist and
	 * report free objects, and try to allocate from it.
	 */
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		nid = zone_to_nid(zone);

		if (cpuset_zone_allowed_hardwall(zone, flags) &&
			cache->nodelists[nid] &&
			cache->nodelists[nid]->free_objects) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (obj)
					break;
		}
	}

	if (!obj) {
		/*
		 * No node had a usable object.  Get pages directly, starting
		 * from the local node, and attach them to the node they were
		 * actually allocated on.  Interrupts are enabled around the
		 * page allocation when the request is allowed to wait.
		 */
		if (local_flags & __GFP_WAIT)
			local_irq_enable();
		kmem_flagcheck(cache, flags);
		obj = kmem_getpages(cache, local_flags, numa_node_id());
		if (local_flags & __GFP_WAIT)
			local_irq_disable();
		if (obj) {
			/* Insert the pages into the cache on their home node. */
			nid = page_to_nid(virt_to_page(obj));
			if (cache_grow(cache, flags, nid, obj)) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (!obj)
					/*
					 * The cache was grown but the object
					 * could not be obtained (presumably
					 * taken by someone else meanwhile);
					 * start over.
					 */
					goto retry;
			} else {
				/* cache_grow() failed; nothing to hand out. */
				obj = NULL;
			}
		}
	}
	return obj;
}
3239
3240
3241
3242
/*
 * Allocate one object directly from the slab lists of node @nodeid,
 * without going through a per-cpu array cache.
 *
 * The node's kmem_list3 must exist (BUG_ON otherwise) and interrupts
 * must be disabled.  If the node has neither partial nor free slabs the
 * cache is grown on that node and the lookup is retried; if growing
 * fails the request is handed to fallback_alloc().
 */
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
				int nodeid)
{
	struct list_head *entry;
	struct slab *slabp;
	struct kmem_list3 *l3;
	void *obj;
	int x;

	l3 = cachep->nodelists[nodeid];
	BUG_ON(!l3);

retry:
	check_irq_off();
	spin_lock(&l3->list_lock);
	/* Prefer a partially used slab; fall back to a completely free one. */
	entry = l3->slabs_partial.next;
	if (entry == &l3->slabs_partial) {
		l3->free_touched = 1;
		entry = l3->slabs_free.next;
		if (entry == &l3->slabs_free)
			goto must_grow;
	}

	slabp = list_entry(entry, struct slab, list);
	check_spinlock_acquired_node(cachep, nodeid);
	check_slabp(cachep, slabp);

	STATS_INC_NODEALLOCS(cachep);
	STATS_INC_ACTIVE(cachep);
	STATS_SET_HIGH(cachep);

	/* A slab on the partial/free lists must have room left. */
	BUG_ON(slabp->inuse == cachep->num);

	obj = slab_get_obj(cachep, slabp, nodeid);
	check_slabp(cachep, slabp);
	l3->free_objects--;
	/* Re-file the slab on the list matching its new fill state. */
	list_del(&slabp->list);

	if (slabp->free == BUFCTL_END)
		list_add(&slabp->list, &l3->slabs_full);
	else
		list_add(&slabp->list, &l3->slabs_partial);

	spin_unlock(&l3->list_lock);
	goto done;

must_grow:
	spin_unlock(&l3->list_lock);
	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
	if (x)
		goto retry;

	/* Growing on this node failed: try any other permitted node. */
	return fallback_alloc(cachep, flags);

done:
	return obj;
}
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
/*
 * Common implementation of node-aware allocation.
 *
 * @cachep: cache to allocate from
 * @flags:  allocation flags (masked with gfp_allowed_mask)
 * @nodeid: requested node, or -1 for the current node
 * @caller: call-site address passed to the debug hooks
 *
 * Returns the object or NULL.  Honours fault injection, runs the debug
 * hooks before and after the allocation, reports the object to
 * kmemleak/kmemcheck and zeroes it when __GFP_ZERO is set.
 */
static __always_inline void *
__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
		   void *caller)
{
	unsigned long save_flags;
	void *ptr;

	flags &= gfp_allowed_mask;

	lockdep_trace_alloc(flags);

	if (slab_should_failslab(cachep, flags))
		return NULL;

	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);

	if (nodeid == -1)
		nodeid = numa_node_id();

	if (unlikely(!cachep->nodelists[nodeid])) {
		/* The requested node has no lists for this cache. */
		ptr = fallback_alloc(cachep, flags);
		goto out;
	}

	if (nodeid == numa_node_id()) {
		/*
		 * The request targets the local node, so the per-cpu array
		 * cache can be tried first; only if that yields nothing is
		 * the node's slab list used directly below.
		 */
		ptr = ____cache_alloc(cachep, flags);
		if (ptr)
			goto out;
	}
	/* ____cache_alloc_node() itself falls back to other nodes on failure. */
	ptr = ____cache_alloc_node(cachep, flags, nodeid);
  out:
	local_irq_restore(save_flags);
	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
	kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
				 flags);

	if (likely(ptr))
		kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));

	if (unlikely((flags & __GFP_ZERO) && ptr))
		memset(ptr, 0, obj_size(cachep));

	return ptr;
}
3367
3368static __always_inline void *
3369__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3370{
3371 void *objp;
3372
3373 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3374 objp = alternate_node_alloc(cache, flags);
3375 if (objp)
3376 goto out;
3377 }
3378 objp = ____cache_alloc(cache, flags);
3379
3380
3381
3382
3383
3384 if (!objp)
3385 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3386
3387 out:
3388 return objp;
3389}
3390#else
3391
/* Non-NUMA build: allocation always goes through the per-cpu array cache. */
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	return ____cache_alloc(cachep, flags);
}
3397
3398#endif
3399
/*
 * Common implementation behind kmem_cache_alloc() and __kmalloc().
 *
 * @cachep: cache to allocate from
 * @flags:  allocation flags (masked with gfp_allowed_mask)
 * @caller: call-site address passed to the debug hooks
 *
 * Returns the object or NULL.  The allocation itself runs with local
 * interrupts disabled; debug, kmemleak and kmemcheck hooks run after
 * interrupts are restored, and the object is zeroed for __GFP_ZERO.
 */
static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
	unsigned long save_flags;
	void *objp;

	flags &= gfp_allowed_mask;

	lockdep_trace_alloc(flags);

	/* Fault injection may veto the allocation up front. */
	if (slab_should_failslab(cachep, flags))
		return NULL;

	cache_alloc_debugcheck_before(cachep, flags);
	local_irq_save(save_flags);
	objp = __do_cache_alloc(cachep, flags);
	local_irq_restore(save_flags);
	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
	kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
				 flags);
	prefetchw(objp);

	if (likely(objp))
		kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));

	if (unlikely((flags & __GFP_ZERO) && objp))
		memset(objp, 0, obj_size(cachep));

	return objp;
}
3430
3431
3432
3433
/*
 * Return @nr_objects objects from the array @objpp to their slabs on
 * node @node and re-file each affected slab on the matching list.
 *
 * The node's list_lock is expected to be held by the caller
 * (check_spinlock_acquired_node()).
 */
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
		       int node)
{
	int i;
	struct kmem_list3 *l3;

	for (i = 0; i < nr_objects; i++) {
		void *objp = objpp[i];
		struct slab *slabp;

		slabp = virt_to_slab(objp);
		l3 = cachep->nodelists[node];
		/* Unlink first; the slab is re-added below where it now belongs. */
		list_del(&slabp->list);
		check_spinlock_acquired_node(cachep, node);
		check_slabp(cachep, slabp);
		slab_put_obj(cachep, slabp, objp, node);
		STATS_DEC_ACTIVE(cachep);
		l3->free_objects++;
		check_slabp(cachep, slabp);

		/* Decide which list the slab goes back on. */
		if (slabp->inuse == 0) {
			if (l3->free_objects > l3->free_limit) {
				l3->free_objects -= cachep->num;
				/*
				 * The node holds more free objects than its
				 * limit allows: release the now empty slab
				 * instead of keeping it.  Note this happens
				 * while the caller still holds the list lock.
				 */
				slab_destroy(cachep, slabp);
			} else {
				list_add(&slabp->list, &l3->slabs_free);
			}
		} else {
			/*
			 * Still has objects in use: queue it at the tail of
			 * the partial list (allocations take from the head).
			 */
			list_add_tail(&slabp->list, &l3->slabs_partial);
		}
	}
}
3477
/*
 * Make room in a full per-cpu array cache by moving up to
 * ac->batchcount of its oldest entries elsewhere: into the node's shared
 * array if that has space, otherwise back to the slabs via free_block().
 * The remaining entries are shifted down to the start of the array.
 */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
	int batchcount;
	struct kmem_list3 *l3;
	int node = numa_node_id();

	batchcount = ac->batchcount;
#if DEBUG
	BUG_ON(!batchcount || batchcount > ac->avail);
#endif
	check_irq_off();
	l3 = cachep->nodelists[node];
	spin_lock(&l3->list_lock);
	if (l3->shared) {
		struct array_cache *shared_array = l3->shared;
		int max = shared_array->limit - shared_array->avail;
		if (max) {
			/* Move as many entries as the shared array can take. */
			if (batchcount > max)
				batchcount = max;
			memcpy(&(shared_array->entry[shared_array->avail]),
			       ac->entry, sizeof(void *) * batchcount);
			shared_array->avail += batchcount;
			goto free_done;
		}
	}

	free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
	{
		/* Count the slabs on the free list and record the figure. */
		int i = 0;
		struct list_head *p;

		p = l3->slabs_free.next;
		while (p != &(l3->slabs_free)) {
			struct slab *slabp;

			slabp = list_entry(p, struct slab, list);
			BUG_ON(slabp->inuse);

			i++;
			p = p->next;
		}
		STATS_SET_FREEABLE(cachep, i);
	}
#endif
	spin_unlock(&l3->list_lock);
	/* Drop the flushed entries from the front of the array. */
	ac->avail -= batchcount;
	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
3528
3529
3530
3531
3532
3533static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3534{
3535 struct array_cache *ac = cpu_cache_get(cachep);
3536
3537 check_irq_off();
3538 kmemleak_free_recursive(objp, cachep->flags);
3539 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3540
3541 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
3542
3543
3544
3545
3546
3547
3548
3549
3550 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3551 return;
3552
3553 if (likely(ac->avail < ac->limit)) {
3554 STATS_INC_FREEHIT(cachep);
3555 ac->entry[ac->avail++] = objp;
3556 return;
3557 } else {
3558 STATS_INC_FREEMISS(cachep);
3559 cache_flusharray(cachep, ac);
3560 ac->entry[ac->avail++] = objp;
3561 }
3562}
3563
3564
3565
3566
3567
3568
3569
3570
3571
/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: Allocation flags, passed through to __cache_alloc().
 *
 * Allocates an object from @cachep, recording this function's return
 * address as the caller, and emits the kmem_cache_alloc trace event.
 * Returns the object or NULL.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

	trace_kmem_cache_alloc(_RET_IP_, ret,
			       obj_size(cachep), cachep->buffer_size, flags);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
3582
#ifdef CONFIG_TRACING
/*
 * Same allocation as kmem_cache_alloc() but without emitting the
 * kmem_cache_alloc trace event.
 */
void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
{
	return __cache_alloc(cachep, flags, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc_notrace);
#endif
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3605{
3606 unsigned long addr = (unsigned long)ptr;
3607 unsigned long min_addr = PAGE_OFFSET;
3608 unsigned long align_mask = BYTES_PER_WORD - 1;
3609 unsigned long size = cachep->buffer_size;
3610 struct page *page;
3611
3612 if (unlikely(addr < min_addr))
3613 goto out;
3614 if (unlikely(addr > (unsigned long)high_memory - size))
3615 goto out;
3616 if (unlikely(addr & align_mask))
3617 goto out;
3618 if (unlikely(!kern_addr_valid(addr)))
3619 goto out;
3620 if (unlikely(!kern_addr_valid(addr + size - 1)))
3621 goto out;
3622 page = virt_to_page(ptr);
3623 if (unlikely(!PageSlab(page)))
3624 goto out;
3625 if (unlikely(page_get_cache(page) != cachep))
3626 goto out;
3627 return 1;
3628out:
3629 return 0;
3630}
3631
3632#ifdef CONFIG_NUMA
/**
 * kmem_cache_alloc_node - Allocate an object on a given node
 * @cachep: The cache to allocate from.
 * @flags: Allocation flags, passed through to __cache_alloc_node().
 * @nodeid: Node to allocate on (-1 selects the current node, see
 *          __cache_alloc_node()).
 *
 * Emits the kmem_cache_alloc_node trace event.  Returns the object or
 * NULL.
 */
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
	void *ret = __cache_alloc_node(cachep, flags, nodeid,
				       __builtin_return_address(0));

	trace_kmem_cache_alloc_node(_RET_IP_, ret,
				    obj_size(cachep), cachep->buffer_size,
				    flags, nodeid);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
3645
#ifdef CONFIG_TRACING
/*
 * Same allocation as kmem_cache_alloc_node() but without emitting the
 * kmem_cache_alloc_node trace event.
 */
void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
				    gfp_t flags,
				    int nodeid)
{
	return __cache_alloc_node(cachep, flags, nodeid,
				  __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
#endif
3656
/*
 * Node-aware kmalloc backend: pick the general cache for @size/@flags,
 * allocate from it on @node and emit the kmalloc_node trace event with
 * @caller as the call site.
 *
 * When the cache lookup yields a zero-or-NULL pointer that value is
 * returned unchanged instead of an object.
 */
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
{
	struct kmem_cache *cachep;
	void *ret;

	cachep = kmem_find_general_cachep(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
		return cachep;
	ret = kmem_cache_alloc_node_notrace(cachep, flags, node);

	trace_kmalloc_node((unsigned long) caller, ret,
			   size, cachep->buffer_size, flags, node);

	return ret;
}
3673
/*
 * Exported node-aware kmalloc entry points.  With CONFIG_DEBUG_SLAB or
 * CONFIG_TRACING the call site is recorded (taken from the return
 * address, or supplied explicitly by the _track_caller variant);
 * otherwise NULL is passed as the caller.
 */
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	return __do_kmalloc_node(size, flags, node,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc_node);

void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
		int node, unsigned long caller)
{
	return __do_kmalloc_node(size, flags, node, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#else
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	return __do_kmalloc_node(size, flags, node, NULL);
}
EXPORT_SYMBOL(__kmalloc_node);
#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
3695#endif
3696
3697
3698
3699
3700
3701
3702
/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
					  void *caller)
{
	struct kmem_cache *cachep;
	void *ret;

	/*
	 * Look up the general cache serving this size/flags combination.
	 * A zero-or-NULL result from the lookup is returned to the caller
	 * as is rather than being dereferenced.
	 */
	cachep = __find_general_cachep(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
		return cachep;
	ret = __cache_alloc(cachep, flags, caller);

	trace_kmalloc((unsigned long) caller, ret,
		      size, cachep->buffer_size, flags);

	return ret;
}
3724
3725
/*
 * Exported kmalloc entry points.  With CONFIG_DEBUG_SLAB or
 * CONFIG_TRACING the call site is recorded (taken from the return
 * address, or supplied explicitly by the _track_caller variant);
 * otherwise NULL is passed as the caller.
 */
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc(size_t size, gfp_t flags)
{
	return __do_kmalloc(size, flags, __builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc);

void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
	return __do_kmalloc(size, flags, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);

#else
void *__kmalloc(size_t size, gfp_t flags)
{
	return __do_kmalloc(size, flags, NULL);
}
EXPORT_SYMBOL(__kmalloc);
#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
3746
3747
3748
3749
3750
3751
3752
3753
3754
/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Frees @objp back to @cachep with local interrupts disabled, after the
 * lock-debugging check and (unless the cache has SLAB_DEBUG_OBJECTS set)
 * the debug-objects check, then emits the kmem_cache_free trace event.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
	unsigned long flags;

	local_irq_save(flags);
	debug_check_no_locks_freed(objp, obj_size(cachep));
	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(objp, obj_size(cachep));
	__cache_free(cachep, objp);
	local_irq_restore(flags);

	trace_kmem_cache_free(_RET_IP_, objp);
}
EXPORT_SYMBOL(kmem_cache_free);
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
/**
 * kfree - free previously allocated memory
 * @objp: pointer returned by kmalloc.
 *
 * A zero-or-NULL @objp is ignored (the trace event is still emitted).
 * Otherwise the owning cache is looked up from the address and the
 * object is released to it with local interrupts disabled.
 */
void kfree(const void *objp)
{
	struct kmem_cache *c;
	unsigned long flags;

	trace_kfree(_RET_IP_, objp);

	if (unlikely(ZERO_OR_NULL_PTR(objp)))
		return;
	local_irq_save(flags);
	kfree_debugcheck(objp);
	/* The cache is derived from the object's address. */
	c = virt_to_cache(objp);
	debug_check_no_locks_freed(objp, obj_size(c));
	debug_check_no_obj_freed(objp, obj_size(c));
	__cache_free(c, (void *)objp);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(kfree);
3797
/* Return the object size of @cachep as reported by obj_size(). */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	return obj_size(cachep);
}
EXPORT_SYMBOL(kmem_cache_size);
3803
/* Return the name string stored in @cachep (not a copy). */
const char *kmem_cache_name(struct kmem_cache *cachep)
{
	return cachep->name;
}
EXPORT_SYMBOL_GPL(kmem_cache_name);
3809
3810
3811
3812
/*
 * Set up (or refresh) the per-node kmem_list3 structures of @cachep for
 * every online node.
 *
 * For a node that already has a kmem_list3 the shared array is replaced
 * (its objects are freed back to the slabs first), an alien cache is
 * installed if none exists, and free_limit is recomputed.  For a node
 * without one a new kmem_list3 is allocated and initialised.
 *
 * Returns 0 on success or -ENOMEM if any allocation fails.
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {

		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			/*
			 * 0xbaadf00d sits in the argument position where
			 * do_tune_cpucache() passes a batchcount; presumably
			 * a poison value because the shared array's
			 * batchcount is never meant to be used - confirm
			 * against alloc_arraycache().
			 */
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		l3 = cachep->nodelists[node];
		if (l3) {
			/* Node already set up: swap in the new arrays. */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);

			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			l3->shared = new_shared;
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/*
			 * Release the old shared array, and the new alien
			 * cache if the node kept its existing one.
			 */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;
		}
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		/* Offset the first reap by a cache-dependent amount. */
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	if (!cachep->next.next) {
		/*
		 * cachep->next is not linked yet (presumably the cache has
		 * not been added to the cache chain), so undo the work done
		 * for all lower-numbered nodes.
		 */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];

				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
3897
/*
 * Argument block for do_ccupdate_local(): the cache being retuned and
 * one array cache slot per possible CPU.  Each CPU swaps its slot with
 * the array cache it currently uses.
 */
struct ccupdate_struct {
	struct kmem_cache *cachep;
	struct array_cache *new[NR_CPUS];
};
3902
3903static void do_ccupdate_local(void *info)
3904{
3905 struct ccupdate_struct *new = info;
3906 struct array_cache *old;
3907
3908 check_irq_off();
3909 old = cpu_cache_get(new->cachep);
3910
3911 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3912 new->new[smp_processor_id()] = old;
3913}
3914
3915
/*
 * Replace the per-cpu array caches of @cachep with freshly allocated
 * ones sized by @limit/@batchcount, record the new tunables and rebuild
 * the per-node lists via alloc_kmemlist().
 *
 * Returns 0 on success or -ENOMEM.
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;

	new = kzalloc(sizeof(*new), gfp);
	if (!new)
		return -ENOMEM;

	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
						batchcount, gfp);
		if (!new->new[i]) {
			/*
			 * Unwind: free the arrays allocated so far.  Slots
			 * never filled are NULL thanks to kzalloc().
			 */
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	/* Every CPU swaps in its new array; new->new[] then holds the old ones. */
	on_each_cpu(do_ccupdate_local, (void *)new, 1);

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		/* Return the objects still cached in the old array to their slabs. */
		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
		kfree(ccold);
	}
	kfree(new);
	return alloc_kmemlist(cachep, gfp);
}
3957
3958
/*
 * Choose default tunables for @cachep and apply them through
 * do_tune_cpucache().  Returns 0 on success or the (negative) error from
 * do_tune_cpucache(), which is also logged.
 */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit, shared;

	/*
	 * The per-cpu array limit shrinks as objects get larger:
	 *   > 128 KiB   -> 1
	 *   > PAGE_SIZE -> 8
	 *   > 1024      -> 24
	 *   > 256       -> 54
	 *   otherwise   -> 120
	 */
	if (cachep->buffer_size > 131072)
		limit = 1;
	else if (cachep->buffer_size > PAGE_SIZE)
		limit = 8;
	else if (cachep->buffer_size > 1024)
		limit = 24;
	else if (cachep->buffer_size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * A per-node shared array (factor 8) is only used for objects no
	 * larger than a page and when more than one CPU is possible.
	 */
	shared = 0;
	if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
		shared = 8;

#if DEBUG
	/* Debug builds cap the limit at 32. */
	if (limit > 32)
		limit = 32;
#endif
	/* batchcount is half the limit, rounded up. */
	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
	return err;
}
4011
4012
4013
4014
4015
4016
4017void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4018 struct array_cache *ac, int force, int node)
4019{
4020 int tofree;
4021
4022 if (!ac || !ac->avail)
4023 return;
4024 if (ac->touched && !force) {
4025 ac->touched = 0;
4026 } else {
4027 spin_lock_irq(&l3->list_lock);
4028 if (ac->avail) {
4029 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4030 if (tofree > ac->avail)
4031 tofree = (ac->avail + 1) / 2;
4032 free_block(cachep, ac->entry, tofree, node);
4033 ac->avail -= tofree;
4034 memmove(ac->entry, &(ac->entry[tofree]),
4035 sizeof(void *) * ac->avail);
4036 }
4037 spin_unlock_irq(&l3->list_lock);
4038 }
4039}
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
/**
 * cache_reap - Reclaim memory from caches.
 * @w: work descriptor (embedded in a delayed_work)
 *
 * Periodic work item.  For every cache on the cache chain it drains the
 * alien caches and the current CPU's array cache for the local node and,
 * once the node's next_reap time has passed, also the shared array and
 * part of the free-slab list.  Reschedules itself before returning.
 */
static void cache_reap(struct work_struct *w)
{
	struct kmem_cache *searchp;
	struct kmem_list3 *l3;
	int node = numa_node_id();
	struct delayed_work *work = to_delayed_work(w);

	if (!mutex_trylock(&cache_chain_mutex))
		/* Chain is busy: skip this round and just reschedule. */
		goto out;

	list_for_each_entry(searchp, &cache_chain, next) {
		check_irq_on();

		/*
		 * l3 is dereferenced unconditionally below, so the local
		 * node is assumed to have lists for every cache on the
		 * chain.
		 */
		l3 = searchp->nodelists[node];

		reap_alien(searchp, l3);

		drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);

		/*
		 * The remaining, heavier work is rate limited per node by
		 * next_reap.
		 */
		if (time_after(l3->next_reap, jiffies))
			goto next;

		l3->next_reap = jiffies + REAPTIMEOUT_LIST3;

		drain_array(searchp, l3, l3->shared, 0, node);

		if (l3->free_touched)
			/* Free list was used recently: clear the flag, keep the slabs. */
			l3->free_touched = 0;
		else {
			int freed;

			/* Ask for roughly free_limit / (5 * num) slabs, rounded up. */
			freed = drain_freelist(searchp, l3, (l3->free_limit +
				5 * searchp->num - 1) / (5 * searchp->num));
			STATS_ADD_REAPED(searchp, freed);
		}
next:
		cond_resched();
	}
	check_irq_on();
	mutex_unlock(&cache_chain_mutex);
	next_reap_node();
out:
	/* Set up the next iteration. */
	schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
}
4108
4109#ifdef CONFIG_SLABINFO
4110
/*
 * Emit the /proc/slabinfo header: the version line followed by the
 * column legend.  STATS builds advertise and describe the additional
 * globalstat and cpustat columns.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * The strings below define the file's column layout; s_show()
	 * prints the matching values in the same order.
	 */
#if STATS
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
4133
/*
 * seq_file ->start for /proc/slabinfo: take cache_chain_mutex (released
 * in s_stop()), print the header when reading from position 0 and return
 * the cache chain entry at *pos.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	mutex_lock(&cache_chain_mutex);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&cache_chain, *pos);
}
4144
/* seq_file ->next: advance to the following entry on the cache chain. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &cache_chain, pos);
}
4149
/* seq_file ->stop: drop the cache_chain_mutex taken by the ->start handler. */
static void s_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&cache_chain_mutex);
}
4154
4155static int s_show(struct seq_file *m, void *p)
4156{
4157 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4158 struct slab *slabp;
4159 unsigned long active_objs;
4160 unsigned long num_objs;
4161 unsigned long active_slabs = 0;
4162 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4163 const char *name;
4164 char *error = NULL;
4165 int node;
4166 struct kmem_list3 *l3;
4167
4168 active_objs = 0;
4169 num_slabs = 0;
4170 for_each_online_node(node) {
4171 l3 = cachep->nodelists[node];
4172 if (!l3)
4173 continue;
4174
4175 check_irq_on();
4176 spin_lock_irq(&l3->list_lock);
4177
4178 list_for_each_entry(slabp, &l3->slabs_full, list) {
4179 if (slabp->inuse != cachep->num && !error)
4180 error = "slabs_full accounting error";
4181 active_objs += cachep->num;
4182 active_slabs++;
4183 }
4184 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4185 if (slabp->inuse == cachep->num && !error)
4186 error = "slabs_partial inuse accounting error";
4187 if (!slabp->inuse && !error)
4188 error = "slabs_partial/inuse accounting error";
4189 active_objs += slabp->inuse;
4190 active_slabs++;
4191 }
4192 list_for_each_entry(slabp, &l3->slabs_free, list) {
4193 if (slabp->inuse && !error)
4194 error = "slabs_free/inuse accounting error";
4195 num_slabs++;
4196 }
4197 free_objects += l3->free_objects;
4198 if (l3->shared)
4199 shared_avail += l3->shared->avail;
4200
4201 spin_unlock_irq(&l3->list_lock);
4202 }
4203 num_slabs += active_slabs;
4204 num_objs = num_slabs * cachep->num;
4205 if (num_objs - active_objs != free_objects && !error)
4206 error = "free_objects accounting error";
4207
4208 name = cachep->name;
4209 if (error)
4210 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4211
4212 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4213 name, active_objs, num_objs, cachep->buffer_size,
4214 cachep->num, (1 << cachep->gfporder));
4215 seq_printf(m, " : tunables %4u %4u %4u",
4216 cachep->limit, cachep->batchcount, cachep->shared);
4217 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4218 active_slabs, num_slabs, shared_avail);
4219#if STATS
4220 {
4221 unsigned long high = cachep->high_mark;
4222 unsigned long allocs = cachep->num_allocations;
4223 unsigned long grown = cachep->grown;
4224 unsigned long reaped = cachep->reaped;
4225 unsigned long errors = cachep->errors;
4226 unsigned long max_freeable = cachep->max_freeable;
4227 unsigned long node_allocs = cachep->node_allocs;
4228 unsigned long node_frees = cachep->node_frees;
4229 unsigned long overflows = cachep->node_overflow;
4230
4231 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4232 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4233 reaped, errors, max_freeable, node_allocs,
4234 node_frees, overflows);
4235 }
4236
4237 {
4238 unsigned long allochit = atomic_read(&cachep->allochit);
4239 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4240 unsigned long freehit = atomic_read(&cachep->freehit);
4241 unsigned long freemiss = atomic_read(&cachep->freemiss);
4242
4243 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4244 allochit, allocmiss, freehit, freemiss);
4245 }
4246#endif
4247 seq_putc(m, '\n');
4248 return 0;
4249}
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
/*
 * seq_file iterator for /proc/slabinfo.  s_start() takes
 * cache_chain_mutex and s_stop() releases it, so the cache chain cannot
 * change while a chunk of the file is being produced.
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
4271
/* Longest write to /proc/slabinfo that slabinfo_write() accepts, in bytes. */
#define MAX_SLABINFO_WRITE 128
4273
4274
4275
4276
4277
4278
4279
4280ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4281 size_t count, loff_t *ppos)
4282{
4283 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4284 int limit, batchcount, shared, res;
4285 struct kmem_cache *cachep;
4286
4287 if (count > MAX_SLABINFO_WRITE)
4288 return -EINVAL;
4289 if (copy_from_user(&kbuf, buffer, count))
4290 return -EFAULT;
4291 kbuf[MAX_SLABINFO_WRITE] = '\0';
4292
4293 tmp = strchr(kbuf, ' ');
4294 if (!tmp)
4295 return -EINVAL;
4296 *tmp = '\0';
4297 tmp++;
4298 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4299 return -EINVAL;
4300
4301
4302 mutex_lock(&cache_chain_mutex);
4303 res = -EINVAL;
4304 list_for_each_entry(cachep, &cache_chain, next) {
4305 if (!strcmp(cachep->name, kbuf)) {
4306 if (limit < 1 || batchcount < 1 ||
4307 batchcount > limit || shared < 0) {
4308 res = 0;
4309 } else {
4310 res = do_tune_cpucache(cachep, limit,
4311 batchcount, shared,
4312 GFP_KERNEL);
4313 }
4314 break;
4315 }
4316 }
4317 mutex_unlock(&cache_chain_mutex);
4318 if (res >= 0)
4319 res = count;
4320 return res;
4321}
4322
/* open() handler for /proc/slabinfo: attach the slabinfo seq_file iterator. */
static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}
4327
/*
 * File operations for /proc/slabinfo: reads go through seq_file, writes
 * are handled by slabinfo_write() to retune a cache.
 */
static const struct file_operations proc_slabinfo_operations = {
	.open		= slabinfo_open,
	.read		= seq_read,
	.write		= slabinfo_write,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
4335
4336#ifdef CONFIG_DEBUG_SLAB_LEAK
4337
/*
 * seq_file ->start for the slab_allocators file: take cache_chain_mutex
 * (released in s_stop()) and return the cache chain entry at *pos.
 */
static void *leaks_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&cache_chain_mutex);
	return seq_list_start(&cache_chain, *pos);
}
4343
/*
 * Record one occurrence of caller address @v in the table @n.
 *
 * Table layout: n[0] is the capacity in entries, n[1] the number of
 * entries in use, and from n[2] onwards come (address, hit count) pairs
 * kept sorted by ascending address.
 *
 * A zero address is ignored.  An address already present has its count
 * incremented; a new one is inserted at its sorted position.  Returns 1
 * in all of those cases.  Returns 0 when adding a new entry makes the
 * used count reach the capacity - the used count is still bumped, but
 * the entry is not stored.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *slot = n + 2;
	int remaining;

	if (v == 0)
		return 1;

	/* Binary search over the 'remaining' pairs starting at 'slot'. */
	for (remaining = n[1]; remaining != 0; ) {
		int half = remaining / 2;
		unsigned long *mid = slot + 2 * half;

		if (mid[0] == v) {
			mid[1]++;
			return 1;
		}
		if (mid[0] < v) {
			slot = mid + 2;
			remaining -= half + 1;
		} else {
			remaining = half;
		}
	}

	n[1]++;
	if (n[1] == n[0])
		return 0;

	/* Shift the tail up by one pair and drop the new entry in the gap. */
	memmove(slot + 2, slot,
		n[1] * 2 * sizeof(unsigned long) - ((void *)slot - (void *)n));
	slot[0] = v;
	slot[1] = 1;
	return 1;
}
4373
/*
 * Add the recorded caller of every active object in slab @s of cache @c
 * to the caller table @n (see add_caller() for the table layout).
 * Stops early once the table is full.
 */
static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
{
	void *p;
	int i;
	/* Table already full (capacity == used): nothing more can be added. */
	if (n[0] == n[1])
		return;
	for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
		/* Only objects whose bufctl slot is marked active are counted. */
		if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
			continue;
		if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
			return;
	}
}
4387
/*
 * Print @address symbolically as "name+offset/size", followed by
 * " [module]" when a module name is reported.  Falls back to printing
 * the raw pointer when CONFIG_KALLSYMS is off or the lookup fails.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset, size;
	char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];

	if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0])
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4403
/*
 * seq_file ->show for the slab_allocators file: for one cache, print a
 * line per distinct allocation call site with the number of active
 * objects allocated from it.
 *
 * Caches lacking SLAB_STORE_USER or SLAB_RED_ZONE are skipped.  The
 * caller table lives in m->private (layout described at add_caller()).
 * Returns 0, or -ENOMEM if the table needed to grow and could not.
 */
static int leaks_show(struct seq_file *m, void *p)
{
	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
	struct slab *slabp;
	struct kmem_list3 *l3;
	const char *name;
	unsigned long *n = m->private;
	int node;
	int i;

	if (!(cachep->flags & SLAB_STORE_USER))
		return 0;
	if (!(cachep->flags & SLAB_RED_ZONE))
		return 0;

	/* Start with an empty table for this cache; the capacity in n[0] is kept. */
	n[1] = 0;

	for_each_online_node(node) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;

		check_irq_on();
		spin_lock_irq(&l3->list_lock);

		/* Only full and partial slabs can hold active objects. */
		list_for_each_entry(slabp, &l3->slabs_full, list)
			handle_slab(n, cachep, slabp);
		list_for_each_entry(slabp, &l3->slabs_partial, list)
			handle_slab(n, cachep, slabp);
		spin_unlock_irq(&l3->list_lock);
	}
	name = cachep->name;
	if (n[0] == n[1]) {
		/*
		 * The table filled up: replace it with a zeroed one of twice
		 * the capacity.  cache_chain_mutex is dropped around the
		 * GFP_KERNEL allocation and retaken before returning.
		 */
		mutex_unlock(&cache_chain_mutex);
		m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
		if (!m->private) {
			/* Allocation failed: keep using the old table. */
			m->private = n;
			mutex_lock(&cache_chain_mutex);
			return -ENOMEM;
		}
		*(unsigned long *)m->private = n[0] * 2;
		kfree(n);
		mutex_lock(&cache_chain_mutex);
		/*
		 * Mark the seq_file buffer as completely used - presumably so
		 * the seq_file core treats this record as an overflow and
		 * calls ->show again, now with the larger table (confirm
		 * against the seq_file implementation).
		 */
		m->count = m->size;
		return 0;
	}
	/* Pair i holds the address at n[2*i+2] and its count at n[2*i+3]. */
	for (i = 0; i < n[1]; i++) {
		seq_printf(m, "%s: %lu ", name, n[2*i+3]);
		show_symbol(m, n[2*i+2]);
		seq_putc(m, '\n');
	}

	return 0;
}
4463
4464static const struct seq_operations slabstats_op = {
4465 .start = leaks_start,
4466 .next = s_next,
4467 .stop = s_stop,
4468 .show = leaks_show,
4469};
4470
4471static int slabstats_open(struct inode *inode, struct file *file)
4472{
4473 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4474 int ret = -ENOMEM;
4475 if (n) {
4476 ret = seq_open(file, &slabstats_op);
4477 if (!ret) {
4478 struct seq_file *m = file->private_data;
4479 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4480 m->private = n;
4481 n = NULL;
4482 }
4483 kfree(n);
4484 }
4485 return ret;
4486}
4487
4488static const struct file_operations proc_slabstats_operations = {
4489 .open = slabstats_open,
4490 .read = seq_read,
4491 .llseek = seq_lseek,
4492 .release = seq_release_private,
4493};
4494#endif
4495
4496static int __init slab_proc_init(void)
4497{
4498 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4499#ifdef CONFIG_DEBUG_SLAB_LEAK
4500 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4501#endif
4502 return 0;
4503}
4504module_init(slab_proc_init);
4505#endif
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519size_t ksize(const void *objp)
4520{
4521 BUG_ON(!objp);
4522 if (unlikely(objp == ZERO_SIZE_PTR))
4523 return 0;
4524
4525 return obj_size(virt_to_cache(objp));
4526}
4527EXPORT_SYMBOL(ksize);
4528