1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include <linux/mm.h>
91#include <linux/poison.h>
92#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
97#include <linux/cpuset.h>
98#include <linux/seq_file.h>
99#include <linux/notifier.h>
100#include <linux/kallsyms.h>
101#include <linux/cpu.h>
102#include <linux/sysctl.h>
103#include <linux/module.h>
104#include <linux/rcupdate.h>
105#include <linux/string.h>
106#include <linux/uaccess.h>
107#include <linux/nodemask.h>
108#include <linux/mempolicy.h>
109#include <linux/mutex.h>
110#include <linux/fault-inject.h>
111#include <linux/rtmutex.h>
112#include <linux/reciprocal_div.h>
113
114#include <asm/cacheflush.h>
115#include <asm/tlbflush.h>
116#include <asm/page.h>
117
118
119
120
121
122
123
124
125
126
127
/*
 * Build-time switches derived from CONFIG_DEBUG_SLAB.
 *
 * DEBUG        - enables the "#if DEBUG" code in this file (debug accessors,
 *                extra kmem_cache fields, sanity checks).
 * STATS        - enables the per-cache statistics fields and STATS_* macros.
 * FORCED_DEBUG - NOTE(review): not referenced in the visible part of the
 *                file; presumably forces debug flags on at cache creation.
 */
#ifdef CONFIG_DEBUG_SLAB
#define DEBUG		1
#define STATS		1
#define FORCED_DEBUG	1
#else
#define DEBUG		0
#define STATS		0
#define FORCED_DEBUG	0
#endif
137
138
/* Size of one machine word; used when locating the debug user word. */
#define BYTES_PER_WORD		sizeof(void *)
/* Alignment of red-zone words: the larger of word size and u64 alignment. */
#define REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

/* Fall back to the compile-time L1 line size if no cache_line_size() exists. */
#ifndef cache_line_size
#define cache_line_size()	L1_CACHE_BYTES
#endif

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Default minimum alignment handed to kmem_cache_create() for the general
 * (kmalloc) caches when the architecture does not provide its own value.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
/*
 * Default minimum slab alignment when the architecture does not override it.
 * NOTE(review): not used in the visible part of the file; presumably 0 means
 * "no architecture constraint".
 */
#define ARCH_SLAB_MINALIGN 0
#endif

#ifndef ARCH_KMALLOC_FLAGS
/* Default creation flags for the general (kmalloc) caches. */
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
173
174
/*
 * Mask of SLAB_* creation flags.  The DEBUG variant additionally contains
 * the debugging flags SLAB_RED_ZONE, SLAB_POISON and SLAB_STORE_USER.
 * NOTE(review): CREATE_MASK is not used in the visible part of the file;
 * presumably it validates the flags passed to kmem_cache_create().
 */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
#endif
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/*
 * kmem_bufctl_t: per-object index type stored in the slab management area
 * (one entry per object, see slab_mgmt_size()).
 *
 * The top values of the type are reserved as markers; SLAB_LIMIT is the
 * largest object count a slab may hold (enforced by cache_estimate()).
 * NOTE(review): the exact meaning of BUFCTL_END/FREE/ACTIVE is not shown in
 * the visible part of the file; presumably end-of-free-list and debug state
 * markers.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
213
214
215
216
217
218
219
220
/*
 * struct slab - management header of one slab.
 *
 * slab_mgmt_size() sizes the management area as this header followed by one
 * kmem_bufctl_t per object.
 */
struct slab {
	struct list_head list;		/* list linkage; presumably on one of the kmem_list3 slab lists */
	unsigned long colouroff;	/* NOTE(review): presumably the colour offset of this slab */
	void *s_mem;			/* address of object 0 (see index_to_obj()) */
	unsigned int inuse;		/* NOTE(review): presumably the number of allocated objects */
	kmem_bufctl_t free;		/* NOTE(review): presumably index of the first free object */
	unsigned short nodeid;		/* node the slab belongs to (compared in cache_free_alien()) */
};
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*
 * struct slab_rcu - data needed by kmem_rcu_free() to release a slab from an
 * RCU callback.
 *
 * 'head' must stay the first member: kmem_rcu_free() casts the rcu_head
 * pointer straight to a struct slab_rcu pointer.
 */
struct slab_rcu {
	struct rcu_head head;		/* RCU callback linkage */
	struct kmem_cache *cachep;	/* cache the slab pages belong to */
	void *addr;			/* page address passed to kmem_freepages() */
};
251
252
253
254
255
256
257
258
259
260
261
262
263
/*
 * struct array_cache - a bounded stack of object pointers.
 *
 * Used for the per-cpu caches (kmem_cache.array[]), the per-node shared
 * cache (kmem_list3.shared) and the alien caches (kmem_list3.alien[]).
 */
struct array_cache {
	unsigned int avail;		/* number of valid pointers in entry[] */
	unsigned int limit;		/* capacity of entry[] */
	unsigned int batchcount;	/* NOTE(review): presumably the refill/flush batch size */
	unsigned int touched;		/* set to 1 by transfer_objects() when objects are added */
	spinlock_t lock;		/* taken around alien cache access (see cache_free_alien()) */
	void *entry[0];			/*
					 * Object pointers; storage is allocated directly
					 * behind the struct (see alloc_arraycache()), so
					 * this must remain the last member.
					 */
};
277
278
279
280
281
/*
 * Statically sized array_cache used during bootstrap, before kmalloc works.
 * 'entries' provides the backing storage for cache.entry[].
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
287
288
289
290
/*
 * struct kmem_list3 - per-node state of a cache: the three slab lists plus
 * the node-level array caches.  One instance per node is reachable through
 * kmem_cache.nodelists[].
 */
struct kmem_list3 {
	struct list_head slabs_partial;	/* slab list; by name, partially used slabs */
	struct list_head slabs_full;	/* slab list; by name, fully used slabs */
	struct list_head slabs_free;	/* slab list; by name, completely free slabs */
	unsigned long free_objects;	/* count handed to drain_freelist() on cpu teardown */
	unsigned int free_limit;	/* set in cpuup_callback() from cpus, batchcount and num */
	unsigned int colour_next;	/* NOTE(review): presumably the next colour to use on this node */
	spinlock_t list_lock;		/* protects the fields of this structure */
	struct array_cache *shared;	/* array cache shared by the cpus of the node */
	struct array_cache **alien;	/* per-remote-node array caches (NUMA) */
	unsigned long next_reap;	/* jiffies value; set from REAPTIMEOUT_LIST3 */
	int free_touched;		/* NOTE(review): presumably a "recently used" hint for reaping */
};
304
305
306
307
/*
 * Static kmem_list3 instances used while bootstrapping, and the layout of
 * that array: slot CACHE_CACHE for cache_cache, then MAX_NUMNODES slots
 * starting at SIZE_AC and MAX_NUMNODES slots starting at SIZE_L3 for the two
 * general caches at INDEX_AC / INDEX_L3 (see kmem_cache_init()).
 */
#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_AC 1
#define	SIZE_L3 (1 + MAX_NUMNODES)
313
/* Forward declarations of functions defined later in this file. */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep);
static void cache_reap(struct work_struct *unused);
320
321
322
323
324
/*
 * Map a compile-time-constant size to the index of the first size class in
 * linux/kmalloc_sizes.h that is large enough to hold it.
 *
 * CACHE(x) is defined locally so that including the size table expands into
 * a chain of "if (size <= x) return i; else i++;" tests.
 *
 * __bad_size() is only declared here.  It is reached when 'size' is not a
 * compile-time constant or is larger than every size class.
 * NOTE(review): presumably it is left undefined on purpose so that such a
 * call fails at link time; confirm.
 */
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <=x) \
		return i; \
	else \
		i++;
#include "linux/kmalloc_sizes.h"
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}
344
/* Non-zero until kmem_cache_init() has created the INDEX_AC/INDEX_L3 caches. */
static int slab_early_init = 1;

/* malloc_sizes[] indices of the caches that hold the bootstrap structures. */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))
349
350static void kmem_list3_init(struct kmem_list3 *parent)
351{
352 INIT_LIST_HEAD(&parent->slabs_full);
353 INIT_LIST_HEAD(&parent->slabs_partial);
354 INIT_LIST_HEAD(&parent->slabs_free);
355 parent->shared = NULL;
356 parent->alien = NULL;
357 parent->colour_next = 0;
358 spin_lock_init(&parent->list_lock);
359 parent->free_objects = 0;
360 parent->free_touched = 0;
361}
362
/*
 * MAKE_LIST: initialize 'listp' and splice onto it the list named 'slab'
 * from the kmem_list3 currently installed for 'nodeid' in 'cachep'.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&(cachep->nodelists[nodeid]->slab), listp);	\
	} while (0)

/*
 * MAKE_ALL_LISTS: move all three slab lists of node 'nodeid' over to the
 * kmem_list3 pointed to by 'ptr' (used by init_list() when replacing a
 * bootstrap kmem_list3).
 */
#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)
375
376
377
378
379
380
381
/*
 * struct kmem_cache - descriptor of one object cache.
 */
struct kmem_cache {
	/* Per-cpu array caches, indexed by cpu number (see cpu_cache_get()). */
	struct array_cache *array[NR_CPUS];

	/* Tunables used when sizing per-cpu and shared array caches. */
	unsigned int batchcount;
	unsigned int limit;
	unsigned int shared;

	/* Stride between consecutive objects in a slab (see index_to_obj()). */
	unsigned int buffer_size;
	/* reciprocal_value(buffer_size), used by obj_to_index(). */
	u32 reciprocal_buffer_size;

	unsigned int flags;		/* SLAB_* flags plus CFLGS_OFF_SLAB */
	unsigned int num;		/* objects per slab (from cache_estimate()) */

	/* Page order of each slab (see kmem_getpages()). */
	unsigned int gfporder;

	/* GFP flags OR-ed into every page allocation (see kmem_getpages()). */
	gfp_t gfpflags;

	size_t colour;			/* left_over / colour_off (see kmem_cache_init()) */
	unsigned int colour_off;	/* colour step; cache_line_size() for cache_cache */
	struct kmem_cache *slabp_cache;	/* cache holding off-slab management data (see kmem_rcu_free()) */
	unsigned int slab_size;		/* aligned size of the slab management area */
	unsigned int dflags;		/* NOTE(review): presumably dynamic flags; not used in the visible code */

	/* Object constructor; may be NULL (see kmem_cache_init()). */
	void (*ctor) (void *, struct kmem_cache *, unsigned long);

	const char *name;		/* name printed by __slab_error() */
	struct list_head next;		/* linkage on cache_chain */

	/* Statistics, maintained through the STATS_* macros. */
#if STATS
	unsigned long num_active;
	unsigned long num_allocations;
	unsigned long high_mark;
	unsigned long grown;
	unsigned long reaped;
	unsigned long errors;
	unsigned long max_freeable;
	unsigned long node_allocs;
	unsigned long node_frees;
	unsigned long node_overflow;
	atomic_t allochit;
	atomic_t allocmiss;
	atomic_t freehit;
	atomic_t freemiss;
#endif
#if DEBUG
	/*
	 * With debugging the object proper sits at obj_offset inside the
	 * buffer and is obj_size bytes long (see obj_offset()/obj_size()).
	 */
	int obj_offset;
	int obj_size;
#endif
	/*
	 * Per-node state.  This must remain the last member:
	 * kmem_cache_init() sizes cache_cache objects as
	 * offsetof(struct kmem_cache, nodelists) plus nr_node_ids pointers,
	 * so members placed after it would fall outside the allocation.
	 */
	struct kmem_list3 *nodelists[MAX_NUMNODES];
	/*
	 * Do not add fields after nodelists[]
	 */
};
456
/* Internal flag: slab management data is kept off-slab (see cache_estimate()). */
#define CFLGS_OFF_SLAB		(0x80000000UL)
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

/* NOTE(review): not used in the visible code; presumably caps per-cpu refill batches. */
#define BATCHREFILL_LIMIT	16

/*
 * Reap intervals in jiffies.  REAPTIMEOUT_LIST3 seeds kmem_list3.next_reap
 * in cpuup_callback(); REAPTIMEOUT_CPUC is not used in the visible code.
 */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)
470
/*
 * Statistics helpers.  With STATS enabled they update the counters in
 * struct kmem_cache; otherwise they expand to empty statements.
 *
 * STATS_SET_FREEABLE(x, i) raises max_freeable to 'i' if 'i' is larger.
 * 'i' is fully parenthesized so expression arguments (e.g. "a ? b : c")
 * keep their meaning; note that 'i' is still evaluated more than once.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
512
513#if DEBUG
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528static int obj_offset(struct kmem_cache *cachep)
529{
530 return cachep->obj_offset;
531}
532
533static int obj_size(struct kmem_cache *cachep)
534{
535 return cachep->obj_size;
536}
537
538static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
539{
540 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
541 return (unsigned long long*) (objp + obj_offset(cachep) -
542 sizeof(unsigned long long));
543}
544
545static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
546{
547 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
548 if (cachep->flags & SLAB_STORE_USER)
549 return (unsigned long long *)(objp + cachep->buffer_size -
550 sizeof(unsigned long long) -
551 REDZONE_ALIGN);
552 return (unsigned long long *) (objp + cachep->buffer_size -
553 sizeof(unsigned long long));
554}
555
556static void **dbg_userword(struct kmem_cache *cachep, void *objp)
557{
558 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
559 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
560}
561
562#else
563
/*
 * Non-debug variants: objects start at offset 0 and span the whole buffer.
 * The red-zone and user-word accessors must never be reached without DEBUG,
 * so they BUG() and yield a typed NULL.
 */
#define obj_offset(x)			0
#define obj_size(cachep)		(cachep->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})
569
570#endif
571
572
573
574
/*
 * Page-order threshold; kmem_cache_init() raises it to BREAK_GFP_ORDER_HI
 * when the machine has more than 32MB of memory.
 * NOTE(review): the consumer is not in the visible code; presumably slab
 * order calculation stops growing slabs beyond this order.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
578
579
580
581
582
583
584static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
585{
586 page->lru.next = (struct list_head *)cache;
587}
588
589static inline struct kmem_cache *page_get_cache(struct page *page)
590{
591 page = compound_head(page);
592 BUG_ON(!PageSlab(page));
593 return (struct kmem_cache *)page->lru.next;
594}
595
596static inline void page_set_slab(struct page *page, struct slab *slab)
597{
598 page->lru.prev = (struct list_head *)slab;
599}
600
601static inline struct slab *page_get_slab(struct page *page)
602{
603 BUG_ON(!PageSlab(page));
604 return (struct slab *)page->lru.prev;
605}
606
/* Find the cache an object belongs to, via the head page of its address. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}
612
/* Find the slab descriptor of an object, via the head page of its address. */
static inline struct slab *virt_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}
618
619static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
620 unsigned int idx)
621{
622 return slab->s_mem + cache->buffer_size * idx;
623}
624
625
626
627
628
629
630
631static inline unsigned int obj_to_index(const struct kmem_cache *cache,
632 const struct slab *slab, void *obj)
633{
634 u32 offset = (obj - slab->s_mem);
635 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
636}
637
638
639
640
/*
 * Table of general cache size classes, generated from
 * linux/kmalloc_sizes.h.  The final entry with cs_size == ULONG_MAX is the
 * terminator that table walkers test for (see __find_general_cachep()).
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
648
649
/*
 * Names of the general caches, generated from the same size table as
 * malloc_sizes[] so both arrays can be walked in lock step (see
 * kmem_cache_init()).  Only needed during init, hence __initdata.
 */
struct cache_names {
	char *name;		/* "size-<n>" */
	char *name_dma;		/* "size-<n>(DMA)" */
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
661
/*
 * Bootstrap per-cpu array caches: avail 0, limit BOOT_CPUCACHE_ENTRIES,
 * batchcount 1, touched 0.  kmem_cache_init() later replaces both with
 * kmalloc'ed copies; initarray_cache is used for cache_cache and
 * initarray_generic for the general cache at INDEX_AC.
 */
static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
666
667
/*
 * The statically allocated bootstrap cache.  Its objects are sized from
 * struct kmem_cache (buffer_size is recomputed in kmem_cache_init()), i.e.
 * it is the cache that holds cache descriptors.
 */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
675
/* Non-NULL dummy value returned by alloc_alien_cache() on non-NUMA builds. */
#define BAD_ALIEN_MAGIC 0x01020304ul
677
#ifdef CONFIG_LOCKDEP

/*
 * Lock classes assigned to the list_lock and alien-cache locks of the
 * general caches whose slab management is kept on-slab.
 * NOTE(review): presumably needed because freeing off-slab management data
 * nests these locks; confirm against the allocator's free paths.
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

/*
 * Walk every general cache and every node and move the locks of on-slab
 * caches into the lock classes above.  Nodes without a kmem_list3, caches
 * with off-slab management, and missing or dummy (BAD_ALIEN_MAGIC) alien
 * arrays are skipped.
 */
static inline void init_lock_keys(void)
{
	struct cache_sizes *s;
	int nid;

	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
		for_each_node(nid) {
			struct kmem_list3 *l3 = s->cs_cachep->nodelists[nid];
			struct array_cache **alc;
			int peer;

			if (!l3 || OFF_SLAB(s->cs_cachep))
				continue;
			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);

			alc = l3->alien;
			/*
			 * Non-NUMA builds store BAD_ALIEN_MAGIC instead of a
			 * real array; it must not be dereferenced.
			 */
			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
				continue;

			for_each_node(peer) {
				if (alc[peer])
					lockdep_set_class(&alc[peer]->lock,
							  &on_slab_alc_key);
			}
		}
	}
}
#else
/* Without lockdep there are no lock classes to set up. */
static inline void init_lock_keys(void)
{
}
#endif
732
733
734
735
736
/*
 * List of all caches (linked through kmem_cache.next) and the mutex that is
 * held while walking it (see kmem_cache_init() and cpuup_callback()).
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
739
740
741
742
743
/*
 * Bootstrap progress of the allocator.  kmem_cache_init() sets FULL once
 * everything is set up; slab_is_available() reports that state.
 * NOTE(review): the intermediate states are not set in the visible code.
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	FULL
} g_cpucache_up;
750
751
752
753
754int slab_is_available(void)
755{
756 return g_cpucache_up == FULL;
757}
758
/* Per-cpu delayed work that runs cache_reap() (armed in start_cpu_timer()). */
static DEFINE_PER_CPU(struct delayed_work, reap_work);
760
761static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
762{
763 return cachep->array[smp_processor_id()];
764}
765
766static inline struct kmem_cache *__find_general_cachep(size_t size,
767 gfp_t gfpflags)
768{
769 struct cache_sizes *csizep = malloc_sizes;
770
771#if DEBUG
772
773
774
775
776 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
777#endif
778 while (size > csizep->cs_size)
779 csizep++;
780
781
782
783
784
785
786#ifdef CONFIG_ZONE_DMA
787 if (unlikely(gfpflags & GFP_DMA))
788 return csizep->cs_dmacachep;
789#endif
790 return csizep->cs_cachep;
791}
792
793static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
794{
795 return __find_general_cachep(size, gfpflags);
796}
797
798static size_t slab_mgmt_size(size_t nr_objs, size_t align)
799{
800 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
801}
802
803
804
805
806static void cache_estimate(unsigned long gfporder, size_t buffer_size,
807 size_t align, int flags, size_t *left_over,
808 unsigned int *num)
809{
810 int nr_objs;
811 size_t mgmt_size;
812 size_t slab_size = PAGE_SIZE << gfporder;
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829 if (flags & CFLGS_OFF_SLAB) {
830 mgmt_size = 0;
831 nr_objs = slab_size / buffer_size;
832
833 if (nr_objs > SLAB_LIMIT)
834 nr_objs = SLAB_LIMIT;
835 } else {
836
837
838
839
840
841
842
843
844 nr_objs = (slab_size - sizeof(struct slab)) /
845 (buffer_size + sizeof(kmem_bufctl_t));
846
847
848
849
850
851 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
852 > slab_size)
853 nr_objs--;
854
855 if (nr_objs > SLAB_LIMIT)
856 nr_objs = SLAB_LIMIT;
857
858 mgmt_size = slab_mgmt_size(nr_objs, align);
859 }
860 *num = nr_objs;
861 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
862}
863
864#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
865
866static void __slab_error(const char *function, struct kmem_cache *cachep,
867 char *msg)
868{
869 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
870 function, cachep->name, msg);
871 dump_stack();
872}
873
874
875
876
877
878
879
880
881
/*
 * Whether alien caches are allocated (see cpuup_callback()).  Enabled by
 * default; cleared by the "noaliencache" boot parameter and by
 * kmem_cache_init() when only one node is possible.
 */
static int use_alien_caches __read_mostly = 1;
/* Handler for the "noaliencache" boot parameter; the argument is ignored. */
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);
889
#ifdef CONFIG_NUMA
/*
 * Per-cpu cursor naming the remote node whose alien cache reap_alien()
 * looks at next.  The cursor walks round-robin over the online nodes.
 */
static DEFINE_PER_CPU(unsigned long, reap_node);

/*
 * Start the cursor of 'cpu' at the online node following the cpu's own
 * node, wrapping to the first online node at the end of the map.
 */
static void init_reap_node(int cpu)
{
	int start = next_node(cpu_to_node(cpu), node_online_map);

	if (start == MAX_NUMNODES)
		start = first_node(node_online_map);

	per_cpu(reap_node, cpu) = start;
}

/* Advance the executing cpu's cursor to the next online node, wrapping. */
static void next_reap_node(void)
{
	int next = __get_cpu_var(reap_node);

	next = next_node(next, node_online_map);
	if (unlikely(next >= MAX_NUMNODES))
		next = first_node(node_online_map);

	__get_cpu_var(reap_node) = next;
}

#else
/* Without NUMA there are no remote nodes to iterate over. */
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif
924
925
926
927
928
929
930
931
932static void __devinit start_cpu_timer(int cpu)
933{
934 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
935
936
937
938
939
940
941 if (keventd_up() && reap_work->work.func == NULL) {
942 init_reap_node(cpu);
943 INIT_DELAYED_WORK(reap_work, cache_reap);
944 schedule_delayed_work_on(cpu, reap_work,
945 __round_jiffies_relative(HZ, cpu));
946 }
947}
948
949static struct array_cache *alloc_arraycache(int node, int entries,
950 int batchcount)
951{
952 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
953 struct array_cache *nc = NULL;
954
955 nc = kmalloc_node(memsize, GFP_KERNEL, node);
956 if (nc) {
957 nc->avail = 0;
958 nc->limit = entries;
959 nc->batchcount = batchcount;
960 nc->touched = 0;
961 spin_lock_init(&nc->lock);
962 }
963 return nc;
964}
965
966
967
968
969
970
971
972static int transfer_objects(struct array_cache *to,
973 struct array_cache *from, unsigned int max)
974{
975
976 int nr = min(min(from->avail, max), to->limit - to->avail);
977
978 if (!nr)
979 return 0;
980
981 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
982 sizeof(void *) *nr);
983
984 from->avail -= nr;
985 to->avail += nr;
986 to->touched = 1;
987 return nr;
988}
989
990#ifndef CONFIG_NUMA
991
/*
 * Non-NUMA stubs: there are no remote nodes, so all alien-cache and
 * node-specific operations collapse to no-ops.
 */
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

/*
 * Return a non-NULL dummy so that callers which treat NULL as an allocation
 * failure (see cpuup_callback()) keep working.  The value must never be
 * dereferenced.
 */
static inline struct array_cache **alloc_alien_cache(int node, int limit)
{
	return (struct array_cache **)BAD_ALIEN_MAGIC;
}

/* Nothing was allocated, so there is nothing to free. */
static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

/* Always 0: the object was not handled here. */
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	return 0;
}

/* Always NULL: no alternate node exists. */
static inline void *alternate_node_alloc(struct kmem_cache *cachep,
		gfp_t flags)
{
	return NULL;
}

/* Always NULL: node-specific allocation is not available. */
static inline void *____cache_alloc_node(struct kmem_cache *cachep,
		 gfp_t flags, int nodeid)
{
	return NULL;
}
1020
1021#else
1022
/* NUMA: forward declarations; the definitions are outside the visible code. */
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1025
1026static struct array_cache **alloc_alien_cache(int node, int limit)
1027{
1028 struct array_cache **ac_ptr;
1029 int memsize = sizeof(void *) * nr_node_ids;
1030 int i;
1031
1032 if (limit > 1)
1033 limit = 12;
1034 ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
1035 if (ac_ptr) {
1036 for_each_node(i) {
1037 if (i == node || !node_online(i)) {
1038 ac_ptr[i] = NULL;
1039 continue;
1040 }
1041 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
1042 if (!ac_ptr[i]) {
1043 for (i--; i <= 0; i--)
1044 kfree(ac_ptr[i]);
1045 kfree(ac_ptr);
1046 return NULL;
1047 }
1048 }
1049 }
1050 return ac_ptr;
1051}
1052
1053static void free_alien_cache(struct array_cache **ac_ptr)
1054{
1055 int i;
1056
1057 if (!ac_ptr)
1058 return;
1059 for_each_node(i)
1060 kfree(ac_ptr[i]);
1061 kfree(ac_ptr);
1062}
1063
1064static void __drain_alien_cache(struct kmem_cache *cachep,
1065 struct array_cache *ac, int node)
1066{
1067 struct kmem_list3 *rl3 = cachep->nodelists[node];
1068
1069 if (ac->avail) {
1070 spin_lock(&rl3->list_lock);
1071
1072
1073
1074
1075
1076 if (rl3->shared)
1077 transfer_objects(rl3->shared, ac, ac->limit);
1078
1079 free_block(cachep, ac->entry, ac->avail, node);
1080 ac->avail = 0;
1081 spin_unlock(&rl3->list_lock);
1082 }
1083}
1084
1085
1086
1087
1088static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1089{
1090 int node = __get_cpu_var(reap_node);
1091
1092 if (l3->alien) {
1093 struct array_cache *ac = l3->alien[node];
1094
1095 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1096 __drain_alien_cache(cachep, ac, node);
1097 spin_unlock_irq(&ac->lock);
1098 }
1099 }
1100}
1101
1102static void drain_alien_cache(struct kmem_cache *cachep,
1103 struct array_cache **alien)
1104{
1105 int i = 0;
1106 struct array_cache *ac;
1107 unsigned long flags;
1108
1109 for_each_online_node(i) {
1110 ac = alien[i];
1111 if (ac) {
1112 spin_lock_irqsave(&ac->lock, flags);
1113 __drain_alien_cache(cachep, ac, i);
1114 spin_unlock_irqrestore(&ac->lock, flags);
1115 }
1116 }
1117}
1118
1119static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1120{
1121 struct slab *slabp = virt_to_slab(objp);
1122 int nodeid = slabp->nodeid;
1123 struct kmem_list3 *l3;
1124 struct array_cache *alien = NULL;
1125 int node;
1126
1127 node = numa_node_id();
1128
1129
1130
1131
1132
1133 if (likely(slabp->nodeid == node))
1134 return 0;
1135
1136 l3 = cachep->nodelists[node];
1137 STATS_INC_NODEFREES(cachep);
1138 if (l3->alien && l3->alien[nodeid]) {
1139 alien = l3->alien[nodeid];
1140 spin_lock(&alien->lock);
1141 if (unlikely(alien->avail == alien->limit)) {
1142 STATS_INC_ACOVERFLOW(cachep);
1143 __drain_alien_cache(cachep, alien, nodeid);
1144 }
1145 alien->entry[alien->avail++] = objp;
1146 spin_unlock(&alien->lock);
1147 } else {
1148 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1149 free_block(cachep, &objp, 1, nodeid);
1150 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1151 }
1152 return 1;
1153}
1154#endif
1155
1156static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1157 unsigned long action, void *hcpu)
1158{
1159 long cpu = (long)hcpu;
1160 struct kmem_cache *cachep;
1161 struct kmem_list3 *l3 = NULL;
1162 int node = cpu_to_node(cpu);
1163 int memsize = sizeof(struct kmem_list3);
1164
1165 switch (action) {
1166 case CPU_LOCK_ACQUIRE:
1167 mutex_lock(&cache_chain_mutex);
1168 break;
1169 case CPU_UP_PREPARE:
1170 case CPU_UP_PREPARE_FROZEN:
1171
1172
1173
1174
1175
1176
1177
1178 list_for_each_entry(cachep, &cache_chain, next) {
1179
1180
1181
1182
1183
1184 if (!cachep->nodelists[node]) {
1185 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1186 if (!l3)
1187 goto bad;
1188 kmem_list3_init(l3);
1189 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1190 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1191
1192
1193
1194
1195
1196
1197 cachep->nodelists[node] = l3;
1198 }
1199
1200 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1201 cachep->nodelists[node]->free_limit =
1202 (1 + nr_cpus_node(node)) *
1203 cachep->batchcount + cachep->num;
1204 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1205 }
1206
1207
1208
1209
1210
1211 list_for_each_entry(cachep, &cache_chain, next) {
1212 struct array_cache *nc;
1213 struct array_cache *shared = NULL;
1214 struct array_cache **alien = NULL;
1215
1216 nc = alloc_arraycache(node, cachep->limit,
1217 cachep->batchcount);
1218 if (!nc)
1219 goto bad;
1220 if (cachep->shared) {
1221 shared = alloc_arraycache(node,
1222 cachep->shared * cachep->batchcount,
1223 0xbaadf00d);
1224 if (!shared)
1225 goto bad;
1226 }
1227 if (use_alien_caches) {
1228 alien = alloc_alien_cache(node, cachep->limit);
1229 if (!alien)
1230 goto bad;
1231 }
1232 cachep->array[cpu] = nc;
1233 l3 = cachep->nodelists[node];
1234 BUG_ON(!l3);
1235
1236 spin_lock_irq(&l3->list_lock);
1237 if (!l3->shared) {
1238
1239
1240
1241
1242 l3->shared = shared;
1243 shared = NULL;
1244 }
1245#ifdef CONFIG_NUMA
1246 if (!l3->alien) {
1247 l3->alien = alien;
1248 alien = NULL;
1249 }
1250#endif
1251 spin_unlock_irq(&l3->list_lock);
1252 kfree(shared);
1253 free_alien_cache(alien);
1254 }
1255 break;
1256 case CPU_ONLINE:
1257 case CPU_ONLINE_FROZEN:
1258 start_cpu_timer(cpu);
1259 break;
1260#ifdef CONFIG_HOTPLUG_CPU
1261 case CPU_DOWN_PREPARE:
1262 case CPU_DOWN_PREPARE_FROZEN:
1263
1264
1265
1266
1267
1268
1269 cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
1270
1271 per_cpu(reap_work, cpu).work.func = NULL;
1272 break;
1273 case CPU_DOWN_FAILED:
1274 case CPU_DOWN_FAILED_FROZEN:
1275 start_cpu_timer(cpu);
1276 break;
1277 case CPU_DEAD:
1278 case CPU_DEAD_FROZEN:
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288#endif
1289 case CPU_UP_CANCELED:
1290 case CPU_UP_CANCELED_FROZEN:
1291 list_for_each_entry(cachep, &cache_chain, next) {
1292 struct array_cache *nc;
1293 struct array_cache *shared;
1294 struct array_cache **alien;
1295 cpumask_t mask;
1296
1297 mask = node_to_cpumask(node);
1298
1299 nc = cachep->array[cpu];
1300 cachep->array[cpu] = NULL;
1301 l3 = cachep->nodelists[node];
1302
1303 if (!l3)
1304 goto free_array_cache;
1305
1306 spin_lock_irq(&l3->list_lock);
1307
1308
1309 l3->free_limit -= cachep->batchcount;
1310 if (nc)
1311 free_block(cachep, nc->entry, nc->avail, node);
1312
1313 if (!cpus_empty(mask)) {
1314 spin_unlock_irq(&l3->list_lock);
1315 goto free_array_cache;
1316 }
1317
1318 shared = l3->shared;
1319 if (shared) {
1320 free_block(cachep, shared->entry,
1321 shared->avail, node);
1322 l3->shared = NULL;
1323 }
1324
1325 alien = l3->alien;
1326 l3->alien = NULL;
1327
1328 spin_unlock_irq(&l3->list_lock);
1329
1330 kfree(shared);
1331 if (alien) {
1332 drain_alien_cache(cachep, alien);
1333 free_alien_cache(alien);
1334 }
1335free_array_cache:
1336 kfree(nc);
1337 }
1338
1339
1340
1341
1342
1343 list_for_each_entry(cachep, &cache_chain, next) {
1344 l3 = cachep->nodelists[node];
1345 if (!l3)
1346 continue;
1347 drain_freelist(cachep, l3, l3->free_objects);
1348 }
1349 break;
1350 case CPU_LOCK_RELEASE:
1351 mutex_unlock(&cache_chain_mutex);
1352 break;
1353 }
1354 return NOTIFY_OK;
1355bad:
1356 return NOTIFY_BAD;
1357}
1358
1359static struct notifier_block __cpuinitdata cpucache_notifier = {
1360 &cpuup_callback, NULL, 0
1361};
1362
1363
1364
1365
1366static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1367 int nodeid)
1368{
1369 struct kmem_list3 *ptr;
1370
1371 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
1372 BUG_ON(!ptr);
1373
1374 local_irq_disable();
1375 memcpy(ptr, list, sizeof(struct kmem_list3));
1376
1377
1378
1379 spin_lock_init(&ptr->list_lock);
1380
1381 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1382 cachep->nodelists[nodeid] = ptr;
1383 local_irq_enable();
1384}
1385
1386
1387
1388
1389
1390void __init kmem_cache_init(void)
1391{
1392 size_t left_over;
1393 struct cache_sizes *sizes;
1394 struct cache_names *names;
1395 int i;
1396 int order;
1397 int node;
1398
1399 if (num_possible_nodes() == 1)
1400 use_alien_caches = 0;
1401
1402 for (i = 0; i < NUM_INIT_LISTS; i++) {
1403 kmem_list3_init(&initkmem_list3[i]);
1404 if (i < MAX_NUMNODES)
1405 cache_cache.nodelists[i] = NULL;
1406 }
1407
1408
1409
1410
1411
1412 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
1413 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435 node = numa_node_id();
1436
1437
1438 INIT_LIST_HEAD(&cache_chain);
1439 list_add(&cache_cache.next, &cache_chain);
1440 cache_cache.colour_off = cache_line_size();
1441 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1442 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
1443
1444
1445
1446
1447
1448 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1449 nr_node_ids * sizeof(struct kmem_list3 *);
1450#if DEBUG
1451 cache_cache.obj_size = cache_cache.buffer_size;
1452#endif
1453 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1454 cache_line_size());
1455 cache_cache.reciprocal_buffer_size =
1456 reciprocal_value(cache_cache.buffer_size);
1457
1458 for (order = 0; order < MAX_ORDER; order++) {
1459 cache_estimate(order, cache_cache.buffer_size,
1460 cache_line_size(), 0, &left_over, &cache_cache.num);
1461 if (cache_cache.num)
1462 break;
1463 }
1464 BUG_ON(!cache_cache.num);
1465 cache_cache.gfporder = order;
1466 cache_cache.colour = left_over / cache_cache.colour_off;
1467 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1468 sizeof(struct slab), cache_line_size());
1469
1470
1471 sizes = malloc_sizes;
1472 names = cache_names;
1473
1474
1475
1476
1477
1478
1479
1480 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1481 sizes[INDEX_AC].cs_size,
1482 ARCH_KMALLOC_MINALIGN,
1483 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1484 NULL, NULL);
1485
1486 if (INDEX_AC != INDEX_L3) {
1487 sizes[INDEX_L3].cs_cachep =
1488 kmem_cache_create(names[INDEX_L3].name,
1489 sizes[INDEX_L3].cs_size,
1490 ARCH_KMALLOC_MINALIGN,
1491 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1492 NULL, NULL);
1493 }
1494
1495 slab_early_init = 0;
1496
1497 while (sizes->cs_size != ULONG_MAX) {
1498
1499
1500
1501
1502
1503
1504
1505 if (!sizes->cs_cachep) {
1506 sizes->cs_cachep = kmem_cache_create(names->name,
1507 sizes->cs_size,
1508 ARCH_KMALLOC_MINALIGN,
1509 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1510 NULL, NULL);
1511 }
1512#ifdef CONFIG_ZONE_DMA
1513 sizes->cs_dmacachep = kmem_cache_create(
1514 names->name_dma,
1515 sizes->cs_size,
1516 ARCH_KMALLOC_MINALIGN,
1517 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1518 SLAB_PANIC,
1519 NULL, NULL);
1520#endif
1521 sizes++;
1522 names++;
1523 }
1524
1525 {
1526 struct array_cache *ptr;
1527
1528 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1529
1530 local_irq_disable();
1531 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1532 memcpy(ptr, cpu_cache_get(&cache_cache),
1533 sizeof(struct arraycache_init));
1534
1535
1536
1537 spin_lock_init(&ptr->lock);
1538
1539 cache_cache.array[smp_processor_id()] = ptr;
1540 local_irq_enable();
1541
1542 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1543
1544 local_irq_disable();
1545 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1546 != &initarray_generic.cache);
1547 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1548 sizeof(struct arraycache_init));
1549
1550
1551
1552 spin_lock_init(&ptr->lock);
1553
1554 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1555 ptr;
1556 local_irq_enable();
1557 }
1558
1559 {
1560 int nid;
1561
1562
1563 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
1564
1565 for_each_online_node(nid) {
1566 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1567 &initkmem_list3[SIZE_AC + nid], nid);
1568
1569 if (INDEX_AC != INDEX_L3) {
1570 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1571 &initkmem_list3[SIZE_L3 + nid], nid);
1572 }
1573 }
1574 }
1575
1576
1577 {
1578 struct kmem_cache *cachep;
1579 mutex_lock(&cache_chain_mutex);
1580 list_for_each_entry(cachep, &cache_chain, next)
1581 if (enable_cpucache(cachep))
1582 BUG();
1583 mutex_unlock(&cache_chain_mutex);
1584 }
1585
1586
1587 init_lock_keys();
1588
1589
1590
1591 g_cpucache_up = FULL;
1592
1593
1594
1595
1596
1597 register_cpu_notifier(&cpucache_notifier);
1598
1599
1600
1601
1602
1603}
1604
1605static int __init cpucache_init(void)
1606{
1607 int cpu;
1608
1609
1610
1611
1612 for_each_online_cpu(cpu)
1613 start_cpu_timer(cpu);
1614 return 0;
1615}
1616__initcall(cpucache_init);
1617
1618
1619
1620
1621
1622
1623
1624
1625static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1626{
1627 struct page *page;
1628 int nr_pages;
1629 int i;
1630
1631#ifndef CONFIG_MMU
1632
1633
1634
1635
1636 flags |= __GFP_COMP;
1637#endif
1638
1639 flags |= cachep->gfpflags;
1640
1641 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1642 if (!page)
1643 return NULL;
1644
1645 nr_pages = (1 << cachep->gfporder);
1646 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1647 add_zone_page_state(page_zone(page),
1648 NR_SLAB_RECLAIMABLE, nr_pages);
1649 else
1650 add_zone_page_state(page_zone(page),
1651 NR_SLAB_UNRECLAIMABLE, nr_pages);
1652 for (i = 0; i < nr_pages; i++)
1653 __SetPageSlab(page + i);
1654 return page_address(page);
1655}
1656
1657
1658
1659
1660static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1661{
1662 unsigned long i = (1 << cachep->gfporder);
1663 struct page *page = virt_to_page(addr);
1664 const unsigned long nr_freed = i;
1665
1666 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1667 sub_zone_page_state(page_zone(page),
1668 NR_SLAB_RECLAIMABLE, nr_freed);
1669 else
1670 sub_zone_page_state(page_zone(page),
1671 NR_SLAB_UNRECLAIMABLE, nr_freed);
1672 while (i--) {
1673 BUG_ON(!PageSlab(page));
1674 __ClearPageSlab(page);
1675 page++;
1676 }
1677 if (current->reclaim_state)
1678 current->reclaim_state->reclaimed_slab += nr_freed;
1679 free_pages((unsigned long)addr, cachep->gfporder);
1680}
1681
1682static void kmem_rcu_free(struct rcu_head *head)
1683{
1684 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1685 struct kmem_cache *cachep = slab_rcu->cachep;
1686
1687 kmem_freepages(cachep, slab_rcu->addr);
1688 if (OFF_SLAB(cachep))
1689 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1690}
1691
1692#if DEBUG
1693
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record a small trace inside a freed object's payload.
 *
 * Layout written at the object's payload start: the marker 0x12345678,
 * @caller, the current CPU id, then every word found on the stack (walking
 * up from the address of @caller) that is a kernel text address, and finally
 * the marker 0x87654321.  Objects smaller than five words are left alone.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
				unsigned long caller)
{
	int room = obj_size(cachep);
	unsigned long *out;
	unsigned long *sp;

	out = (unsigned long *)((char *)addr + obj_offset(cachep));

	if (room < 5 * sizeof(unsigned long))
		return;

	out[0] = 0x12345678;
	out[1] = caller;
	out[2] = smp_processor_id();
	out += 3;
	room -= 3 * sizeof(unsigned long);

	for (sp = &caller; !kstack_end(sp); sp++) {
		unsigned long word = *sp;

		if (!kernel_text_address(word))
			continue;

		*out++ = word;
		room -= sizeof(unsigned long);
		/* Keep one word free for the closing marker. */
		if (room <= sizeof(unsigned long))
			break;
	}

	*out = 0x87654321;
}
#endif
1727
1728static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1729{
1730 int size = obj_size(cachep);
1731 addr = &((char *)addr)[obj_offset(cachep)];
1732
1733 memset(addr, val, size);
1734 *(unsigned char *)(addr + size - 1) = POISON_END;
1735}
1736
1737static void dump_line(char *data, int offset, int limit)
1738{
1739 int i;
1740 unsigned char error = 0;
1741 int bad_count = 0;
1742
1743 printk(KERN_ERR "%03x:", offset);
1744 for (i = 0; i < limit; i++) {
1745 if (data[offset + i] != POISON_FREE) {
1746 error = data[offset + i];
1747 bad_count++;
1748 }
1749 printk(" %02x", (unsigned char)data[offset + i]);
1750 }
1751 printk("\n");
1752
1753 if (bad_count == 1) {
1754 error ^= POISON_FREE;
1755 if (!(error & (error - 1))) {
1756 printk(KERN_ERR "Single bit error detected. Probably "
1757 "bad RAM.\n");
1758#ifdef CONFIG_X86
1759 printk(KERN_ERR "Run memtest86+ or a similar memory "
1760 "test tool.\n");
1761#else
1762 printk(KERN_ERR "Run a memory test tool.\n");
1763#endif
1764 }
1765 }
1766}
1767#endif
1768
1769#if DEBUG
1770
1771static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1772{
1773 int i, size;
1774 char *realobj;
1775
1776 if (cachep->flags & SLAB_RED_ZONE) {
1777 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1778 *dbg_redzone1(cachep, objp),
1779 *dbg_redzone2(cachep, objp));
1780 }
1781
1782 if (cachep->flags & SLAB_STORE_USER) {
1783 printk(KERN_ERR "Last user: [<%p>]",
1784 *dbg_userword(cachep, objp));
1785 print_symbol("(%s)",
1786 (unsigned long)*dbg_userword(cachep, objp));
1787 printk("\n");
1788 }
1789 realobj = (char *)objp + obj_offset(cachep);
1790 size = obj_size(cachep);
1791 for (i = 0; i < size && lines; i += 16, lines--) {
1792 int limit;
1793 limit = 16;
1794 if (i + limit > size)
1795 limit = size - i;
1796 dump_line(realobj, i, limit);
1797 }
1798}
1799
1800static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1801{
1802 char *realobj;
1803 int size, i;
1804 int lines = 0;
1805
1806 realobj = (char *)objp + obj_offset(cachep);
1807 size = obj_size(cachep);
1808
1809 for (i = 0; i < size; i++) {
1810 char exp = POISON_FREE;
1811 if (i == size - 1)
1812 exp = POISON_END;
1813 if (realobj[i] != exp) {
1814 int limit;
1815
1816
1817 if (lines == 0) {
1818 printk(KERN_ERR
1819 "Slab corruption: %s start=%p, len=%d\n",
1820 cachep->name, realobj, size);
1821 print_objinfo(cachep, objp, 0);
1822 }
1823
1824 i = (i / 16) * 16;
1825 limit = 16;
1826 if (i + limit > size)
1827 limit = size - i;
1828 dump_line(realobj, i, limit);
1829 i += 16;
1830 lines++;
1831
1832 if (lines > 5)
1833 break;
1834 }
1835 }
1836 if (lines != 0) {
1837
1838
1839
1840 struct slab *slabp = virt_to_slab(objp);
1841 unsigned int objnr;
1842
1843 objnr = obj_to_index(cachep, slabp, objp);
1844 if (objnr) {
1845 objp = index_to_obj(cachep, slabp, objnr - 1);
1846 realobj = (char *)objp + obj_offset(cachep);
1847 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1848 realobj, size);
1849 print_objinfo(cachep, objp, 2);
1850 }
1851 if (objnr + 1 < cachep->num) {
1852 objp = index_to_obj(cachep, slabp, objnr + 1);
1853 realobj = (char *)objp + obj_offset(cachep);
1854 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1855 realobj, size);
1856 print_objinfo(cachep, objp, 2);
1857 }
1858 }
1859}
1860#endif
1861
#if DEBUG
/*
 * Debug-only pass over every object of a slab that is about to be freed.
 *
 * Poisoned caches get their poison pattern verified (or, with
 * CONFIG_DEBUG_PAGEALLOC and page-multiple off-slab objects, their pages
 * mapped back in).  Red-zoned caches get both redzones checked against
 * RED_INACTIVE; a mismatch is reported through slab_error().
 */
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
{
	int idx;

	for (idx = 0; idx < cachep->num; idx++) {
		void *obj = index_to_obj(cachep, slabp, idx);

		if (cachep->flags & SLAB_POISON) {
			int remapped = 0;

#ifdef CONFIG_DEBUG_PAGEALLOC
			if (cachep->buffer_size % PAGE_SIZE == 0 &&
					OFF_SLAB(cachep)) {
				kernel_map_pages(virt_to_page(obj),
					cachep->buffer_size / PAGE_SIZE, 1);
				remapped = 1;
			}
#endif
			if (!remapped)
				check_poison_obj(cachep, obj);
		}

		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone1(cachep, obj) != RED_INACTIVE)
				slab_error(cachep, "start of a freed object "
					   "was overwritten");
			if (*dbg_redzone2(cachep, obj) != RED_INACTIVE)
				slab_error(cachep, "end of a freed object "
					   "was overwritten");
		}
	}
}
#else
/* Without DEBUG there is nothing to verify per object. */
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1915{
1916 void *addr = slabp->s_mem - slabp->colouroff;
1917
1918 slab_destroy_objs(cachep, slabp);
1919 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1920 struct slab_rcu *slab_rcu;
1921
1922 slab_rcu = (struct slab_rcu *)slabp;
1923 slab_rcu->cachep = cachep;
1924 slab_rcu->addr = addr;
1925 call_rcu(&slab_rcu->head, kmem_rcu_free);
1926 } else {
1927 kmem_freepages(cachep, addr);
1928 if (OFF_SLAB(cachep))
1929 kmem_cache_free(cachep->slabp_cache, slabp);
1930 }
1931}
1932
1933
1934
1935
1936
1937static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1938{
1939 int node;
1940
1941 for_each_online_node(node) {
1942 cachep->nodelists[node] = &initkmem_list3[index + node];
1943 cachep->nodelists[node]->next_reap = jiffies +
1944 REAPTIMEOUT_LIST3 +
1945 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1946 }
1947}
1948
1949static void __kmem_cache_destroy(struct kmem_cache *cachep)
1950{
1951 int i;
1952 struct kmem_list3 *l3;
1953
1954 for_each_online_cpu(i)
1955 kfree(cachep->array[i]);
1956
1957
1958 for_each_online_node(i) {
1959 l3 = cachep->nodelists[i];
1960 if (l3) {
1961 kfree(l3->shared);
1962 free_alien_cache(l3->alien);
1963 kfree(l3);
1964 }
1965 }
1966 kmem_cache_free(&cache_cache, cachep);
1967}
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983static size_t calculate_slab_order(struct kmem_cache *cachep,
1984 size_t size, size_t align, unsigned long flags)
1985{
1986 unsigned long offslab_limit;
1987 size_t left_over = 0;
1988 int gfporder;
1989
1990 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
1991 unsigned int num;
1992 size_t remainder;
1993
1994 cache_estimate(gfporder, size, align, flags, &remainder, &num);
1995 if (!num)
1996 continue;
1997
1998 if (flags & CFLGS_OFF_SLAB) {
1999
2000
2001
2002
2003
2004 offslab_limit = size - sizeof(struct slab);
2005 offslab_limit /= sizeof(kmem_bufctl_t);
2006
2007 if (num > offslab_limit)
2008 break;
2009 }
2010
2011
2012 cachep->num = num;
2013 cachep->gfporder = gfporder;
2014 left_over = remainder;
2015
2016
2017
2018
2019
2020
2021 if (flags & SLAB_RECLAIM_ACCOUNT)
2022 break;
2023
2024
2025
2026
2027
2028 if (gfporder >= slab_break_gfp_order)
2029 break;
2030
2031
2032
2033
2034 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2035 break;
2036 }
2037 return left_over;
2038}
2039
2040static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
2041{
2042 if (g_cpucache_up == FULL)
2043 return enable_cpucache(cachep);
2044
2045 if (g_cpucache_up == NONE) {
2046
2047
2048
2049
2050
2051 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2052
2053
2054
2055
2056
2057
2058 set_up_list3s(cachep, SIZE_AC);
2059 if (INDEX_AC == INDEX_L3)
2060 g_cpucache_up = PARTIAL_L3;
2061 else
2062 g_cpucache_up = PARTIAL_AC;
2063 } else {
2064 cachep->array[smp_processor_id()] =
2065 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
2066
2067 if (g_cpucache_up == PARTIAL_AC) {
2068 set_up_list3s(cachep, SIZE_L3);
2069 g_cpucache_up = PARTIAL_L3;
2070 } else {
2071 int node;
2072 for_each_online_node(node) {
2073 cachep->nodelists[node] =
2074 kmalloc_node(sizeof(struct kmem_list3),
2075 GFP_KERNEL, node);
2076 BUG_ON(!cachep->nodelists[node]);
2077 kmem_list3_init(cachep->nodelists[node]);
2078 }
2079 }
2080 }
2081 cachep->nodelists[numa_node_id()]->next_reap =
2082 jiffies + REAPTIMEOUT_LIST3 +
2083 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2084
2085 cpu_cache_get(cachep)->avail = 0;
2086 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2087 cpu_cache_get(cachep)->batchcount = 1;
2088 cpu_cache_get(cachep)->touched = 0;
2089 cachep->batchcount = 1;
2090 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2091 return 0;
2092}
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
/*
 * kmem_cache_create - create a new object cache.
 * @name:  identifier of the cache.  The pointer itself is stored in the
 *         cache (the string is not copied), and it must not match the name
 *         of any cache already on cache_chain.
 * @size:  object size in bytes; must be within
 *         [BYTES_PER_WORD, KMALLOC_MAX_SIZE].
 * @align: minimum alignment requested by the caller.
 * @flags: SLAB_* creation flags; bits outside CREATE_MASK are a BUG.
 * @ctor:  optional constructor, stored in the cache.
 * @dtor:  must be NULL; a non-NULL destructor is treated as a usage bug.
 *
 * Must not be called from interrupt context.  cache_chain_mutex is held
 * from the duplicate-name check until return.
 *
 * Returns the new cache, or NULL on failure (duplicate name, descriptor
 * allocation failure, no usable slab order, cpu cache setup failure).
 * With SLAB_PANIC in @flags a failure panics instead of returning.
 */
2123struct kmem_cache *
2124kmem_cache_create (const char *name, size_t size, size_t align,
2125 unsigned long flags,
2126 void (*ctor)(void*, struct kmem_cache *, unsigned long),
2127 void (*dtor)(void*, struct kmem_cache *, unsigned long))
2128{
2129 size_t left_over, slab_size, ralign;
2130 struct kmem_cache *cachep = NULL, *pc;
2131
2132 /* Sanity checks: each of these is a caller bug, so BUG() rather than fail. */
2133
2134
2135 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2136 size > KMALLOC_MAX_SIZE || dtor) {
2137 printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
2138 name);
2139 BUG();
2140 }
2141
2142 /* Hold cache_chain_mutex while walking and extending cache_chain. */
2143
2144
2145
2146 mutex_lock(&cache_chain_mutex);
2147
2148 list_for_each_entry(pc, &cache_chain, next) {
2149 char tmp;
2150 int res;
2151
2152 /* Probe pc->name before strcmp(): if the read fails, report it and skip the entry. */
2153
2154
2155
2156
2157 res = probe_kernel_address(pc->name, tmp);
2158 if (res) {
2159 printk(KERN_ERR
2160 "SLAB: cache with size %d has lost its name\n",
2161 pc->buffer_size);
2162 continue;
2163 }
2164
2165 if (!strcmp(pc->name, name)) {
2166 printk(KERN_ERR
2167 "kmem_cache_create: duplicate cache %s\n", name);
2168 dump_stack();
2169 goto oops;
2170 }
2171 }
2172
2173#if DEBUG
2174 WARN_ON(strchr(name, ' '));
2175#if FORCED_DEBUG
2176 /* Forced debugging: add redzoning and user tracking for sizes below 4096, */
2177 /* or when the extra debug bytes leave fls(size - 1) unchanged. */
2178 /* Poisoning is forced on unless the cache is SLAB_DESTROY_BY_RCU. */
2179
2180
2181
2182 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2183 2 * sizeof(unsigned long long)))
2184 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2185 if (!(flags & SLAB_DESTROY_BY_RCU))
2186 flags |= SLAB_POISON;
2187#endif
2188 if (flags & SLAB_DESTROY_BY_RCU)
2189 BUG_ON(flags & SLAB_POISON);
2190#endif
2191
2192 /* Reject any flag bit that is not listed in CREATE_MASK. */
2193
2194
2195 BUG_ON(flags & ~CREATE_MASK);
2196
2197 /* Round size up to a multiple of BYTES_PER_WORD. */
2198
2199
2200
2201
2202 if (size & (BYTES_PER_WORD - 1)) {
2203 size += (BYTES_PER_WORD - 1);
2204 size &= ~(BYTES_PER_WORD - 1);
2205 }
2206
2207 /* Work out the alignment in ralign, starting with the cache's own preference. */
2208
2209
2210 if (flags & SLAB_HWCACHE_ALIGN) {
2211 /* Start at the cache line size and halve it while the object fits in half. */
2212
2213
2214
2215
2216 ralign = cache_line_size();
2217 while (size <= ralign / 2)
2218 ralign /= 2;
2219 } else {
2220 ralign = BYTES_PER_WORD;
2221 }
2222
2223 /* SLAB_STORE_USER resets ralign to one word; SLAB_RED_ZONE then resets it to */
2224 /* REDZONE_ALIGN.  Either value may still be raised by the checks below. */
2225
2226
2227
2228 if (flags & SLAB_STORE_USER)
2229 ralign = BYTES_PER_WORD;
2230
2231 if (flags & SLAB_RED_ZONE) {
2232 ralign = REDZONE_ALIGN;
2233 /* Round size up to a multiple of REDZONE_ALIGN. */
2234
2235 size += REDZONE_ALIGN - 1;
2236 size &= ~(REDZONE_ALIGN - 1);
2237 }
2238
2239 /* Raise ralign to the architecture minimum. */
2240 if (ralign < ARCH_SLAB_MINALIGN) {
2241 ralign = ARCH_SLAB_MINALIGN;
2242 }
2243 /* Raise ralign to the caller's requested alignment. */
2244 if (ralign < align) {
2245 ralign = align;
2246 }
2247 /* Alignment above that of unsigned long long turns off redzoning and user tracking. */
2248 if (ralign > __alignof__(unsigned long long))
2249 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2250
2251 /* ralign is final: use it as the alignment from here on. */
2252
2253 align = ralign;
2254
2255 /* Allocate a zeroed cache descriptor from cache_cache. */
2256 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
2257 if (!cachep)
2258 goto oops;
2259
2260#if DEBUG
2261 cachep->obj_size = size;
2262
2263 /* obj_size keeps the size as it was before the debug fields below are added. */
2264
2265
2266
2267 if (flags & SLAB_RED_ZONE) {
2268 /* Move the object past one unsigned long long and reserve room for two. */
2269 cachep->obj_offset += sizeof(unsigned long long);
2270 size += 2 * sizeof(unsigned long long);
2271 }
2272 if (flags & SLAB_STORE_USER) {
2273 /* User tracking adds REDZONE_ALIGN bytes when redzoning is on, else one word. */
2274
2275
2276
2277 if (flags & SLAB_RED_ZONE)
2278 size += REDZONE_ALIGN;
2279 else
2280 size += BYTES_PER_WORD;
2281 }
2282#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2283 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2284 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
2285 cachep->obj_offset += PAGE_SIZE - size;
2286 size = PAGE_SIZE;
2287 }
2288#endif
2289#endif
2290
2291 /* Objects of at least PAGE_SIZE/8 get off-slab management, */
2292 /* except while slab_early_init is set. */
2293
2294
2295
2296 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
2297
2298
2299
2300
2301 flags |= CFLGS_OFF_SLAB;
2302
2303 size = ALIGN(size, align);
2304
2305 left_over = calculate_slab_order(cachep, size, align, flags);
2306
2307 if (!cachep->num) {
2308 printk(KERN_ERR
2309 "kmem_cache_create: couldn't create cache %s.\n", name);
2310 kmem_cache_free(&cache_cache, cachep);
2311 cachep = NULL;
2312 goto oops;
2313 }
2314 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2315 + sizeof(struct slab), align);
2316
2317 /* If the leftover space can hold the management structure, */
2318 /* keep it on-slab after all and take it out of left_over. */
2319
2320
2321 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2322 flags &= ~CFLGS_OFF_SLAB;
2323 left_over -= slab_size;
2324 }
2325
2326 if (flags & CFLGS_OFF_SLAB) {
2327 /* Off-slab management is not padded to the object alignment. */
2328 slab_size =
2329 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2330 }
2331
2332 cachep->colour_off = cache_line_size();
2333 /* The colour offset is at least the object alignment. */
2334 if (cachep->colour_off < align)
2335 cachep->colour_off = align;
2336 cachep->colour = left_over / cachep->colour_off;
2337 cachep->slab_size = slab_size;
2338 cachep->flags = flags;
2339 cachep->gfpflags = 0;
2340 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2341 cachep->gfpflags |= GFP_DMA;
2342 cachep->buffer_size = size;
2343 cachep->reciprocal_buffer_size = reciprocal_value(size);
2344
2345 if (flags & CFLGS_OFF_SLAB) {
2346 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2347 /* A general cache able to hold slab_size bytes is required here. */
2348
2349
2350
2351
2352
2353
2354 BUG_ON(!cachep->slabp_cache);
2355 }
2356 cachep->ctor = ctor;
2357 cachep->name = name;
2358
2359 if (setup_cpu_cache(cachep)) {
2360 __kmem_cache_destroy(cachep);
2361 cachep = NULL;
2362 goto oops;
2363 }
2364
2365 /* Success: add the cache to cache_chain. */
2366 list_add(&cachep->next, &cache_chain);
2367oops:
2368 if (!cachep && (flags & SLAB_PANIC))
2369 panic("kmem_cache_create(): failed to create slab `%s'\n",
2370 name);
2371 mutex_unlock(&cache_chain_mutex);
2372 return cachep;
2373}
2374EXPORT_SYMBOL(kmem_cache_create);
2375
#if DEBUG
/* BUG unless local interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG unless local interrupts are enabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP: interrupts must be off and the list_lock of the current node's
 * kmem_list3 must be held.  No-op otherwise.
 */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
#endif
}

/* Same check as check_spinlock_acquired(), for an explicit @node. */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[node]->list_lock);
#endif
}

#else
/* Non-debug builds: the checks compile away. */
#define check_irq_off()				do { } while(0)
#define check_irq_on()				do { } while(0)
#define check_spinlock_acquired(x)		do { } while(0)
#define check_spinlock_acquired_node(x, y)	do { } while(0)
#endif
2409
2410static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2411 struct array_cache *ac,
2412 int force, int node);
2413
2414static void do_drain(void *arg)
2415{
2416 struct kmem_cache *cachep = arg;
2417 struct array_cache *ac;
2418 int node = numa_node_id();
2419
2420 check_irq_off();
2421 ac = cpu_cache_get(cachep);
2422 spin_lock(&cachep->nodelists[node]->list_lock);
2423 free_block(cachep, ac->entry, ac->avail, node);
2424 spin_unlock(&cachep->nodelists[node]->list_lock);
2425 ac->avail = 0;
2426}
2427
2428static void drain_cpu_caches(struct kmem_cache *cachep)
2429{
2430 struct kmem_list3 *l3;
2431 int node;
2432
2433 on_each_cpu(do_drain, cachep, 1, 1);
2434 check_irq_on();
2435 for_each_online_node(node) {
2436 l3 = cachep->nodelists[node];
2437 if (l3 && l3->alien)
2438 drain_alien_cache(cachep, l3->alien);
2439 }
2440
2441 for_each_online_node(node) {
2442 l3 = cachep->nodelists[node];
2443 if (l3)
2444 drain_array(cachep, l3, l3->shared, 1, node);
2445 }
2446}
2447
2448
2449
2450
2451
2452
2453
2454static int drain_freelist(struct kmem_cache *cache,
2455 struct kmem_list3 *l3, int tofree)
2456{
2457 struct list_head *p;
2458 int nr_freed;
2459 struct slab *slabp;
2460
2461 nr_freed = 0;
2462 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2463
2464 spin_lock_irq(&l3->list_lock);
2465 p = l3->slabs_free.prev;
2466 if (p == &l3->slabs_free) {
2467 spin_unlock_irq(&l3->list_lock);
2468 goto out;
2469 }
2470
2471 slabp = list_entry(p, struct slab, list);
2472#if DEBUG
2473 BUG_ON(slabp->inuse);
2474#endif
2475 list_del(&slabp->list);
2476
2477
2478
2479
2480 l3->free_objects -= cache->num;
2481 spin_unlock_irq(&l3->list_lock);
2482 slab_destroy(cache, slabp);
2483 nr_freed++;
2484 }
2485out:
2486 return nr_freed;
2487}
2488
2489
2490static int __cache_shrink(struct kmem_cache *cachep)
2491{
2492 int ret = 0, i = 0;
2493 struct kmem_list3 *l3;
2494
2495 drain_cpu_caches(cachep);
2496
2497 check_irq_on();
2498 for_each_online_node(i) {
2499 l3 = cachep->nodelists[i];
2500 if (!l3)
2501 continue;
2502
2503 drain_freelist(cachep, l3, l3->free_objects);
2504
2505 ret += !list_empty(&l3->slabs_full) ||
2506 !list_empty(&l3->slabs_partial);
2507 }
2508 return (ret ? 1 : 0);
2509}
2510
2511
2512
2513
2514
2515
2516
2517
2518int kmem_cache_shrink(struct kmem_cache *cachep)
2519{
2520 int ret;
2521 BUG_ON(!cachep || in_interrupt());
2522
2523 mutex_lock(&cache_chain_mutex);
2524 ret = __cache_shrink(cachep);
2525 mutex_unlock(&cache_chain_mutex);
2526 return ret;
2527}
2528EXPORT_SYMBOL(kmem_cache_shrink);
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546void kmem_cache_destroy(struct kmem_cache *cachep)
2547{
2548 BUG_ON(!cachep || in_interrupt());
2549
2550
2551 mutex_lock(&cache_chain_mutex);
2552
2553
2554
2555 list_del(&cachep->next);
2556 if (__cache_shrink(cachep)) {
2557 slab_error(cachep, "Can't free all objects");
2558 list_add(&cachep->next, &cache_chain);
2559 mutex_unlock(&cache_chain_mutex);
2560 return;
2561 }
2562
2563 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2564 synchronize_rcu();
2565
2566 __kmem_cache_destroy(cachep);
2567 mutex_unlock(&cache_chain_mutex);
2568}
2569EXPORT_SYMBOL(kmem_cache_destroy);
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2583 int colour_off, gfp_t local_flags,
2584 int nodeid)
2585{
2586 struct slab *slabp;
2587
2588 if (OFF_SLAB(cachep)) {
2589
2590 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2591 local_flags & ~GFP_THISNODE, nodeid);
2592 if (!slabp)
2593 return NULL;
2594 } else {
2595 slabp = objp + colour_off;
2596 colour_off += cachep->slab_size;
2597 }
2598 slabp->inuse = 0;
2599 slabp->colouroff = colour_off;
2600 slabp->s_mem = objp + colour_off;
2601 slabp->nodeid = nodeid;
2602 return slabp;
2603}
2604
2605static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2606{
2607 return (kmem_bufctl_t *) (slabp + 1);
2608}
2609
2610static void cache_init_objs(struct kmem_cache *cachep,
2611 struct slab *slabp)
2612{
2613 int i;
2614
2615 for (i = 0; i < cachep->num; i++) {
2616 void *objp = index_to_obj(cachep, slabp, i);
2617#if DEBUG
2618
2619 if (cachep->flags & SLAB_POISON)
2620 poison_obj(cachep, objp, POISON_FREE);
2621 if (cachep->flags & SLAB_STORE_USER)
2622 *dbg_userword(cachep, objp) = NULL;
2623
2624 if (cachep->flags & SLAB_RED_ZONE) {
2625 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2626 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2627 }
2628
2629
2630
2631
2632
2633 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2634 cachep->ctor(objp + obj_offset(cachep), cachep,
2635 0);
2636
2637 if (cachep->flags & SLAB_RED_ZONE) {
2638 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2639 slab_error(cachep, "constructor overwrote the"
2640 " end of an object");
2641 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2642 slab_error(cachep, "constructor overwrote the"
2643 " start of an object");
2644 }
2645 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2646 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2647 kernel_map_pages(virt_to_page(objp),
2648 cachep->buffer_size / PAGE_SIZE, 0);
2649#else
2650 if (cachep->ctor)
2651 cachep->ctor(objp, cachep, 0);
2652#endif
2653 slab_bufctl(slabp)[i] = i + 1;
2654 }
2655 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2656 slabp->free = 0;
2657}
2658
2659static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2660{
2661 if (CONFIG_ZONE_DMA_FLAG) {
2662 if (flags & GFP_DMA)
2663 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2664 else
2665 BUG_ON(cachep->gfpflags & GFP_DMA);
2666 }
2667}
2668
2669static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2670 int nodeid)
2671{
2672 void *objp = index_to_obj(cachep, slabp, slabp->free);
2673 kmem_bufctl_t next;
2674
2675 slabp->inuse++;
2676 next = slab_bufctl(slabp)[slabp->free];
2677#if DEBUG
2678 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2679 WARN_ON(slabp->nodeid != nodeid);
2680#endif
2681 slabp->free = next;
2682
2683 return objp;
2684}
2685
2686static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2687 void *objp, int nodeid)
2688{
2689 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2690
2691#if DEBUG
2692
2693 WARN_ON(slabp->nodeid != nodeid);
2694
2695 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2696 printk(KERN_ERR "slab: double free detected in cache "
2697 "'%s', objp %p\n", cachep->name, objp);
2698 BUG();
2699 }
2700#endif
2701 slab_bufctl(slabp)[objnr] = slabp->free;
2702 slabp->free = objnr;
2703 slabp->inuse--;
2704}
2705
2706
2707
2708
2709
2710
2711static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2712 void *addr)
2713{
2714 int nr_pages;
2715 struct page *page;
2716
2717 page = virt_to_page(addr);
2718
2719 nr_pages = 1;
2720 if (likely(!PageCompound(page)))
2721 nr_pages <<= cache->gfporder;
2722
2723 do {
2724 page_set_cache(page, cache);
2725 page_set_slab(page, slab);
2726 page++;
2727 } while (--nr_pages);
2728}
2729
2730
2731
2732
2733
2734static int cache_grow(struct kmem_cache *cachep,
2735 gfp_t flags, int nodeid, void *objp)
2736{
2737 struct slab *slabp;
2738 size_t offset;
2739 gfp_t local_flags;
2740 struct kmem_list3 *l3;
2741
2742
2743
2744
2745
2746 BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK));
2747
2748 local_flags = (flags & GFP_LEVEL_MASK);
2749
2750 check_irq_off();
2751 l3 = cachep->nodelists[nodeid];
2752 spin_lock(&l3->list_lock);
2753
2754
2755 offset = l3->colour_next;
2756 l3->colour_next++;
2757 if (l3->colour_next >= cachep->colour)
2758 l3->colour_next = 0;
2759 spin_unlock(&l3->list_lock);
2760
2761 offset *= cachep->colour_off;
2762
2763 if (local_flags & __GFP_WAIT)
2764 local_irq_enable();
2765
2766
2767
2768
2769
2770
2771
2772 kmem_flagcheck(cachep, flags);
2773
2774
2775
2776
2777
2778 if (!objp)
2779 objp = kmem_getpages(cachep, flags, nodeid);
2780 if (!objp)
2781 goto failed;
2782
2783
2784 slabp = alloc_slabmgmt(cachep, objp, offset,
2785 local_flags & ~GFP_THISNODE, nodeid);
2786 if (!slabp)
2787 goto opps1;
2788
2789 slabp->nodeid = nodeid;
2790 slab_map_pages(cachep, slabp, objp);
2791
2792 cache_init_objs(cachep, slabp);
2793
2794 if (local_flags & __GFP_WAIT)
2795 local_irq_disable();
2796 check_irq_off();
2797 spin_lock(&l3->list_lock);
2798
2799
2800 list_add_tail(&slabp->list, &(l3->slabs_free));
2801 STATS_INC_GROWN(cachep);
2802 l3->free_objects += cachep->num;
2803 spin_unlock(&l3->list_lock);
2804 return 1;
2805opps1:
2806 kmem_freepages(cachep, objp);
2807failed:
2808 if (local_flags & __GFP_WAIT)
2809 local_irq_disable();
2810 return 0;
2811}
2812
2813#if DEBUG
2814
2815
2816
2817
2818
2819
2820static void kfree_debugcheck(const void *objp)
2821{
2822 if (!virt_addr_valid(objp)) {
2823 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2824 (unsigned long)objp);
2825 BUG();
2826 }
2827}
2828
2829static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2830{
2831 unsigned long long redzone1, redzone2;
2832
2833 redzone1 = *dbg_redzone1(cache, obj);
2834 redzone2 = *dbg_redzone2(cache, obj);
2835
2836
2837
2838
2839 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2840 return;
2841
2842 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2843 slab_error(cache, "double free detected");
2844 else
2845 slab_error(cache, "memory outside object was overwritten");
2846
2847 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2848 obj, redzone1, redzone2);
2849}
2850
2851static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2852 void *caller)
2853{
2854 struct page *page;
2855 unsigned int objnr;
2856 struct slab *slabp;
2857
2858 objp -= obj_offset(cachep);
2859 kfree_debugcheck(objp);
2860 page = virt_to_head_page(objp);
2861
2862 slabp = page_get_slab(page);
2863
2864 if (cachep->flags & SLAB_RED_ZONE) {
2865 verify_redzone_free(cachep, objp);
2866 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2867 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2868 }
2869 if (cachep->flags & SLAB_STORE_USER)
2870 *dbg_userword(cachep, objp) = caller;
2871
2872 objnr = obj_to_index(cachep, slabp, objp);
2873
2874 BUG_ON(objnr >= cachep->num);
2875 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2876
2877#ifdef CONFIG_DEBUG_SLAB_LEAK
2878 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2879#endif
2880 if (cachep->flags & SLAB_POISON) {
2881#ifdef CONFIG_DEBUG_PAGEALLOC
2882 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2883 store_stackinfo(cachep, objp, (unsigned long)caller);
2884 kernel_map_pages(virt_to_page(objp),
2885 cachep->buffer_size / PAGE_SIZE, 0);
2886 } else {
2887 poison_obj(cachep, objp, POISON_FREE);
2888 }
2889#else
2890 poison_obj(cachep, objp, POISON_FREE);
2891#endif
2892 }
2893 return objp;
2894}
2895
2896static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2897{
2898 kmem_bufctl_t i;
2899 int entries = 0;
2900
2901
2902 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2903 entries++;
2904 if (entries > cachep->num || i >= cachep->num)
2905 goto bad;
2906 }
2907 if (entries != cachep->num - slabp->inuse) {
2908bad:
2909 printk(KERN_ERR "slab: Internal list corruption detected in "
2910 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2911 cachep->name, cachep->num, slabp, slabp->inuse);
2912 for (i = 0;
2913 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2914 i++) {
2915 if (i % 16 == 0)
2916 printk("\n%03x:", i);
2917 printk(" %02x", ((unsigned char *)slabp)[i]);
2918 }
2919 printk("\n");
2920 BUG();
2921 }
2922}
2923#else
/* Non-debug builds: the free-path checks compile away. */
#define kfree_debugcheck(x)			do { } while(0)
#define cache_free_debugcheck(x,objp,z)		(objp)
#define check_slabp(x,y)			do { } while(0)
2927#endif
2928
2929static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2930{
2931 int batchcount;
2932 struct kmem_list3 *l3;
2933 struct array_cache *ac;
2934 int node;
2935
2936 node = numa_node_id();
2937
2938 check_irq_off();
2939 ac = cpu_cache_get(cachep);
2940retry:
2941 batchcount = ac->batchcount;
2942 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2943
2944
2945
2946
2947
2948 batchcount = BATCHREFILL_LIMIT;
2949 }
2950 l3 = cachep->nodelists[node];
2951
2952 BUG_ON(ac->avail > 0 || !l3);
2953 spin_lock(&l3->list_lock);
2954
2955
2956 if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
2957 goto alloc_done;
2958
2959 while (batchcount > 0) {
2960 struct list_head *entry;
2961 struct slab *slabp;
2962
2963 entry = l3->slabs_partial.next;
2964 if (entry == &l3->slabs_partial) {
2965 l3->free_touched = 1;
2966 entry = l3->slabs_free.next;
2967 if (entry == &l3->slabs_free)
2968 goto must_grow;
2969 }
2970
2971 slabp = list_entry(entry, struct slab, list);
2972 check_slabp(cachep, slabp);
2973 check_spinlock_acquired(cachep);
2974
2975
2976
2977
2978
2979
2980 BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
2981
2982 while (slabp->inuse < cachep->num && batchcount--) {
2983 STATS_INC_ALLOCED(cachep);
2984 STATS_INC_ACTIVE(cachep);
2985 STATS_SET_HIGH(cachep);
2986
2987 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
2988 node);
2989 }
2990 check_slabp(cachep, slabp);
2991
2992
2993 list_del(&slabp->list);
2994 if (slabp->free == BUFCTL_END)
2995 list_add(&slabp->list, &l3->slabs_full);
2996 else
2997 list_add(&slabp->list, &l3->slabs_partial);
2998 }
2999
3000must_grow:
3001 l3->free_objects -= ac->avail;
3002alloc_done:
3003 spin_unlock(&l3->list_lock);
3004
3005 if (unlikely(!ac->avail)) {
3006 int x;
3007 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3008
3009
3010 ac = cpu_cache_get(cachep);
3011 if (!x && ac->avail == 0)
3012 return NULL;
3013
3014 if (!ac->avail)
3015 goto retry;
3016 }
3017 ac->touched = 1;
3018 return ac->entry[--ac->avail];
3019}
3020
3021static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3022 gfp_t flags)
3023{
3024 might_sleep_if(flags & __GFP_WAIT);
3025#if DEBUG
3026 kmem_flagcheck(cachep, flags);
3027#endif
3028}
3029
#if DEBUG
/*
 * Debug processing of an object that has just been allocated.
 *
 * @objp is the object base (NULL is passed straight through).  For
 * poisoned caches the free-poison is verified (or the pages are mapped
 * back in under CONFIG_DEBUG_PAGEALLOC) and the object is re-poisoned with
 * POISON_INUSE.  @caller is stored as the user word, the redzones are
 * checked against RED_INACTIVE and set to RED_ACTIVE, and with
 * CONFIG_DEBUG_SLAB_LEAK the bufctl slot is marked BUFCTL_ACTIVE.
 *
 * Returns the payload pointer (base + obj_offset).  For poisoned caches
 * the constructor is run here, on the payload.
 */
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
				gfp_t flags, void *objp, void *caller)
{
	if (!objp)
		return objp;
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 1);
		else
			check_poison_obj(cachep, objp);
#else
		check_poison_obj(cachep, objp);
#endif
		poison_obj(cachep, objp, POISON_INUSE);
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	if (cachep->flags & SLAB_RED_ZONE) {
		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
			slab_error(cachep, "double free, or memory outside"
						" object was overwritten");
			printk(KERN_ERR
				"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
				objp, *dbg_redzone1(cachep, objp),
				*dbg_redzone2(cachep, objp));
		}
		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
	}
#ifdef CONFIG_DEBUG_SLAB_LEAK
	{
		struct slab *slabp;
		unsigned objnr;

		slabp = page_get_slab(virt_to_head_page(objp));
		objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
	}
#endif
	objp += obj_offset(cachep);
	if (cachep->ctor && cachep->flags & SLAB_POISON)
		cachep->ctor(objp, cachep, 0);
#if ARCH_SLAB_MINALIGN
	/*
	 * Test the address as unsigned long: a u32 cast truncates the
	 * pointer on 64-bit and draws a pointer-size warning.
	 */
	if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) {
		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
		       objp, (int)ARCH_SLAB_MINALIGN);
	}
#endif
	return objp;
}
#else
#define cache_alloc_debugcheck_after(a,b,objp,d)	(objp)
#endif
3088
3089#ifdef CONFIG_FAILSLAB
3090
3091static struct failslab_attr {
3092
3093 struct fault_attr attr;
3094
3095 u32 ignore_gfp_wait;
3096#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3097 struct dentry *ignore_gfp_wait_file;
3098#endif
3099
3100} failslab = {
3101 .attr = FAULT_ATTR_INITIALIZER,
3102 .ignore_gfp_wait = 1,
3103};
3104
3105static int __init setup_failslab(char *str)
3106{
3107 return setup_fault_attr(&failslab.attr, str);
3108}
3109__setup("failslab=", setup_failslab);
3110
3111static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3112{
3113 if (cachep == &cache_cache)
3114 return 0;
3115 if (flags & __GFP_NOFAIL)
3116 return 0;
3117 if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
3118 return 0;
3119
3120 return should_fail(&failslab.attr, obj_size(cachep));
3121}
3122
3123#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
3124
3125static int __init failslab_debugfs(void)
3126{
3127 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
3128 struct dentry *dir;
3129 int err;
3130
3131 err = init_fault_attr_dentries(&failslab.attr, "failslab");
3132 if (err)
3133 return err;
3134 dir = failslab.attr.dentries.dir;
3135
3136 failslab.ignore_gfp_wait_file =
3137 debugfs_create_bool("ignore-gfp-wait", mode, dir,
3138 &failslab.ignore_gfp_wait);
3139
3140 if (!failslab.ignore_gfp_wait_file) {
3141 err = -ENOMEM;
3142 debugfs_remove(failslab.ignore_gfp_wait_file);
3143 cleanup_fault_attr_dentries(&failslab.attr);
3144 }
3145
3146 return err;
3147}
3148
3149late_initcall(failslab_debugfs);
3150
3151#endif
3152
3153#else
3154
3155static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
3156{
3157 return 0;
3158}
3159
3160#endif
3161
3162static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3163{
3164 void *objp;
3165 struct array_cache *ac;
3166
3167 check_irq_off();
3168
3169 ac = cpu_cache_get(cachep);
3170 if (likely(ac->avail)) {
3171 STATS_INC_ALLOCHIT(cachep);
3172 ac->touched = 1;
3173 objp = ac->entry[--ac->avail];
3174 } else {
3175 STATS_INC_ALLOCMISS(cachep);
3176 objp = cache_alloc_refill(cachep, flags);
3177 }
3178 return objp;
3179}
3180
3181#ifdef CONFIG_NUMA
3182
3183
3184
3185
3186
3187
3188static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3189{
3190 int nid_alloc, nid_here;
3191
3192 if (in_interrupt() || (flags & __GFP_THISNODE))
3193 return NULL;
3194 nid_alloc = nid_here = numa_node_id();
3195 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3196 nid_alloc = cpuset_mem_spread_node();
3197 else if (current->mempolicy)
3198 nid_alloc = slab_node(current->mempolicy);
3199 if (nid_alloc != nid_here)
3200 return ____cache_alloc_node(cachep, flags, nid_alloc);
3201 return NULL;
3202}
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3213{
3214 struct zonelist *zonelist;
3215 gfp_t local_flags;
3216 struct zone **z;
3217 void *obj = NULL;
3218 int nid;
3219
3220 if (flags & __GFP_THISNODE)
3221 return NULL;
3222
3223 zonelist = &NODE_DATA(slab_node(current->mempolicy))
3224 ->node_zonelists[gfp_zone(flags)];
3225 local_flags = (flags & GFP_LEVEL_MASK);
3226
3227retry:
3228
3229
3230
3231
3232 for (z = zonelist->zones; *z && !obj; z++) {
3233 nid = zone_to_nid(*z);
3234
3235 if (cpuset_zone_allowed_hardwall(*z, flags) &&
3236 cache->nodelists[nid] &&
3237 cache->nodelists[nid]->free_objects)
3238 obj = ____cache_alloc_node(cache,
3239 flags | GFP_THISNODE, nid);
3240 }
3241
3242 if (!obj) {
3243
3244
3245
3246
3247
3248
3249 if (local_flags & __GFP_WAIT)
3250 local_irq_enable();
3251 kmem_flagcheck(cache, flags);
3252 obj = kmem_getpages(cache, flags, -1);
3253 if (local_flags & __GFP_WAIT)
3254 local_irq_disable();
3255 if (obj) {
3256
3257
3258
3259 nid = page_to_nid(virt_to_page(obj));
3260 if (cache_grow(cache, flags, nid, obj)) {
3261 obj = ____cache_alloc_node(cache,
3262 flags | GFP_THISNODE, nid);
3263 if (!obj)
3264
3265
3266
3267
3268
3269 goto retry;
3270 } else {
3271
3272 obj = NULL;
3273 }
3274 }
3275 }
3276 return obj;
3277}
3278
3279
3280
3281
3282static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3283 int nodeid)
3284{
3285 struct list_head *entry;
3286 struct slab *slabp;
3287 struct kmem_list3 *l3;
3288 void *obj;
3289 int x;
3290
3291 l3 = cachep->nodelists[nodeid];
3292 BUG_ON(!l3);
3293
3294retry:
3295 check_irq_off();
3296 spin_lock(&l3->list_lock);
3297 entry = l3->slabs_partial.next;
3298 if (entry == &l3->slabs_partial) {
3299 l3->free_touched = 1;
3300 entry = l3->slabs_free.next;
3301 if (entry == &l3->slabs_free)
3302 goto must_grow;
3303 }
3304
3305 slabp = list_entry(entry, struct slab, list);
3306 check_spinlock_acquired_node(cachep, nodeid);
3307 check_slabp(cachep, slabp);
3308
3309 STATS_INC_NODEALLOCS(cachep);
3310 STATS_INC_ACTIVE(cachep);
3311 STATS_SET_HIGH(cachep);
3312
3313 BUG_ON(slabp->inuse == cachep->num);
3314
3315 obj = slab_get_obj(cachep, slabp, nodeid);
3316 check_slabp(cachep, slabp);
3317 l3->free_objects--;
3318
3319 list_del(&slabp->list);
3320
3321 if (slabp->free == BUFCTL_END)
3322 list_add(&slabp->list, &l3->slabs_full);
3323 else
3324 list_add(&slabp->list, &l3->slabs_partial);
3325
3326 spin_unlock(&l3->list_lock);
3327 goto done;
3328
3329must_grow:
3330 spin_unlock(&l3->list_lock);
3331 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3332 if (x)
3333 goto retry;
3334
3335 return fallback_alloc(cachep, flags);
3336
3337done:
3338 return obj;
3339}
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353static __always_inline void *
3354__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3355 void *caller)
3356{
3357 unsigned long save_flags;
3358 void *ptr;
3359
3360 if (should_failslab(cachep, flags))
3361 return NULL;
3362
3363 cache_alloc_debugcheck_before(cachep, flags);
3364 local_irq_save(save_flags);
3365
3366 if (unlikely(nodeid == -1))
3367 nodeid = numa_node_id();
3368
3369 if (unlikely(!cachep->nodelists[nodeid])) {
3370
3371 ptr = fallback_alloc(cachep, flags);
3372 goto out;
3373 }
3374
3375 if (nodeid == numa_node_id()) {
3376
3377
3378
3379
3380
3381
3382 ptr = ____cache_alloc(cachep, flags);
3383 if (ptr)
3384 goto out;
3385 }
3386
3387 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3388 out:
3389 local_irq_restore(save_flags);
3390 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3391
3392 return ptr;
3393}
3394
3395static __always_inline void *
3396__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3397{
3398 void *objp;
3399
3400 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3401 objp = alternate_node_alloc(cache, flags);
3402 if (objp)
3403 goto out;
3404 }
3405 objp = ____cache_alloc(cache, flags);
3406
3407
3408
3409
3410
3411 if (!objp)
3412 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3413
3414 out:
3415 return objp;
3416}
3417#else
3418
3419static __always_inline void *
3420__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3421{
3422 return ____cache_alloc(cachep, flags);
3423}
3424
3425#endif
3426
3427static __always_inline void *
3428__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3429{
3430 unsigned long save_flags;
3431 void *objp;
3432
3433 if (should_failslab(cachep, flags))
3434 return NULL;
3435
3436 cache_alloc_debugcheck_before(cachep, flags);
3437 local_irq_save(save_flags);
3438 objp = __do_cache_alloc(cachep, flags);
3439 local_irq_restore(save_flags);
3440 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3441 prefetchw(objp);
3442
3443 return objp;
3444}
3445
3446
3447
3448
3449static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3450 int node)
3451{
3452 int i;
3453 struct kmem_list3 *l3;
3454
3455 for (i = 0; i < nr_objects; i++) {
3456 void *objp = objpp[i];
3457 struct slab *slabp;
3458
3459 slabp = virt_to_slab(objp);
3460 l3 = cachep->nodelists[node];
3461 list_del(&slabp->list);
3462 check_spinlock_acquired_node(cachep, node);
3463 check_slabp(cachep, slabp);
3464 slab_put_obj(cachep, slabp, objp, node);
3465 STATS_DEC_ACTIVE(cachep);
3466 l3->free_objects++;
3467 check_slabp(cachep, slabp);
3468
3469
3470 if (slabp->inuse == 0) {
3471 if (l3->free_objects > l3->free_limit) {
3472 l3->free_objects -= cachep->num;
3473
3474
3475
3476
3477
3478
3479 slab_destroy(cachep, slabp);
3480 } else {
3481 list_add(&slabp->list, &l3->slabs_free);
3482 }
3483 } else {
3484
3485
3486
3487
3488 list_add_tail(&slabp->list, &l3->slabs_partial);
3489 }
3490 }
3491}
3492
3493static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3494{
3495 int batchcount;
3496 struct kmem_list3 *l3;
3497 int node = numa_node_id();
3498
3499 batchcount = ac->batchcount;
3500#if DEBUG
3501 BUG_ON(!batchcount || batchcount > ac->avail);
3502#endif
3503 check_irq_off();
3504 l3 = cachep->nodelists[node];
3505 spin_lock(&l3->list_lock);
3506 if (l3->shared) {
3507 struct array_cache *shared_array = l3->shared;
3508 int max = shared_array->limit - shared_array->avail;
3509 if (max) {
3510 if (batchcount > max)
3511 batchcount = max;
3512 memcpy(&(shared_array->entry[shared_array->avail]),
3513 ac->entry, sizeof(void *) * batchcount);
3514 shared_array->avail += batchcount;
3515 goto free_done;
3516 }
3517 }
3518
3519 free_block(cachep, ac->entry, batchcount, node);
3520free_done:
3521#if STATS
3522 {
3523 int i = 0;
3524 struct list_head *p;
3525
3526 p = l3->slabs_free.next;
3527 while (p != &(l3->slabs_free)) {
3528 struct slab *slabp;
3529
3530 slabp = list_entry(p, struct slab, list);
3531 BUG_ON(slabp->inuse);
3532
3533 i++;
3534 p = p->next;
3535 }
3536 STATS_SET_FREEABLE(cachep, i);
3537 }
3538#endif
3539 spin_unlock(&l3->list_lock);
3540 ac->avail -= batchcount;
3541 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3542}
3543
3544
3545
3546
3547
3548static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3549{
3550 struct array_cache *ac = cpu_cache_get(cachep);
3551
3552 check_irq_off();
3553 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3554
3555 if (cache_free_alien(cachep, objp))
3556 return;
3557
3558 if (likely(ac->avail < ac->limit)) {
3559 STATS_INC_FREEHIT(cachep);
3560 ac->entry[ac->avail++] = objp;
3561 return;
3562 } else {
3563 STATS_INC_FREEMISS(cachep);
3564 cache_flusharray(cachep, ac);
3565 ac->entry[ac->avail++] = objp;
3566 }
3567}
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3578{
3579 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3580}
3581EXPORT_SYMBOL(kmem_cache_alloc);
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
3592{
3593 void *ret = __cache_alloc(cache, flags, __builtin_return_address(0));
3594 if (ret)
3595 memset(ret, 0, obj_size(cache));
3596 return ret;
3597}
3598EXPORT_SYMBOL(kmem_cache_zalloc);
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3615{
3616 unsigned long addr = (unsigned long)ptr;
3617 unsigned long min_addr = PAGE_OFFSET;
3618 unsigned long align_mask = BYTES_PER_WORD - 1;
3619 unsigned long size = cachep->buffer_size;
3620 struct page *page;
3621
3622 if (unlikely(addr < min_addr))
3623 goto out;
3624 if (unlikely(addr > (unsigned long)high_memory - size))
3625 goto out;
3626 if (unlikely(addr & align_mask))
3627 goto out;
3628 if (unlikely(!kern_addr_valid(addr)))
3629 goto out;
3630 if (unlikely(!kern_addr_valid(addr + size - 1)))
3631 goto out;
3632 page = virt_to_page(ptr);
3633 if (unlikely(!PageSlab(page)))
3634 goto out;
3635 if (unlikely(page_get_cache(page) != cachep))
3636 goto out;
3637 return 1;
3638out:
3639 return 0;
3640}
3641
3642#ifdef CONFIG_NUMA
3643void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3644{
3645 return __cache_alloc_node(cachep, flags, nodeid,
3646 __builtin_return_address(0));
3647}
3648EXPORT_SYMBOL(kmem_cache_alloc_node);
3649
3650static __always_inline void *
3651__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3652{
3653 struct kmem_cache *cachep;
3654
3655 cachep = kmem_find_general_cachep(size, flags);
3656 if (unlikely(cachep == NULL))
3657 return NULL;
3658 return kmem_cache_alloc_node(cachep, flags, node);
3659}
3660
3661#ifdef CONFIG_DEBUG_SLAB
3662void *__kmalloc_node(size_t size, gfp_t flags, int node)
3663{
3664 return __do_kmalloc_node(size, flags, node,
3665 __builtin_return_address(0));
3666}
3667EXPORT_SYMBOL(__kmalloc_node);
3668
3669void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3670 int node, void *caller)
3671{
3672 return __do_kmalloc_node(size, flags, node, caller);
3673}
3674EXPORT_SYMBOL(__kmalloc_node_track_caller);
3675#else
3676void *__kmalloc_node(size_t size, gfp_t flags, int node)
3677{
3678 return __do_kmalloc_node(size, flags, node, NULL);
3679}
3680EXPORT_SYMBOL(__kmalloc_node);
3681#endif
3682#endif
3683
3684
3685
3686
3687
3688
3689
3690static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3691 void *caller)
3692{
3693 struct kmem_cache *cachep;
3694
3695
3696
3697
3698
3699
3700 cachep = __find_general_cachep(size, flags);
3701 if (unlikely(cachep == NULL))
3702 return NULL;
3703 return __cache_alloc(cachep, flags, caller);
3704}
3705
3706
3707#ifdef CONFIG_DEBUG_SLAB
3708void *__kmalloc(size_t size, gfp_t flags)
3709{
3710 return __do_kmalloc(size, flags, __builtin_return_address(0));
3711}
3712EXPORT_SYMBOL(__kmalloc);
3713
3714void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
3715{
3716 return __do_kmalloc(size, flags, caller);
3717}
3718EXPORT_SYMBOL(__kmalloc_track_caller);
3719
3720#else
3721void *__kmalloc(size_t size, gfp_t flags)
3722{
3723 return __do_kmalloc(size, flags, NULL);
3724}
3725EXPORT_SYMBOL(__kmalloc);
3726#endif
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739void *krealloc(const void *p, size_t new_size, gfp_t flags)
3740{
3741 struct kmem_cache *cache, *new_cache;
3742 void *ret;
3743
3744 if (unlikely(!p))
3745 return kmalloc_track_caller(new_size, flags);
3746
3747 if (unlikely(!new_size)) {
3748 kfree(p);
3749 return NULL;
3750 }
3751
3752 cache = virt_to_cache(p);
3753 new_cache = __find_general_cachep(new_size, flags);
3754
3755
3756
3757
3758 if (likely(cache == new_cache))
3759 return (void *)p;
3760
3761
3762
3763
3764
3765 ret = kmalloc_track_caller(new_size, flags);
3766 if (ret) {
3767 memcpy(ret, p, min(new_size, ksize(p)));
3768 kfree(p);
3769 }
3770 return ret;
3771}
3772EXPORT_SYMBOL(krealloc);
3773
3774
3775
3776
3777
3778
3779
3780
3781
/*
 * kmem_cache_free - return @objp to the cache it was allocated from.
 * BUGs if @objp does not belong to @cachep.  The release itself runs with
 * local interrupts disabled.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
	unsigned long irq_state;

	BUG_ON(virt_to_cache(objp) != cachep);

	local_irq_save(irq_state);
	debug_check_no_locks_freed(objp, obj_size(cachep));
	__cache_free(cachep, objp);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(kmem_cache_free);
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
/*
 * kfree - free an object obtained from kmalloc.
 * A NULL @objp is a no-op.  The owning cache is derived from the object's
 * address; the release runs with local interrupts disabled.
 */
void kfree(const void *objp)
{
	struct kmem_cache *cache;
	unsigned long irq_state;

	if (unlikely(!objp))
		return;

	local_irq_save(irq_state);
	kfree_debugcheck(objp);
	cache = virt_to_cache(objp);
	debug_check_no_locks_freed(objp, obj_size(cache));
	__cache_free(cache, (void *)objp);
	local_irq_restore(irq_state);
}
EXPORT_SYMBOL(kfree);
3819
/* kmem_cache_size - report obj_size() of @cachep. */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	unsigned int size = obj_size(cachep);

	return size;
}
EXPORT_SYMBOL(kmem_cache_size);
3825
3826const char *kmem_cache_name(struct kmem_cache *cachep)
3827{
3828 return cachep->name;
3829}
3830EXPORT_SYMBOL_GPL(kmem_cache_name);
3831
3832
3833
3834
3835static int alloc_kmemlist(struct kmem_cache *cachep)
3836{
3837 int node;
3838 struct kmem_list3 *l3;
3839 struct array_cache *new_shared;
3840 struct array_cache **new_alien = NULL;
3841
3842 for_each_online_node(node) {
3843
3844 if (use_alien_caches) {
3845 new_alien = alloc_alien_cache(node, cachep->limit);
3846 if (!new_alien)
3847 goto fail;
3848 }
3849
3850 new_shared = NULL;
3851 if (cachep->shared) {
3852 new_shared = alloc_arraycache(node,
3853 cachep->shared*cachep->batchcount,
3854 0xbaadf00d);
3855 if (!new_shared) {
3856 free_alien_cache(new_alien);
3857 goto fail;
3858 }
3859 }
3860
3861 l3 = cachep->nodelists[node];
3862 if (l3) {
3863 struct array_cache *shared = l3->shared;
3864
3865 spin_lock_irq(&l3->list_lock);
3866
3867 if (shared)
3868 free_block(cachep, shared->entry,
3869 shared->avail, node);
3870
3871 l3->shared = new_shared;
3872 if (!l3->alien) {
3873 l3->alien = new_alien;
3874 new_alien = NULL;
3875 }
3876 l3->free_limit = (1 + nr_cpus_node(node)) *
3877 cachep->batchcount + cachep->num;
3878 spin_unlock_irq(&l3->list_lock);
3879 kfree(shared);
3880 free_alien_cache(new_alien);
3881 continue;
3882 }
3883 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
3884 if (!l3) {
3885 free_alien_cache(new_alien);
3886 kfree(new_shared);
3887 goto fail;
3888 }
3889
3890 kmem_list3_init(l3);
3891 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3892 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3893 l3->shared = new_shared;
3894 l3->alien = new_alien;
3895 l3->free_limit = (1 + nr_cpus_node(node)) *
3896 cachep->batchcount + cachep->num;
3897 cachep->nodelists[node] = l3;
3898 }
3899 return 0;
3900
3901fail:
3902 if (!cachep->next.next) {
3903
3904 node--;
3905 while (node >= 0) {
3906 if (cachep->nodelists[node]) {
3907 l3 = cachep->nodelists[node];
3908
3909 kfree(l3->shared);
3910 free_alien_cache(l3->alien);
3911 kfree(l3);
3912 cachep->nodelists[node] = NULL;
3913 }
3914 node--;
3915 }
3916 }
3917 return -ENOMEM;
3918}
3919
3920struct ccupdate_struct {
3921 struct kmem_cache *cachep;
3922 struct array_cache *new[NR_CPUS];
3923};
3924
3925static void do_ccupdate_local(void *info)
3926{
3927 struct ccupdate_struct *new = info;
3928 struct array_cache *old;
3929
3930 check_irq_off();
3931 old = cpu_cache_get(new->cachep);
3932
3933 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3934 new->new[smp_processor_id()] = old;
3935}
3936
3937
3938static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3939 int batchcount, int shared)
3940{
3941 struct ccupdate_struct *new;
3942 int i;
3943
3944 new = kzalloc(sizeof(*new), GFP_KERNEL);
3945 if (!new)
3946 return -ENOMEM;
3947
3948 for_each_online_cpu(i) {
3949 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3950 batchcount);
3951 if (!new->new[i]) {
3952 for (i--; i >= 0; i--)
3953 kfree(new->new[i]);
3954 kfree(new);
3955 return -ENOMEM;
3956 }
3957 }
3958 new->cachep = cachep;
3959
3960 on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
3961
3962 check_irq_on();
3963 cachep->batchcount = batchcount;
3964 cachep->limit = limit;
3965 cachep->shared = shared;
3966
3967 for_each_online_cpu(i) {
3968 struct array_cache *ccold = new->new[i];
3969 if (!ccold)
3970 continue;
3971 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3972 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
3973 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3974 kfree(ccold);
3975 }
3976 kfree(new);
3977 return alloc_kmemlist(cachep);
3978}
3979
3980
3981static int enable_cpucache(struct kmem_cache *cachep)
3982{
3983 int err;
3984 int limit, shared;
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995 if (cachep->buffer_size > 131072)
3996 limit = 1;
3997 else if (cachep->buffer_size > PAGE_SIZE)
3998 limit = 8;
3999 else if (cachep->buffer_size > 1024)
4000 limit = 24;
4001 else if (cachep->buffer_size > 256)
4002 limit = 54;
4003 else
4004 limit = 120;
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015 shared = 0;
4016 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4017 shared = 8;
4018
4019#if DEBUG
4020
4021
4022
4023
4024 if (limit > 32)
4025 limit = 32;
4026#endif
4027 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
4028 if (err)
4029 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4030 cachep->name, -err);
4031 return err;
4032}
4033
4034
4035
4036
4037
4038
4039void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4040 struct array_cache *ac, int force, int node)
4041{
4042 int tofree;
4043
4044 if (!ac || !ac->avail)
4045 return;
4046 if (ac->touched && !force) {
4047 ac->touched = 0;
4048 } else {
4049 spin_lock_irq(&l3->list_lock);
4050 if (ac->avail) {
4051 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4052 if (tofree > ac->avail)
4053 tofree = (ac->avail + 1) / 2;
4054 free_block(cachep, ac->entry, tofree, node);
4055 ac->avail -= tofree;
4056 memmove(ac->entry, &(ac->entry[tofree]),
4057 sizeof(void *) * ac->avail);
4058 }
4059 spin_unlock_irq(&l3->list_lock);
4060 }
4061}
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075static void cache_reap(struct work_struct *w)
4076{
4077 struct kmem_cache *searchp;
4078 struct kmem_list3 *l3;
4079 int node = numa_node_id();
4080 struct delayed_work *work =
4081 container_of(w, struct delayed_work, work);
4082
4083 if (!mutex_trylock(&cache_chain_mutex))
4084
4085 goto out;
4086
4087 list_for_each_entry(searchp, &cache_chain, next) {
4088 check_irq_on();
4089
4090
4091
4092
4093
4094
4095 l3 = searchp->nodelists[node];
4096
4097 reap_alien(searchp, l3);
4098
4099 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4100
4101
4102
4103
4104
4105 if (time_after(l3->next_reap, jiffies))
4106 goto next;
4107
4108 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4109
4110 drain_array(searchp, l3, l3->shared, 0, node);
4111
4112 if (l3->free_touched)
4113 l3->free_touched = 0;
4114 else {
4115 int freed;
4116
4117 freed = drain_freelist(searchp, l3, (l3->free_limit +
4118 5 * searchp->num - 1) / (5 * searchp->num));
4119 STATS_ADD_REAPED(searchp, freed);
4120 }
4121next:
4122 cond_resched();
4123 }
4124 check_irq_on();
4125 mutex_unlock(&cache_chain_mutex);
4126 next_reap_node();
4127out:
4128
4129 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4130}
4131
4132#ifdef CONFIG_PROC_FS
4133
4134static void print_slabinfo_header(struct seq_file *m)
4135{
4136
4137
4138
4139
4140#if STATS
4141 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4142#else
4143 seq_puts(m, "slabinfo - version: 2.1\n");
4144#endif
4145 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4146 "<objperslab> <pagesperslab>");
4147 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4148 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4149#if STATS
4150 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4151 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4152 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4153#endif
4154 seq_putc(m, '\n');
4155}
4156
4157static void *s_start(struct seq_file *m, loff_t *pos)
4158{
4159 loff_t n = *pos;
4160 struct list_head *p;
4161
4162 mutex_lock(&cache_chain_mutex);
4163 if (!n)
4164 print_slabinfo_header(m);
4165 p = cache_chain.next;
4166 while (n--) {
4167 p = p->next;
4168 if (p == &cache_chain)
4169 return NULL;
4170 }
4171 return list_entry(p, struct kmem_cache, next);
4172}
4173
4174static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4175{
4176 struct kmem_cache *cachep = p;
4177 ++*pos;
4178 return cachep->next.next == &cache_chain ?
4179 NULL : list_entry(cachep->next.next, struct kmem_cache, next);
4180}
4181
4182static void s_stop(struct seq_file *m, void *p)
4183{
4184 mutex_unlock(&cache_chain_mutex);
4185}
4186
4187static int s_show(struct seq_file *m, void *p)
4188{
4189 struct kmem_cache *cachep = p;
4190 struct slab *slabp;
4191 unsigned long active_objs;
4192 unsigned long num_objs;
4193 unsigned long active_slabs = 0;
4194 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4195 const char *name;
4196 char *error = NULL;
4197 int node;
4198 struct kmem_list3 *l3;
4199
4200 active_objs = 0;
4201 num_slabs = 0;
4202 for_each_online_node(node) {
4203 l3 = cachep->nodelists[node];
4204 if (!l3)
4205 continue;
4206
4207 check_irq_on();
4208 spin_lock_irq(&l3->list_lock);
4209
4210 list_for_each_entry(slabp, &l3->slabs_full, list) {
4211 if (slabp->inuse != cachep->num && !error)
4212 error = "slabs_full accounting error";
4213 active_objs += cachep->num;
4214 active_slabs++;
4215 }
4216 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4217 if (slabp->inuse == cachep->num && !error)
4218 error = "slabs_partial inuse accounting error";
4219 if (!slabp->inuse && !error)
4220 error = "slabs_partial/inuse accounting error";
4221 active_objs += slabp->inuse;
4222 active_slabs++;
4223 }
4224 list_for_each_entry(slabp, &l3->slabs_free, list) {
4225 if (slabp->inuse && !error)
4226 error = "slabs_free/inuse accounting error";
4227 num_slabs++;
4228 }
4229 free_objects += l3->free_objects;
4230 if (l3->shared)
4231 shared_avail += l3->shared->avail;
4232
4233 spin_unlock_irq(&l3->list_lock);
4234 }
4235 num_slabs += active_slabs;
4236 num_objs = num_slabs * cachep->num;
4237 if (num_objs - active_objs != free_objects && !error)
4238 error = "free_objects accounting error";
4239
4240 name = cachep->name;
4241 if (error)
4242 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4243
4244 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4245 name, active_objs, num_objs, cachep->buffer_size,
4246 cachep->num, (1 << cachep->gfporder));
4247 seq_printf(m, " : tunables %4u %4u %4u",
4248 cachep->limit, cachep->batchcount, cachep->shared);
4249 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4250 active_slabs, num_slabs, shared_avail);
4251#if STATS
4252 {
4253 unsigned long high = cachep->high_mark;
4254 unsigned long allocs = cachep->num_allocations;
4255 unsigned long grown = cachep->grown;
4256 unsigned long reaped = cachep->reaped;
4257 unsigned long errors = cachep->errors;
4258 unsigned long max_freeable = cachep->max_freeable;
4259 unsigned long node_allocs = cachep->node_allocs;
4260 unsigned long node_frees = cachep->node_frees;
4261 unsigned long overflows = cachep->node_overflow;
4262
4263 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4264 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4265 reaped, errors, max_freeable, node_allocs,
4266 node_frees, overflows);
4267 }
4268
4269 {
4270 unsigned long allochit = atomic_read(&cachep->allochit);
4271 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4272 unsigned long freehit = atomic_read(&cachep->freehit);
4273 unsigned long freemiss = atomic_read(&cachep->freemiss);
4274
4275 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4276 allochit, allocmiss, freehit, freemiss);
4277 }
4278#endif
4279 seq_putc(m, '\n');
4280 return 0;
4281}
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297const struct seq_operations slabinfo_op = {
4298 .start = s_start,
4299 .next = s_next,
4300 .stop = s_stop,
4301 .show = s_show,
4302};
4303
4304#define MAX_SLABINFO_WRITE 128
4305
4306
4307
4308
4309
4310
4311
4312ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4313 size_t count, loff_t *ppos)
4314{
4315 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4316 int limit, batchcount, shared, res;
4317 struct kmem_cache *cachep;
4318
4319 if (count > MAX_SLABINFO_WRITE)
4320 return -EINVAL;
4321 if (copy_from_user(&kbuf, buffer, count))
4322 return -EFAULT;
4323 kbuf[MAX_SLABINFO_WRITE] = '\0';
4324
4325 tmp = strchr(kbuf, ' ');
4326 if (!tmp)
4327 return -EINVAL;
4328 *tmp = '\0';
4329 tmp++;
4330 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4331 return -EINVAL;
4332
4333
4334 mutex_lock(&cache_chain_mutex);
4335 res = -EINVAL;
4336 list_for_each_entry(cachep, &cache_chain, next) {
4337 if (!strcmp(cachep->name, kbuf)) {
4338 if (limit < 1 || batchcount < 1 ||
4339 batchcount > limit || shared < 0) {
4340 res = 0;
4341 } else {
4342 res = do_tune_cpucache(cachep, limit,
4343 batchcount, shared);
4344 }
4345 break;
4346 }
4347 }
4348 mutex_unlock(&cache_chain_mutex);
4349 if (res >= 0)
4350 res = count;
4351 return res;
4352}
4353
4354#ifdef CONFIG_DEBUG_SLAB_LEAK
4355
4356static void *leaks_start(struct seq_file *m, loff_t *pos)
4357{
4358 loff_t n = *pos;
4359 struct list_head *p;
4360
4361 mutex_lock(&cache_chain_mutex);
4362 p = cache_chain.next;
4363 while (n--) {
4364 p = p->next;
4365 if (p == &cache_chain)
4366 return NULL;
4367 }
4368 return list_entry(p, struct kmem_cache, next);
4369}
4370
/*
 * Record one occurrence of caller address @v in the table @n.
 *
 * Layout of @n: n[0] is the capacity, n[1] the number of entries in use,
 * followed by (address, count) pairs kept sorted by address.
 *
 * A zero @v is ignored.  An address already present gets its count
 * incremented; a new one is inserted at its sorted position with count 1.
 *
 * Returns 1 on success (including the ignored case) and 0 when the entry
 * count reaches the capacity, in which case nothing is inserted but n[1]
 * has already been incremented.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *lo;
	int remaining;

	if (v == 0)
		return 1;

	/* Binary search over the pairs for @v or its insertion point. */
	lo = n + 2;
	remaining = n[1];
	while (remaining != 0) {
		int half = remaining / 2;
		unsigned long *mid = lo + 2 * half;

		if (*mid == v) {
			mid[1]++;
			return 1;
		}
		if (*mid > v) {
			remaining = half;
			continue;
		}
		lo = mid + 2;
		remaining -= half + 1;
	}

	n[1]++;
	if (n[1] == n[0])
		return 0;

	/* Shift the pairs at and after the insertion point up by one. */
	memmove(lo + 2, lo,
		n[1] * 2 * sizeof(unsigned long) - ((char *)lo - (char *)n));
	lo[0] = v;
	lo[1] = 1;
	return 1;
}
4400
4401static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4402{
4403 void *p;
4404 int i;
4405 if (n[0] == n[1])
4406 return;
4407 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4408 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4409 continue;
4410 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4411 return;
4412 }
4413}
4414
/*
 * Print @address to @m.  With CONFIG_KALLSYMS and a successful symbol
 * lookup the output is "name+offset/size", followed by " [module]" when a
 * module name was returned; otherwise the raw pointer value is printed.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char name[KSYM_NAME_LEN + 1];
	char modname[MODULE_NAME_LEN + 1];
	unsigned long size, offset;

	if (!lookup_symbol_attrs(address, &size, &offset, modname, name)) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0])
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4430
4431static int leaks_show(struct seq_file *m, void *p)
4432{
4433 struct kmem_cache *cachep = p;
4434 struct slab *slabp;
4435 struct kmem_list3 *l3;
4436 const char *name;
4437 unsigned long *n = m->private;
4438 int node;
4439 int i;
4440
4441 if (!(cachep->flags & SLAB_STORE_USER))
4442 return 0;
4443 if (!(cachep->flags & SLAB_RED_ZONE))
4444 return 0;
4445
4446
4447
4448 n[1] = 0;
4449
4450 for_each_online_node(node) {
4451 l3 = cachep->nodelists[node];
4452 if (!l3)
4453 continue;
4454
4455 check_irq_on();
4456 spin_lock_irq(&l3->list_lock);
4457
4458 list_for_each_entry(slabp, &l3->slabs_full, list)
4459 handle_slab(n, cachep, slabp);
4460 list_for_each_entry(slabp, &l3->slabs_partial, list)
4461 handle_slab(n, cachep, slabp);
4462 spin_unlock_irq(&l3->list_lock);
4463 }
4464 name = cachep->name;
4465 if (n[0] == n[1]) {
4466
4467 mutex_unlock(&cache_chain_mutex);
4468 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4469 if (!m->private) {
4470
4471 m->private = n;
4472 mutex_lock(&cache_chain_mutex);
4473 return -ENOMEM;
4474 }
4475 *(unsigned long *)m->private = n[0] * 2;
4476 kfree(n);
4477 mutex_lock(&cache_chain_mutex);
4478
4479 m->count = m->size;
4480 return 0;
4481 }
4482 for (i = 0; i < n[1]; i++) {
4483 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4484 show_symbol(m, n[2*i+2]);
4485 seq_putc(m, '\n');
4486 }
4487
4488 return 0;
4489}
4490
4491const struct seq_operations slabstats_op = {
4492 .start = leaks_start,
4493 .next = s_next,
4494 .stop = s_stop,
4495 .show = leaks_show,
4496};
4497#endif
4498#endif
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512size_t ksize(const void *objp)
4513{
4514 if (unlikely(objp == NULL))
4515 return 0;
4516
4517 return obj_size(virt_to_cache(objp));
4518}
4519