1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include <linux/mm.h>
91#include <linux/poison.h>
92#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
97#include <linux/cpuset.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/notifier.h>
101#include <linux/kallsyms.h>
102#include <linux/cpu.h>
103#include <linux/sysctl.h>
104#include <linux/module.h>
105#include <trace/kmemtrace.h>
106#include <linux/rcupdate.h>
107#include <linux/string.h>
108#include <linux/uaccess.h>
109#include <linux/nodemask.h>
110#include <linux/mempolicy.h>
111#include <linux/mutex.h>
112#include <linux/fault-inject.h>
113#include <linux/rtmutex.h>
114#include <linux/reciprocal_div.h>
115#include <linux/debugobjects.h>
116
117#include <asm/cacheflush.h>
118#include <asm/tlbflush.h>
119#include <asm/page.h>
120
121
122
123
124
125
126
127
128
129
130
/*
 * Build-time switches, all derived from CONFIG_DEBUG_SLAB:
 *   DEBUG        - compiles in the debug-only accessors and checks
 *   STATS        - compiles in the per-cache statistics counters
 *   FORCED_DEBUG - set together with DEBUG; consumed outside this section
 */
#ifndef CONFIG_DEBUG_SLAB
#define DEBUG		0
#define STATS		0
#define FORCED_DEBUG	0
#else
#define DEBUG		1
#define STATS		1
#define FORCED_DEBUG	1
#endif
140
141
/* Size in bytes of one machine word (a pointer). */
#define BYTES_PER_WORD		sizeof(void *)
/* Red-zone alignment: the larger of a word and an unsigned long long. */
#define REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

/*
 * Minimum alignment of kmalloc caches.  An architecture may pre-define it;
 * otherwise the natural alignment of unsigned long long is used.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN	__alignof__(unsigned long long)
#endif

/*
 * Minimum alignment of every slab cache.  An architecture may pre-define
 * it; the fallback of 0 imposes no extra requirement.
 */
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN	0
#endif

/* Creation flags applied to the kmalloc caches unless the arch overrides. */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS	SLAB_HWCACHE_ALIGN
#endif
172
173
/*
 * CREATE_MASK: the set of SLAB_* creation flags this file recognises.
 * Debug builds additionally accept the red-zone, poison and store-user
 * flags.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | \
			 SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | \
			 SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | \
			 SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | \
			 SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | \
			 SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS)
#endif
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/*
 * kmem_bufctl_t: per-object index type stored in the slab management area.
 * The three largest values are reserved as sentinels; SLAB_LIMIT is the
 * largest value below them and caps the object count per slab.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	(((kmem_bufctl_t)(~0U)) - 0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U)) - 1)
#define BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U)) - 2)
#define SLAB_LIMIT	(((kmem_bufctl_t)(~0U)) - 3)
214
215
216
217
218
219
220
221
222struct slab {
223 struct list_head list;
224 unsigned long colouroff;
225 void *s_mem;
226 unsigned int inuse;
227 kmem_bufctl_t free;
228 unsigned short nodeid;
229};
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247struct slab_rcu {
248 struct rcu_head head;
249 struct kmem_cache *cachep;
250 void *addr;
251};
252
253
254
255
256
257
258
259
260
261
262
263
264
265struct array_cache {
266 unsigned int avail;
267 unsigned int limit;
268 unsigned int batchcount;
269 unsigned int touched;
270 spinlock_t lock;
271 void *entry[];
272
273
274
275
276};
277
278
279
280
281
282#define BOOT_CPUCACHE_ENTRIES 1
283struct arraycache_init {
284 struct array_cache cache;
285 void *entries[BOOT_CPUCACHE_ENTRIES];
286};
287
288
289
290
291struct kmem_list3 {
292 struct list_head slabs_partial;
293 struct list_head slabs_full;
294 struct list_head slabs_free;
295 unsigned long free_objects;
296 unsigned int free_limit;
297 unsigned int colour_next;
298 spinlock_t list_lock;
299 struct array_cache *shared;
300 struct array_cache **alien;
301 unsigned long next_reap;
302 int free_touched;
303};
304
305
306
307
308#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
309struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
310#define CACHE_CACHE 0
311#define SIZE_AC MAX_NUMNODES
312#define SIZE_L3 (2 * MAX_NUMNODES)
313
314static int drain_freelist(struct kmem_cache *cache,
315 struct kmem_list3 *l3, int tofree);
316static void free_block(struct kmem_cache *cachep, void **objpp, int len,
317 int node);
318static int enable_cpucache(struct kmem_cache *cachep);
319static void cache_reap(struct work_struct *unused);
320
321
322
323
324
325static __always_inline int index_of(const size_t size)
326{
327 extern void __bad_size(void);
328
329 if (__builtin_constant_p(size)) {
330 int i = 0;
331
332#define CACHE(x) \
333 if (size <=x) \
334 return i; \
335 else \
336 i++;
337#include <linux/kmalloc_sizes.h>
338#undef CACHE
339 __bad_size();
340 } else
341 __bad_size();
342 return 0;
343}
344
/* Starts at 1; kmem_cache_init() clears it once the first kmalloc caches exist. */
static int slab_early_init = 1;

/* Size-class indices of the kmalloc caches backing the bootstrap objects. */
#define INDEX_AC	index_of(sizeof(struct arraycache_init))
#define INDEX_L3	index_of(sizeof(struct kmem_list3))
349
350static void kmem_list3_init(struct kmem_list3 *parent)
351{
352 INIT_LIST_HEAD(&parent->slabs_full);
353 INIT_LIST_HEAD(&parent->slabs_partial);
354 INIT_LIST_HEAD(&parent->slabs_free);
355 parent->shared = NULL;
356 parent->alien = NULL;
357 parent->colour_next = 0;
358 spin_lock_init(&parent->list_lock);
359 parent->free_objects = 0;
360 parent->free_touched = 0;
361}
362
/*
 * MAKE_LIST - re-home one slab list of @cachep's node @nodeid onto @listp.
 * @slab names the list member (slabs_full, slabs_partial or slabs_free).
 * @listp is reinitialised first, then the current node list is spliced
 * onto it.  Every macro argument used as an expression is parenthesized
 * so that arbitrary argument expressions expand safely.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&((cachep)->nodelists[(nodeid)]->slab),	\
			    (listp));					\
	} while (0)

/* MAKE_ALL_LISTS - apply MAKE_LIST to all three slab lists of @ptr. */
#define MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
		MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
		MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
		MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)
375
376
377
378
379
380
381
382struct kmem_cache {
383
384 struct array_cache *array[NR_CPUS];
385
386 unsigned int batchcount;
387 unsigned int limit;
388 unsigned int shared;
389
390 unsigned int buffer_size;
391 u32 reciprocal_buffer_size;
392
393
394 unsigned int flags;
395 unsigned int num;
396
397
398
399 unsigned int gfporder;
400
401
402 gfp_t gfpflags;
403
404 size_t colour;
405 unsigned int colour_off;
406 struct kmem_cache *slabp_cache;
407 unsigned int slab_size;
408 unsigned int dflags;
409
410
411 void (*ctor)(void *obj);
412
413
414 const char *name;
415 struct list_head next;
416
417
418#if STATS
419 unsigned long num_active;
420 unsigned long num_allocations;
421 unsigned long high_mark;
422 unsigned long grown;
423 unsigned long reaped;
424 unsigned long errors;
425 unsigned long max_freeable;
426 unsigned long node_allocs;
427 unsigned long node_frees;
428 unsigned long node_overflow;
429 atomic_t allochit;
430 atomic_t allocmiss;
431 atomic_t freehit;
432 atomic_t freemiss;
433#endif
434#if DEBUG
435
436
437
438
439
440
441 int obj_offset;
442 int obj_size;
443#endif
444
445
446
447
448
449
450
451 struct kmem_list3 *nodelists[MAX_NUMNODES];
452
453
454
455};
456
/* Internal cache flag: the slab management header lives off-slab. */
#define CFLGS_OFF_SLAB		(0x80000000UL)
/* Non-zero when cache @x keeps its slab headers off-slab. */
#define OFF_SLAB(x)		((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16

/* Reap intervals in jiffies, for cpu caches and for the node lists. */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)
470
/*
 * Statistics helpers.  With STATS enabled they update the counters in
 * struct kmem_cache; otherwise each one expands to an empty statement and
 * its arguments are not evaluated.
 *
 * Macro arguments are parenthesized at every use.  Note that
 * STATS_SET_FREEABLE() evaluates @i twice when STATS is enabled.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
/* raise high_mark to the current num_active if that is larger */
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
/* raise max_freeable to @i if @i is larger */
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
512
#if DEBUG

/*
 * Debug-build object layout accessors.  All offsets are relative to the
 * start of the object's buffer (@objp); the buffer is
 * cachep->buffer_size bytes long.
 */

/* Offset of the caller-visible object inside its buffer. */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

/* Size of the caller-visible object. */
static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

/*
 * First red zone: the unsigned long long immediately in front of the
 * object.  Only valid for caches created with SLAB_RED_ZONE.
 */
static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long*) (objp + obj_offset(cachep) -
				      sizeof(unsigned long long));
}

/*
 * Second red zone: the last unsigned long long of the buffer, or, when
 * SLAB_STORE_USER is also set, the one REDZONE_ALIGN bytes earlier (the
 * tail is then occupied by the user word).
 */
static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->buffer_size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->buffer_size -
				       sizeof(unsigned long long));
}

/*
 * User word: the last pointer-sized word of the buffer.  Only valid for
 * caches created with SLAB_STORE_USER.
 */
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

/*
 * Non-debug builds: objects start at offset 0 and span the whole buffer.
 * The red-zone and user-word accessors must never be reached, so they
 * BUG().  The obj_size() argument is parenthesized so that expressions
 * such as obj_size(&cache) expand correctly.
 */
#define obj_offset(x)			0
#define obj_size(cachep)		((cachep)->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif
571
#ifdef CONFIG_KMEMTRACE
/*
 * slab_buffer_size - report the per-object buffer size of @cachep.
 * Exported; only built when CONFIG_KMEMTRACE is enabled.
 */
size_t slab_buffer_size(struct kmem_cache *cachep)
{
	size_t bytes = cachep->buffer_size;

	return bytes;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
579
580
581
582
/*
 * slab_break_gfp_order starts at the LO value; kmem_cache_init() raises
 * it to the HI value on machines with more than 32MB of memory.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
586
587
588
589
590
591
592static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
593{
594 page->lru.next = (struct list_head *)cache;
595}
596
597static inline struct kmem_cache *page_get_cache(struct page *page)
598{
599 page = compound_head(page);
600 BUG_ON(!PageSlab(page));
601 return (struct kmem_cache *)page->lru.next;
602}
603
604static inline void page_set_slab(struct page *page, struct slab *slab)
605{
606 page->lru.prev = (struct list_head *)slab;
607}
608
609static inline struct slab *page_get_slab(struct page *page)
610{
611 BUG_ON(!PageSlab(page));
612 return (struct slab *)page->lru.prev;
613}
614
/* Map an object address to the cache that owns its page. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}
620
/* Map an object address to the slab descriptor of its page. */
static inline struct slab *virt_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}
626
627static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
628 unsigned int idx)
629{
630 return slab->s_mem + cache->buffer_size * idx;
631}
632
633
634
635
636
637
638
639static inline unsigned int obj_to_index(const struct kmem_cache *cache,
640 const struct slab *slab, void *obj)
641{
642 u32 offset = (obj - slab->s_mem);
643 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
644}
645
646
647
648
649struct cache_sizes malloc_sizes[] = {
650#define CACHE(x) { .cs_size = (x) },
651#include <linux/kmalloc_sizes.h>
652 CACHE(ULONG_MAX)
653#undef CACHE
654};
655EXPORT_SYMBOL(malloc_sizes);
656
657
658struct cache_names {
659 char *name;
660 char *name_dma;
661};
662
663static struct cache_names __initdata cache_names[] = {
664#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
665#include <linux/kmalloc_sizes.h>
666 {NULL,}
667#undef CACHE
668};
669
670static struct arraycache_init initarray_cache __initdata =
671 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
672static struct arraycache_init initarray_generic =
673 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
674
675
676static struct kmem_cache cache_cache = {
677 .batchcount = 1,
678 .limit = BOOT_CPUCACHE_ENTRIES,
679 .shared = 1,
680 .buffer_size = sizeof(struct kmem_cache),
681 .name = "kmem_cache",
682};
683
/* Placeholder "alien cache" pointer returned by the non-NUMA alloc_alien_cache(). */
#define BAD_ALIEN_MAGIC 0x01020304ul

#ifdef CONFIG_LOCKDEP

/* Separate lockdep classes for the locks of on-slab kmalloc caches. */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

/*
 * init_lock_keys - assign the lockdep classes above.
 *
 * For every kmalloc size class and every node, the list_lock of an
 * on-slab cache gets on_slab_l3_key and each existing alien cache lock
 * gets on_slab_alc_key.  Off-slab caches, nodes without a kmem_list3 and
 * missing or placeholder alien arrays are skipped.
 */
static inline void init_lock_keys(void)
{
	struct cache_sizes *s;
	int q;

	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
		for_each_node(q) {
			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
			struct array_cache **alc;
			int r;

			if (!l3 || OFF_SLAB(s->cs_cachep))
				continue;
			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);

			alc = l3->alien;
			/* no alien caches, or only the non-NUMA placeholder */
			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
				continue;
			for_each_node(r) {
				if (alc[r])
					lockdep_set_class(&alc[r]->lock,
						&on_slab_alc_key);
			}
		}
	}
}
#else
/* Without lockdep there is nothing to set up. */
static inline void init_lock_keys(void)
{
}
#endif
740
741
742
743
744static DEFINE_MUTEX(cache_chain_mutex);
745static struct list_head cache_chain;
746
747
748
749
750
751static enum {
752 NONE,
753 PARTIAL_AC,
754 PARTIAL_L3,
755 FULL
756} g_cpucache_up;
757
758
759
760
761int slab_is_available(void)
762{
763 return g_cpucache_up == FULL;
764}
765
766static DEFINE_PER_CPU(struct delayed_work, reap_work);
767
768static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
769{
770 return cachep->array[smp_processor_id()];
771}
772
773static inline struct kmem_cache *__find_general_cachep(size_t size,
774 gfp_t gfpflags)
775{
776 struct cache_sizes *csizep = malloc_sizes;
777
778#if DEBUG
779
780
781
782
783 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
784#endif
785 if (!size)
786 return ZERO_SIZE_PTR;
787
788 while (size > csizep->cs_size)
789 csizep++;
790
791
792
793
794
795
796#ifdef CONFIG_ZONE_DMA
797 if (unlikely(gfpflags & GFP_DMA))
798 return csizep->cs_dmacachep;
799#endif
800 return csizep->cs_cachep;
801}
802
803static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
804{
805 return __find_general_cachep(size, gfpflags);
806}
807
808static size_t slab_mgmt_size(size_t nr_objs, size_t align)
809{
810 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
811}
812
813
814
815
816static void cache_estimate(unsigned long gfporder, size_t buffer_size,
817 size_t align, int flags, size_t *left_over,
818 unsigned int *num)
819{
820 int nr_objs;
821 size_t mgmt_size;
822 size_t slab_size = PAGE_SIZE << gfporder;
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839 if (flags & CFLGS_OFF_SLAB) {
840 mgmt_size = 0;
841 nr_objs = slab_size / buffer_size;
842
843 if (nr_objs > SLAB_LIMIT)
844 nr_objs = SLAB_LIMIT;
845 } else {
846
847
848
849
850
851
852
853
854 nr_objs = (slab_size - sizeof(struct slab)) /
855 (buffer_size + sizeof(kmem_bufctl_t));
856
857
858
859
860
861 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
862 > slab_size)
863 nr_objs--;
864
865 if (nr_objs > SLAB_LIMIT)
866 nr_objs = SLAB_LIMIT;
867
868 mgmt_size = slab_mgmt_size(nr_objs, align);
869 }
870 *num = nr_objs;
871 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
872}
873
874#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
875
876static void __slab_error(const char *function, struct kmem_cache *cachep,
877 char *msg)
878{
879 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
880 function, cachep->name, msg);
881 dump_stack();
882}
883
884
885
886
887
888
889
890
891
892static int use_alien_caches __read_mostly = 1;
893static int numa_platform __read_mostly = 1;
894static int __init noaliencache_setup(char *s)
895{
896 use_alien_caches = 0;
897 return 1;
898}
899__setup("noaliencache", noaliencache_setup);
900
#ifdef CONFIG_NUMA
/*
 * Per-cpu cursor naming the remote node whose alien cache reap_alien()
 * looks at next.
 */
static DEFINE_PER_CPU(unsigned long, reap_node);

/* Start @cpu's cursor at the online node after its own, wrapping around. */
static void init_reap_node(int cpu)
{
	int nid = next_node(cpu_to_node(cpu), node_online_map);

	if (nid == MAX_NUMNODES)
		nid = first_node(node_online_map);

	per_cpu(reap_node, cpu) = nid;
}

/* Advance the executing cpu's cursor to the next online node, wrapping around. */
static void next_reap_node(void)
{
	int nid = next_node(__get_cpu_var(reap_node), node_online_map);

	if (unlikely(nid >= MAX_NUMNODES))
		nid = first_node(node_online_map);
	__get_cpu_var(reap_node) = nid;
}

#else
/* Without NUMA there is no cursor to maintain. */
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif
935
936
937
938
939
940
941
942
943static void __cpuinit start_cpu_timer(int cpu)
944{
945 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
946
947
948
949
950
951
952 if (keventd_up() && reap_work->work.func == NULL) {
953 init_reap_node(cpu);
954 INIT_DELAYED_WORK(reap_work, cache_reap);
955 schedule_delayed_work_on(cpu, reap_work,
956 __round_jiffies_relative(HZ, cpu));
957 }
958}
959
960static struct array_cache *alloc_arraycache(int node, int entries,
961 int batchcount)
962{
963 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
964 struct array_cache *nc = NULL;
965
966 nc = kmalloc_node(memsize, GFP_KERNEL, node);
967 if (nc) {
968 nc->avail = 0;
969 nc->limit = entries;
970 nc->batchcount = batchcount;
971 nc->touched = 0;
972 spin_lock_init(&nc->lock);
973 }
974 return nc;
975}
976
977
978
979
980
981
982
983static int transfer_objects(struct array_cache *to,
984 struct array_cache *from, unsigned int max)
985{
986
987 int nr = min(min(from->avail, max), to->limit - to->avail);
988
989 if (!nr)
990 return 0;
991
992 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
993 sizeof(void *) *nr);
994
995 from->avail -= nr;
996 to->avail += nr;
997 to->touched = 1;
998 return nr;
999}
1000
1001#ifndef CONFIG_NUMA
1002
1003#define drain_alien_cache(cachep, alien) do { } while (0)
1004#define reap_alien(cachep, l3) do { } while (0)
1005
1006static inline struct array_cache **alloc_alien_cache(int node, int limit)
1007{
1008 return (struct array_cache **)BAD_ALIEN_MAGIC;
1009}
1010
1011static inline void free_alien_cache(struct array_cache **ac_ptr)
1012{
1013}
1014
1015static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1016{
1017 return 0;
1018}
1019
1020static inline void *alternate_node_alloc(struct kmem_cache *cachep,
1021 gfp_t flags)
1022{
1023 return NULL;
1024}
1025
1026static inline void *____cache_alloc_node(struct kmem_cache *cachep,
1027 gfp_t flags, int nodeid)
1028{
1029 return NULL;
1030}
1031
1032#else
1033
1034static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
1035static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1036
1037static struct array_cache **alloc_alien_cache(int node, int limit)
1038{
1039 struct array_cache **ac_ptr;
1040 int memsize = sizeof(void *) * nr_node_ids;
1041 int i;
1042
1043 if (limit > 1)
1044 limit = 12;
1045 ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
1046 if (ac_ptr) {
1047 for_each_node(i) {
1048 if (i == node || !node_online(i)) {
1049 ac_ptr[i] = NULL;
1050 continue;
1051 }
1052 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
1053 if (!ac_ptr[i]) {
1054 for (i--; i >= 0; i--)
1055 kfree(ac_ptr[i]);
1056 kfree(ac_ptr);
1057 return NULL;
1058 }
1059 }
1060 }
1061 return ac_ptr;
1062}
1063
1064static void free_alien_cache(struct array_cache **ac_ptr)
1065{
1066 int i;
1067
1068 if (!ac_ptr)
1069 return;
1070 for_each_node(i)
1071 kfree(ac_ptr[i]);
1072 kfree(ac_ptr);
1073}
1074
1075static void __drain_alien_cache(struct kmem_cache *cachep,
1076 struct array_cache *ac, int node)
1077{
1078 struct kmem_list3 *rl3 = cachep->nodelists[node];
1079
1080 if (ac->avail) {
1081 spin_lock(&rl3->list_lock);
1082
1083
1084
1085
1086
1087 if (rl3->shared)
1088 transfer_objects(rl3->shared, ac, ac->limit);
1089
1090 free_block(cachep, ac->entry, ac->avail, node);
1091 ac->avail = 0;
1092 spin_unlock(&rl3->list_lock);
1093 }
1094}
1095
1096
1097
1098
1099static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1100{
1101 int node = __get_cpu_var(reap_node);
1102
1103 if (l3->alien) {
1104 struct array_cache *ac = l3->alien[node];
1105
1106 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1107 __drain_alien_cache(cachep, ac, node);
1108 spin_unlock_irq(&ac->lock);
1109 }
1110 }
1111}
1112
1113static void drain_alien_cache(struct kmem_cache *cachep,
1114 struct array_cache **alien)
1115{
1116 int i = 0;
1117 struct array_cache *ac;
1118 unsigned long flags;
1119
1120 for_each_online_node(i) {
1121 ac = alien[i];
1122 if (ac) {
1123 spin_lock_irqsave(&ac->lock, flags);
1124 __drain_alien_cache(cachep, ac, i);
1125 spin_unlock_irqrestore(&ac->lock, flags);
1126 }
1127 }
1128}
1129
1130static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1131{
1132 struct slab *slabp = virt_to_slab(objp);
1133 int nodeid = slabp->nodeid;
1134 struct kmem_list3 *l3;
1135 struct array_cache *alien = NULL;
1136 int node;
1137
1138 node = numa_node_id();
1139
1140
1141
1142
1143
1144 if (likely(slabp->nodeid == node))
1145 return 0;
1146
1147 l3 = cachep->nodelists[node];
1148 STATS_INC_NODEFREES(cachep);
1149 if (l3->alien && l3->alien[nodeid]) {
1150 alien = l3->alien[nodeid];
1151 spin_lock(&alien->lock);
1152 if (unlikely(alien->avail == alien->limit)) {
1153 STATS_INC_ACOVERFLOW(cachep);
1154 __drain_alien_cache(cachep, alien, nodeid);
1155 }
1156 alien->entry[alien->avail++] = objp;
1157 spin_unlock(&alien->lock);
1158 } else {
1159 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1160 free_block(cachep, &objp, 1, nodeid);
1161 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1162 }
1163 return 1;
1164}
1165#endif
1166
1167static void __cpuinit cpuup_canceled(long cpu)
1168{
1169 struct kmem_cache *cachep;
1170 struct kmem_list3 *l3 = NULL;
1171 int node = cpu_to_node(cpu);
1172 const struct cpumask *mask = cpumask_of_node(node);
1173
1174 list_for_each_entry(cachep, &cache_chain, next) {
1175 struct array_cache *nc;
1176 struct array_cache *shared;
1177 struct array_cache **alien;
1178
1179
1180 nc = cachep->array[cpu];
1181 cachep->array[cpu] = NULL;
1182 l3 = cachep->nodelists[node];
1183
1184 if (!l3)
1185 goto free_array_cache;
1186
1187 spin_lock_irq(&l3->list_lock);
1188
1189
1190 l3->free_limit -= cachep->batchcount;
1191 if (nc)
1192 free_block(cachep, nc->entry, nc->avail, node);
1193
1194 if (!cpus_empty(*mask)) {
1195 spin_unlock_irq(&l3->list_lock);
1196 goto free_array_cache;
1197 }
1198
1199 shared = l3->shared;
1200 if (shared) {
1201 free_block(cachep, shared->entry,
1202 shared->avail, node);
1203 l3->shared = NULL;
1204 }
1205
1206 alien = l3->alien;
1207 l3->alien = NULL;
1208
1209 spin_unlock_irq(&l3->list_lock);
1210
1211 kfree(shared);
1212 if (alien) {
1213 drain_alien_cache(cachep, alien);
1214 free_alien_cache(alien);
1215 }
1216free_array_cache:
1217 kfree(nc);
1218 }
1219
1220
1221
1222
1223
1224 list_for_each_entry(cachep, &cache_chain, next) {
1225 l3 = cachep->nodelists[node];
1226 if (!l3)
1227 continue;
1228 drain_freelist(cachep, l3, l3->free_objects);
1229 }
1230}
1231
1232static int __cpuinit cpuup_prepare(long cpu)
1233{
1234 struct kmem_cache *cachep;
1235 struct kmem_list3 *l3 = NULL;
1236 int node = cpu_to_node(cpu);
1237 const int memsize = sizeof(struct kmem_list3);
1238
1239
1240
1241
1242
1243
1244
1245
1246 list_for_each_entry(cachep, &cache_chain, next) {
1247
1248
1249
1250
1251
1252 if (!cachep->nodelists[node]) {
1253 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1254 if (!l3)
1255 goto bad;
1256 kmem_list3_init(l3);
1257 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1258 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1259
1260
1261
1262
1263
1264
1265 cachep->nodelists[node] = l3;
1266 }
1267
1268 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1269 cachep->nodelists[node]->free_limit =
1270 (1 + nr_cpus_node(node)) *
1271 cachep->batchcount + cachep->num;
1272 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1273 }
1274
1275
1276
1277
1278
1279 list_for_each_entry(cachep, &cache_chain, next) {
1280 struct array_cache *nc;
1281 struct array_cache *shared = NULL;
1282 struct array_cache **alien = NULL;
1283
1284 nc = alloc_arraycache(node, cachep->limit,
1285 cachep->batchcount);
1286 if (!nc)
1287 goto bad;
1288 if (cachep->shared) {
1289 shared = alloc_arraycache(node,
1290 cachep->shared * cachep->batchcount,
1291 0xbaadf00d);
1292 if (!shared) {
1293 kfree(nc);
1294 goto bad;
1295 }
1296 }
1297 if (use_alien_caches) {
1298 alien = alloc_alien_cache(node, cachep->limit);
1299 if (!alien) {
1300 kfree(shared);
1301 kfree(nc);
1302 goto bad;
1303 }
1304 }
1305 cachep->array[cpu] = nc;
1306 l3 = cachep->nodelists[node];
1307 BUG_ON(!l3);
1308
1309 spin_lock_irq(&l3->list_lock);
1310 if (!l3->shared) {
1311
1312
1313
1314
1315 l3->shared = shared;
1316 shared = NULL;
1317 }
1318#ifdef CONFIG_NUMA
1319 if (!l3->alien) {
1320 l3->alien = alien;
1321 alien = NULL;
1322 }
1323#endif
1324 spin_unlock_irq(&l3->list_lock);
1325 kfree(shared);
1326 free_alien_cache(alien);
1327 }
1328 return 0;
1329bad:
1330 cpuup_canceled(cpu);
1331 return -ENOMEM;
1332}
1333
1334static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1335 unsigned long action, void *hcpu)
1336{
1337 long cpu = (long)hcpu;
1338 int err = 0;
1339
1340 switch (action) {
1341 case CPU_UP_PREPARE:
1342 case CPU_UP_PREPARE_FROZEN:
1343 mutex_lock(&cache_chain_mutex);
1344 err = cpuup_prepare(cpu);
1345 mutex_unlock(&cache_chain_mutex);
1346 break;
1347 case CPU_ONLINE:
1348 case CPU_ONLINE_FROZEN:
1349 start_cpu_timer(cpu);
1350 break;
1351#ifdef CONFIG_HOTPLUG_CPU
1352 case CPU_DOWN_PREPARE:
1353 case CPU_DOWN_PREPARE_FROZEN:
1354
1355
1356
1357
1358
1359
1360 cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
1361
1362 per_cpu(reap_work, cpu).work.func = NULL;
1363 break;
1364 case CPU_DOWN_FAILED:
1365 case CPU_DOWN_FAILED_FROZEN:
1366 start_cpu_timer(cpu);
1367 break;
1368 case CPU_DEAD:
1369 case CPU_DEAD_FROZEN:
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379#endif
1380 case CPU_UP_CANCELED:
1381 case CPU_UP_CANCELED_FROZEN:
1382 mutex_lock(&cache_chain_mutex);
1383 cpuup_canceled(cpu);
1384 mutex_unlock(&cache_chain_mutex);
1385 break;
1386 }
1387 return err ? NOTIFY_BAD : NOTIFY_OK;
1388}
1389
1390static struct notifier_block __cpuinitdata cpucache_notifier = {
1391 &cpuup_callback, NULL, 0
1392};
1393
1394
1395
1396
1397static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1398 int nodeid)
1399{
1400 struct kmem_list3 *ptr;
1401
1402 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
1403 BUG_ON(!ptr);
1404
1405 local_irq_disable();
1406 memcpy(ptr, list, sizeof(struct kmem_list3));
1407
1408
1409
1410 spin_lock_init(&ptr->list_lock);
1411
1412 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1413 cachep->nodelists[nodeid] = ptr;
1414 local_irq_enable();
1415}
1416
1417
1418
1419
1420
1421static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1422{
1423 int node;
1424
1425 for_each_online_node(node) {
1426 cachep->nodelists[node] = &initkmem_list3[index + node];
1427 cachep->nodelists[node]->next_reap = jiffies +
1428 REAPTIMEOUT_LIST3 +
1429 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1430 }
1431}
1432
1433
1434
1435
1436
1437void __init kmem_cache_init(void)
1438{
1439 size_t left_over;
1440 struct cache_sizes *sizes;
1441 struct cache_names *names;
1442 int i;
1443 int order;
1444 int node;
1445
1446 if (num_possible_nodes() == 1) {
1447 use_alien_caches = 0;
1448 numa_platform = 0;
1449 }
1450
1451 for (i = 0; i < NUM_INIT_LISTS; i++) {
1452 kmem_list3_init(&initkmem_list3[i]);
1453 if (i < MAX_NUMNODES)
1454 cache_cache.nodelists[i] = NULL;
1455 }
1456 set_up_list3s(&cache_cache, CACHE_CACHE);
1457
1458
1459
1460
1461
1462 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
1463 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485 node = numa_node_id();
1486
1487
1488 INIT_LIST_HEAD(&cache_chain);
1489 list_add(&cache_cache.next, &cache_chain);
1490 cache_cache.colour_off = cache_line_size();
1491 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1492 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1493
1494
1495
1496
1497
1498 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1499 nr_node_ids * sizeof(struct kmem_list3 *);
1500#if DEBUG
1501 cache_cache.obj_size = cache_cache.buffer_size;
1502#endif
1503 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1504 cache_line_size());
1505 cache_cache.reciprocal_buffer_size =
1506 reciprocal_value(cache_cache.buffer_size);
1507
1508 for (order = 0; order < MAX_ORDER; order++) {
1509 cache_estimate(order, cache_cache.buffer_size,
1510 cache_line_size(), 0, &left_over, &cache_cache.num);
1511 if (cache_cache.num)
1512 break;
1513 }
1514 BUG_ON(!cache_cache.num);
1515 cache_cache.gfporder = order;
1516 cache_cache.colour = left_over / cache_cache.colour_off;
1517 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1518 sizeof(struct slab), cache_line_size());
1519
1520
1521 sizes = malloc_sizes;
1522 names = cache_names;
1523
1524
1525
1526
1527
1528
1529
1530 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1531 sizes[INDEX_AC].cs_size,
1532 ARCH_KMALLOC_MINALIGN,
1533 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1534 NULL);
1535
1536 if (INDEX_AC != INDEX_L3) {
1537 sizes[INDEX_L3].cs_cachep =
1538 kmem_cache_create(names[INDEX_L3].name,
1539 sizes[INDEX_L3].cs_size,
1540 ARCH_KMALLOC_MINALIGN,
1541 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1542 NULL);
1543 }
1544
1545 slab_early_init = 0;
1546
1547 while (sizes->cs_size != ULONG_MAX) {
1548
1549
1550
1551
1552
1553
1554
1555 if (!sizes->cs_cachep) {
1556 sizes->cs_cachep = kmem_cache_create(names->name,
1557 sizes->cs_size,
1558 ARCH_KMALLOC_MINALIGN,
1559 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1560 NULL);
1561 }
1562#ifdef CONFIG_ZONE_DMA
1563 sizes->cs_dmacachep = kmem_cache_create(
1564 names->name_dma,
1565 sizes->cs_size,
1566 ARCH_KMALLOC_MINALIGN,
1567 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1568 SLAB_PANIC,
1569 NULL);
1570#endif
1571 sizes++;
1572 names++;
1573 }
1574
1575 {
1576 struct array_cache *ptr;
1577
1578 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1579
1580 local_irq_disable();
1581 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1582 memcpy(ptr, cpu_cache_get(&cache_cache),
1583 sizeof(struct arraycache_init));
1584
1585
1586
1587 spin_lock_init(&ptr->lock);
1588
1589 cache_cache.array[smp_processor_id()] = ptr;
1590 local_irq_enable();
1591
1592 ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1593
1594 local_irq_disable();
1595 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1596 != &initarray_generic.cache);
1597 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1598 sizeof(struct arraycache_init));
1599
1600
1601
1602 spin_lock_init(&ptr->lock);
1603
1604 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1605 ptr;
1606 local_irq_enable();
1607 }
1608
1609 {
1610 int nid;
1611
1612 for_each_online_node(nid) {
1613 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1614
1615 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1616 &initkmem_list3[SIZE_AC + nid], nid);
1617
1618 if (INDEX_AC != INDEX_L3) {
1619 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1620 &initkmem_list3[SIZE_L3 + nid], nid);
1621 }
1622 }
1623 }
1624
1625
1626 {
1627 struct kmem_cache *cachep;
1628 mutex_lock(&cache_chain_mutex);
1629 list_for_each_entry(cachep, &cache_chain, next)
1630 if (enable_cpucache(cachep))
1631 BUG();
1632 mutex_unlock(&cache_chain_mutex);
1633 }
1634
1635
1636 init_lock_keys();
1637
1638
1639
1640 g_cpucache_up = FULL;
1641
1642
1643
1644
1645
1646 register_cpu_notifier(&cpucache_notifier);
1647
1648
1649
1650
1651
1652}
1653
1654static int __init cpucache_init(void)
1655{
1656 int cpu;
1657
1658
1659
1660
1661 for_each_online_cpu(cpu)
1662 start_cpu_timer(cpu);
1663 return 0;
1664}
1665__initcall(cpucache_init);
1666
1667
1668
1669
1670
1671
1672
1673
1674static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1675{
1676 struct page *page;
1677 int nr_pages;
1678 int i;
1679
1680#ifndef CONFIG_MMU
1681
1682
1683
1684
1685 flags |= __GFP_COMP;
1686#endif
1687
1688 flags |= cachep->gfpflags;
1689 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1690 flags |= __GFP_RECLAIMABLE;
1691
1692 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1693 if (!page)
1694 return NULL;
1695
1696 nr_pages = (1 << cachep->gfporder);
1697 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1698 add_zone_page_state(page_zone(page),
1699 NR_SLAB_RECLAIMABLE, nr_pages);
1700 else
1701 add_zone_page_state(page_zone(page),
1702 NR_SLAB_UNRECLAIMABLE, nr_pages);
1703 for (i = 0; i < nr_pages; i++)
1704 __SetPageSlab(page + i);
1705 return page_address(page);
1706}
1707
1708
1709
1710
1711static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1712{
1713 unsigned long i = (1 << cachep->gfporder);
1714 struct page *page = virt_to_page(addr);
1715 const unsigned long nr_freed = i;
1716
1717 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1718 sub_zone_page_state(page_zone(page),
1719 NR_SLAB_RECLAIMABLE, nr_freed);
1720 else
1721 sub_zone_page_state(page_zone(page),
1722 NR_SLAB_UNRECLAIMABLE, nr_freed);
1723 while (i--) {
1724 BUG_ON(!PageSlab(page));
1725 __ClearPageSlab(page);
1726 page++;
1727 }
1728 if (current->reclaim_state)
1729 current->reclaim_state->reclaimed_slab += nr_freed;
1730 free_pages((unsigned long)addr, cachep->gfporder);
1731}
1732
1733static void kmem_rcu_free(struct rcu_head *head)
1734{
1735 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1736 struct kmem_cache *cachep = slab_rcu->cachep;
1737
1738 kmem_freepages(cachep, slab_rcu->addr);
1739 if (OFF_SLAB(cachep))
1740 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1741}
1742
1743#if DEBUG
1744
1745#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record a small trace inside the payload of a freed object.
 *
 * Layout written at obj_offset(): the marker 0x12345678, @caller, the
 * current CPU id, then every word found on the stack (starting at the
 * address of @caller) that kernel_text_address() accepts, and finally the
 * marker 0x87654321.  Objects smaller than five words are left untouched;
 * the scan stops at the end of the stack or when at most one word of room
 * remains for the end marker.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
				unsigned long caller)
{
	int room = obj_size(cachep);
	unsigned long *out;
	unsigned long *sp;

	out = (unsigned long *)((char *)addr + obj_offset(cachep));

	if (room < 5 * sizeof(unsigned long))
		return;

	out[0] = 0x12345678;
	out[1] = caller;
	out[2] = smp_processor_id();
	out += 3;
	room -= 3 * sizeof(unsigned long);

	for (sp = &caller; !kstack_end(sp); ) {
		unsigned long word = *sp++;

		if (!kernel_text_address(word))
			continue;

		*out++ = word;
		room -= sizeof(unsigned long);
		if (room <= sizeof(unsigned long))
			break;
	}

	*out = 0x87654321;
}
1777#endif
1778
1779static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1780{
1781 int size = obj_size(cachep);
1782 addr = &((char *)addr)[obj_offset(cachep)];
1783
1784 memset(addr, val, size);
1785 *(unsigned char *)(addr + size - 1) = POISON_END;
1786}
1787
1788static void dump_line(char *data, int offset, int limit)
1789{
1790 int i;
1791 unsigned char error = 0;
1792 int bad_count = 0;
1793
1794 printk(KERN_ERR "%03x:", offset);
1795 for (i = 0; i < limit; i++) {
1796 if (data[offset + i] != POISON_FREE) {
1797 error = data[offset + i];
1798 bad_count++;
1799 }
1800 printk(" %02x", (unsigned char)data[offset + i]);
1801 }
1802 printk("\n");
1803
1804 if (bad_count == 1) {
1805 error ^= POISON_FREE;
1806 if (!(error & (error - 1))) {
1807 printk(KERN_ERR "Single bit error detected. Probably "
1808 "bad RAM.\n");
1809#ifdef CONFIG_X86
1810 printk(KERN_ERR "Run memtest86+ or a similar memory "
1811 "test tool.\n");
1812#else
1813 printk(KERN_ERR "Run a memory test tool.\n");
1814#endif
1815 }
1816 }
1817}
1818#endif
1819
1820#if DEBUG
1821
1822static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1823{
1824 int i, size;
1825 char *realobj;
1826
1827 if (cachep->flags & SLAB_RED_ZONE) {
1828 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1829 *dbg_redzone1(cachep, objp),
1830 *dbg_redzone2(cachep, objp));
1831 }
1832
1833 if (cachep->flags & SLAB_STORE_USER) {
1834 printk(KERN_ERR "Last user: [<%p>]",
1835 *dbg_userword(cachep, objp));
1836 print_symbol("(%s)",
1837 (unsigned long)*dbg_userword(cachep, objp));
1838 printk("\n");
1839 }
1840 realobj = (char *)objp + obj_offset(cachep);
1841 size = obj_size(cachep);
1842 for (i = 0; i < size && lines; i += 16, lines--) {
1843 int limit;
1844 limit = 16;
1845 if (i + limit > size)
1846 limit = size - i;
1847 dump_line(realobj, i, limit);
1848 }
1849}
1850
1851static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1852{
1853 char *realobj;
1854 int size, i;
1855 int lines = 0;
1856
1857 realobj = (char *)objp + obj_offset(cachep);
1858 size = obj_size(cachep);
1859
1860 for (i = 0; i < size; i++) {
1861 char exp = POISON_FREE;
1862 if (i == size - 1)
1863 exp = POISON_END;
1864 if (realobj[i] != exp) {
1865 int limit;
1866
1867
1868 if (lines == 0) {
1869 printk(KERN_ERR
1870 "Slab corruption: %s start=%p, len=%d\n",
1871 cachep->name, realobj, size);
1872 print_objinfo(cachep, objp, 0);
1873 }
1874
1875 i = (i / 16) * 16;
1876 limit = 16;
1877 if (i + limit > size)
1878 limit = size - i;
1879 dump_line(realobj, i, limit);
1880 i += 16;
1881 lines++;
1882
1883 if (lines > 5)
1884 break;
1885 }
1886 }
1887 if (lines != 0) {
1888
1889
1890
1891 struct slab *slabp = virt_to_slab(objp);
1892 unsigned int objnr;
1893
1894 objnr = obj_to_index(cachep, slabp, objp);
1895 if (objnr) {
1896 objp = index_to_obj(cachep, slabp, objnr - 1);
1897 realobj = (char *)objp + obj_offset(cachep);
1898 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1899 realobj, size);
1900 print_objinfo(cachep, objp, 2);
1901 }
1902 if (objnr + 1 < cachep->num) {
1903 objp = index_to_obj(cachep, slabp, objnr + 1);
1904 realobj = (char *)objp + obj_offset(cachep);
1905 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1906 realobj, size);
1907 print_objinfo(cachep, objp, 2);
1908 }
1909 }
1910}
1911#endif
1912
#if DEBUG
/*
 * Sanity-check every object of @slabp before the slab is torn down.
 *
 * Poisoned caches get their poison pattern verified; with
 * CONFIG_DEBUG_PAGEALLOC, off-slab caches whose buffer_size is a multiple
 * of PAGE_SIZE have the object's pages passed to kernel_map_pages(.., 1)
 * instead.  Red-zoned caches must still show RED_INACTIVE on both sides
 * of every object.
 */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
#ifdef CONFIG_DEBUG_PAGEALLOC
	const int page_mapped = (cachep->buffer_size % PAGE_SIZE == 0) &&
				OFF_SLAB(cachep);
#else
	const int page_mapped = 0;
#endif
	int idx;

	for (idx = 0; idx < cachep->num; idx++) {
		void *objp = index_to_obj(cachep, slabp, idx);

		if (cachep->flags & SLAB_POISON) {
			if (page_mapped)
				kernel_map_pages(virt_to_page(objp),
					cachep->buffer_size / PAGE_SIZE, 1);
			else
				check_poison_obj(cachep, objp);
		}

		if (!(cachep->flags & SLAB_RED_ZONE))
			continue;

		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
			slab_error(cachep, "start of a freed object "
					   "was overwritten");
		if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
			slab_error(cachep, "end of a freed object "
					   "was overwritten");
	}
}
#else
/* Without DEBUG there is nothing to verify. */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1958{
1959 void *addr = slabp->s_mem - slabp->colouroff;
1960
1961 slab_destroy_debugcheck(cachep, slabp);
1962 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1963 struct slab_rcu *slab_rcu;
1964
1965 slab_rcu = (struct slab_rcu *)slabp;
1966 slab_rcu->cachep = cachep;
1967 slab_rcu->addr = addr;
1968 call_rcu(&slab_rcu->head, kmem_rcu_free);
1969 } else {
1970 kmem_freepages(cachep, addr);
1971 if (OFF_SLAB(cachep))
1972 kmem_cache_free(cachep->slabp_cache, slabp);
1973 }
1974}
1975
1976static void __kmem_cache_destroy(struct kmem_cache *cachep)
1977{
1978 int i;
1979 struct kmem_list3 *l3;
1980
1981 for_each_online_cpu(i)
1982 kfree(cachep->array[i]);
1983
1984
1985 for_each_online_node(i) {
1986 l3 = cachep->nodelists[i];
1987 if (l3) {
1988 kfree(l3->shared);
1989 free_alien_cache(l3->alien);
1990 kfree(l3);
1991 }
1992 }
1993 kmem_cache_free(&cache_cache, cachep);
1994}
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010static size_t calculate_slab_order(struct kmem_cache *cachep,
2011 size_t size, size_t align, unsigned long flags)
2012{
2013 unsigned long offslab_limit;
2014 size_t left_over = 0;
2015 int gfporder;
2016
2017 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
2018 unsigned int num;
2019 size_t remainder;
2020
2021 cache_estimate(gfporder, size, align, flags, &remainder, &num);
2022 if (!num)
2023 continue;
2024
2025 if (flags & CFLGS_OFF_SLAB) {
2026
2027
2028
2029
2030
2031 offslab_limit = size - sizeof(struct slab);
2032 offslab_limit /= sizeof(kmem_bufctl_t);
2033
2034 if (num > offslab_limit)
2035 break;
2036 }
2037
2038
2039 cachep->num = num;
2040 cachep->gfporder = gfporder;
2041 left_over = remainder;
2042
2043
2044
2045
2046
2047
2048 if (flags & SLAB_RECLAIM_ACCOUNT)
2049 break;
2050
2051
2052
2053
2054
2055 if (gfporder >= slab_break_gfp_order)
2056 break;
2057
2058
2059
2060
2061 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2062 break;
2063 }
2064 return left_over;
2065}
2066
2067static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
2068{
2069 if (g_cpucache_up == FULL)
2070 return enable_cpucache(cachep);
2071
2072 if (g_cpucache_up == NONE) {
2073
2074
2075
2076
2077
2078 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2079
2080
2081
2082
2083
2084
2085 set_up_list3s(cachep, SIZE_AC);
2086 if (INDEX_AC == INDEX_L3)
2087 g_cpucache_up = PARTIAL_L3;
2088 else
2089 g_cpucache_up = PARTIAL_AC;
2090 } else {
2091 cachep->array[smp_processor_id()] =
2092 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
2093
2094 if (g_cpucache_up == PARTIAL_AC) {
2095 set_up_list3s(cachep, SIZE_L3);
2096 g_cpucache_up = PARTIAL_L3;
2097 } else {
2098 int node;
2099 for_each_online_node(node) {
2100 cachep->nodelists[node] =
2101 kmalloc_node(sizeof(struct kmem_list3),
2102 GFP_KERNEL, node);
2103 BUG_ON(!cachep->nodelists[node]);
2104 kmem_list3_init(cachep->nodelists[node]);
2105 }
2106 }
2107 }
2108 cachep->nodelists[numa_node_id()]->next_reap =
2109 jiffies + REAPTIMEOUT_LIST3 +
2110 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2111
2112 cpu_cache_get(cachep)->avail = 0;
2113 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2114 cpu_cache_get(cachep)->batchcount = 1;
2115 cpu_cache_get(cachep)->touched = 0;
2116 cachep->batchcount = 1;
2117 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2118 return 0;
2119}
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150struct kmem_cache *
2151kmem_cache_create (const char *name, size_t size, size_t align,
2152 unsigned long flags, void (*ctor)(void *))
2153{
2154 size_t left_over, slab_size, ralign;
2155 struct kmem_cache *cachep = NULL, *pc;
2156
2157
2158
2159
2160 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2161 size > KMALLOC_MAX_SIZE) {
2162 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2163 name);
2164 BUG();
2165 }
2166
2167
2168
2169
2170
2171 get_online_cpus();
2172 mutex_lock(&cache_chain_mutex);
2173
2174 list_for_each_entry(pc, &cache_chain, next) {
2175 char tmp;
2176 int res;
2177
2178
2179
2180
2181
2182
2183 res = probe_kernel_address(pc->name, tmp);
2184 if (res) {
2185 printk(KERN_ERR
2186 "SLAB: cache with size %d has lost its name\n",
2187 pc->buffer_size);
2188 continue;
2189 }
2190
2191 if (!strcmp(pc->name, name)) {
2192 printk(KERN_ERR
2193 "kmem_cache_create: duplicate cache %s\n", name);
2194 dump_stack();
2195 goto oops;
2196 }
2197 }
2198
2199#if DEBUG
2200 WARN_ON(strchr(name, ' '));
2201#if FORCED_DEBUG
2202
2203
2204
2205
2206
2207
2208 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2209 2 * sizeof(unsigned long long)))
2210 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2211 if (!(flags & SLAB_DESTROY_BY_RCU))
2212 flags |= SLAB_POISON;
2213#endif
2214 if (flags & SLAB_DESTROY_BY_RCU)
2215 BUG_ON(flags & SLAB_POISON);
2216#endif
2217
2218
2219
2220
2221 BUG_ON(flags & ~CREATE_MASK);
2222
2223
2224
2225
2226
2227
2228 if (size & (BYTES_PER_WORD - 1)) {
2229 size += (BYTES_PER_WORD - 1);
2230 size &= ~(BYTES_PER_WORD - 1);
2231 }
2232
2233
2234
2235
2236 if (flags & SLAB_HWCACHE_ALIGN) {
2237
2238
2239
2240
2241
2242 ralign = cache_line_size();
2243 while (size <= ralign / 2)
2244 ralign /= 2;
2245 } else {
2246 ralign = BYTES_PER_WORD;
2247 }
2248
2249
2250
2251
2252
2253
2254 if (flags & SLAB_STORE_USER)
2255 ralign = BYTES_PER_WORD;
2256
2257 if (flags & SLAB_RED_ZONE) {
2258 ralign = REDZONE_ALIGN;
2259
2260
2261 size += REDZONE_ALIGN - 1;
2262 size &= ~(REDZONE_ALIGN - 1);
2263 }
2264
2265
2266 if (ralign < ARCH_SLAB_MINALIGN) {
2267 ralign = ARCH_SLAB_MINALIGN;
2268 }
2269
2270 if (ralign < align) {
2271 ralign = align;
2272 }
2273
2274 if (ralign > __alignof__(unsigned long long))
2275 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2276
2277
2278
2279 align = ralign;
2280
2281
2282 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
2283 if (!cachep)
2284 goto oops;
2285
2286#if DEBUG
2287 cachep->obj_size = size;
2288
2289
2290
2291
2292
2293 if (flags & SLAB_RED_ZONE) {
2294
2295 cachep->obj_offset += sizeof(unsigned long long);
2296 size += 2 * sizeof(unsigned long long);
2297 }
2298 if (flags & SLAB_STORE_USER) {
2299
2300
2301
2302
2303 if (flags & SLAB_RED_ZONE)
2304 size += REDZONE_ALIGN;
2305 else
2306 size += BYTES_PER_WORD;
2307 }
2308#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2309 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2310 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
2311 cachep->obj_offset += PAGE_SIZE - size;
2312 size = PAGE_SIZE;
2313 }
2314#endif
2315#endif
2316
2317
2318
2319
2320
2321
2322 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
2323
2324
2325
2326
2327 flags |= CFLGS_OFF_SLAB;
2328
2329 size = ALIGN(size, align);
2330
2331 left_over = calculate_slab_order(cachep, size, align, flags);
2332
2333 if (!cachep->num) {
2334 printk(KERN_ERR
2335 "kmem_cache_create: couldn't create cache %s.\n", name);
2336 kmem_cache_free(&cache_cache, cachep);
2337 cachep = NULL;
2338 goto oops;
2339 }
2340 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2341 + sizeof(struct slab), align);
2342
2343
2344
2345
2346
2347 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2348 flags &= ~CFLGS_OFF_SLAB;
2349 left_over -= slab_size;
2350 }
2351
2352 if (flags & CFLGS_OFF_SLAB) {
2353
2354 slab_size =
2355 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2356 }
2357
2358 cachep->colour_off = cache_line_size();
2359
2360 if (cachep->colour_off < align)
2361 cachep->colour_off = align;
2362 cachep->colour = left_over / cachep->colour_off;
2363 cachep->slab_size = slab_size;
2364 cachep->flags = flags;
2365 cachep->gfpflags = 0;
2366 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2367 cachep->gfpflags |= GFP_DMA;
2368 cachep->buffer_size = size;
2369 cachep->reciprocal_buffer_size = reciprocal_value(size);
2370
2371 if (flags & CFLGS_OFF_SLAB) {
2372 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2373
2374
2375
2376
2377
2378
2379
2380 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2381 }
2382 cachep->ctor = ctor;
2383 cachep->name = name;
2384
2385 if (setup_cpu_cache(cachep)) {
2386 __kmem_cache_destroy(cachep);
2387 cachep = NULL;
2388 goto oops;
2389 }
2390
2391
2392 list_add(&cachep->next, &cache_chain);
2393oops:
2394 if (!cachep && (flags & SLAB_PANIC))
2395 panic("kmem_cache_create(): failed to create slab `%s'\n",
2396 name);
2397 mutex_unlock(&cache_chain_mutex);
2398 put_online_cpus();
2399 return cachep;
2400}
2401EXPORT_SYMBOL(kmem_cache_create);
2402
#if DEBUG
/* BUG unless local interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG unless local interrupts are enabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP: assert that interrupts are off and that the list_lock of the
 * current node's kmem_list3 is held.  Empty on UP.
 */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	struct kmem_list3 *l3;

	check_irq_off();
	l3 = cachep->nodelists[numa_node_id()];
	assert_spin_locked(&l3->list_lock);
#endif
}

/* Same as check_spinlock_acquired(), for an explicitly given @node. */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	struct kmem_list3 *l3;

	check_irq_off();
	l3 = cachep->nodelists[node];
	assert_spin_locked(&l3->list_lock);
#endif
}

#else
/* Without DEBUG all of the checks compile to nothing. */
#define check_irq_off()	do { } while (0)
#define check_irq_on()	do { } while (0)
#define check_spinlock_acquired(x) do { } while (0)
#define check_spinlock_acquired_node(x, y) do { } while (0)
#endif
2436
/* Forward declaration: drain_cpu_caches() below calls this. */
static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
			struct array_cache *ac, int force, int node);
2440
2441static void do_drain(void *arg)
2442{
2443 struct kmem_cache *cachep = arg;
2444 struct array_cache *ac;
2445 int node = numa_node_id();
2446
2447 check_irq_off();
2448 ac = cpu_cache_get(cachep);
2449 spin_lock(&cachep->nodelists[node]->list_lock);
2450 free_block(cachep, ac->entry, ac->avail, node);
2451 spin_unlock(&cachep->nodelists[node]->list_lock);
2452 ac->avail = 0;
2453}
2454
2455static void drain_cpu_caches(struct kmem_cache *cachep)
2456{
2457 struct kmem_list3 *l3;
2458 int node;
2459
2460 on_each_cpu(do_drain, cachep, 1);
2461 check_irq_on();
2462 for_each_online_node(node) {
2463 l3 = cachep->nodelists[node];
2464 if (l3 && l3->alien)
2465 drain_alien_cache(cachep, l3->alien);
2466 }
2467
2468 for_each_online_node(node) {
2469 l3 = cachep->nodelists[node];
2470 if (l3)
2471 drain_array(cachep, l3, l3->shared, 1, node);
2472 }
2473}
2474
2475
2476
2477
2478
2479
2480
2481static int drain_freelist(struct kmem_cache *cache,
2482 struct kmem_list3 *l3, int tofree)
2483{
2484 struct list_head *p;
2485 int nr_freed;
2486 struct slab *slabp;
2487
2488 nr_freed = 0;
2489 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2490
2491 spin_lock_irq(&l3->list_lock);
2492 p = l3->slabs_free.prev;
2493 if (p == &l3->slabs_free) {
2494 spin_unlock_irq(&l3->list_lock);
2495 goto out;
2496 }
2497
2498 slabp = list_entry(p, struct slab, list);
2499#if DEBUG
2500 BUG_ON(slabp->inuse);
2501#endif
2502 list_del(&slabp->list);
2503
2504
2505
2506
2507 l3->free_objects -= cache->num;
2508 spin_unlock_irq(&l3->list_lock);
2509 slab_destroy(cache, slabp);
2510 nr_freed++;
2511 }
2512out:
2513 return nr_freed;
2514}
2515
2516
2517static int __cache_shrink(struct kmem_cache *cachep)
2518{
2519 int ret = 0, i = 0;
2520 struct kmem_list3 *l3;
2521
2522 drain_cpu_caches(cachep);
2523
2524 check_irq_on();
2525 for_each_online_node(i) {
2526 l3 = cachep->nodelists[i];
2527 if (!l3)
2528 continue;
2529
2530 drain_freelist(cachep, l3, l3->free_objects);
2531
2532 ret += !list_empty(&l3->slabs_full) ||
2533 !list_empty(&l3->slabs_partial);
2534 }
2535 return (ret ? 1 : 0);
2536}
2537
2538
2539
2540
2541
2542
2543
2544
2545int kmem_cache_shrink(struct kmem_cache *cachep)
2546{
2547 int ret;
2548 BUG_ON(!cachep || in_interrupt());
2549
2550 get_online_cpus();
2551 mutex_lock(&cache_chain_mutex);
2552 ret = __cache_shrink(cachep);
2553 mutex_unlock(&cache_chain_mutex);
2554 put_online_cpus();
2555 return ret;
2556}
2557EXPORT_SYMBOL(kmem_cache_shrink);
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575void kmem_cache_destroy(struct kmem_cache *cachep)
2576{
2577 BUG_ON(!cachep || in_interrupt());
2578
2579
2580 get_online_cpus();
2581 mutex_lock(&cache_chain_mutex);
2582
2583
2584
2585 list_del(&cachep->next);
2586 if (__cache_shrink(cachep)) {
2587 slab_error(cachep, "Can't free all objects");
2588 list_add(&cachep->next, &cache_chain);
2589 mutex_unlock(&cache_chain_mutex);
2590 put_online_cpus();
2591 return;
2592 }
2593
2594 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2595 rcu_barrier();
2596
2597 __kmem_cache_destroy(cachep);
2598 mutex_unlock(&cache_chain_mutex);
2599 put_online_cpus();
2600}
2601EXPORT_SYMBOL(kmem_cache_destroy);
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2615 int colour_off, gfp_t local_flags,
2616 int nodeid)
2617{
2618 struct slab *slabp;
2619
2620 if (OFF_SLAB(cachep)) {
2621
2622 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2623 local_flags, nodeid);
2624 if (!slabp)
2625 return NULL;
2626 } else {
2627 slabp = objp + colour_off;
2628 colour_off += cachep->slab_size;
2629 }
2630 slabp->inuse = 0;
2631 slabp->colouroff = colour_off;
2632 slabp->s_mem = objp + colour_off;
2633 slabp->nodeid = nodeid;
2634 slabp->free = 0;
2635 return slabp;
2636}
2637
2638static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2639{
2640 return (kmem_bufctl_t *) (slabp + 1);
2641}
2642
2643static void cache_init_objs(struct kmem_cache *cachep,
2644 struct slab *slabp)
2645{
2646 int i;
2647
2648 for (i = 0; i < cachep->num; i++) {
2649 void *objp = index_to_obj(cachep, slabp, i);
2650#if DEBUG
2651
2652 if (cachep->flags & SLAB_POISON)
2653 poison_obj(cachep, objp, POISON_FREE);
2654 if (cachep->flags & SLAB_STORE_USER)
2655 *dbg_userword(cachep, objp) = NULL;
2656
2657 if (cachep->flags & SLAB_RED_ZONE) {
2658 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2659 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2660 }
2661
2662
2663
2664
2665
2666 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2667 cachep->ctor(objp + obj_offset(cachep));
2668
2669 if (cachep->flags & SLAB_RED_ZONE) {
2670 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2671 slab_error(cachep, "constructor overwrote the"
2672 " end of an object");
2673 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2674 slab_error(cachep, "constructor overwrote the"
2675 " start of an object");
2676 }
2677 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2678 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2679 kernel_map_pages(virt_to_page(objp),
2680 cachep->buffer_size / PAGE_SIZE, 0);
2681#else
2682 if (cachep->ctor)
2683 cachep->ctor(objp);
2684#endif
2685 slab_bufctl(slabp)[i] = i + 1;
2686 }
2687 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2688}
2689
2690static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2691{
2692 if (CONFIG_ZONE_DMA_FLAG) {
2693 if (flags & GFP_DMA)
2694 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2695 else
2696 BUG_ON(cachep->gfpflags & GFP_DMA);
2697 }
2698}
2699
2700static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2701 int nodeid)
2702{
2703 void *objp = index_to_obj(cachep, slabp, slabp->free);
2704 kmem_bufctl_t next;
2705
2706 slabp->inuse++;
2707 next = slab_bufctl(slabp)[slabp->free];
2708#if DEBUG
2709 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2710 WARN_ON(slabp->nodeid != nodeid);
2711#endif
2712 slabp->free = next;
2713
2714 return objp;
2715}
2716
2717static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2718 void *objp, int nodeid)
2719{
2720 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2721
2722#if DEBUG
2723
2724 WARN_ON(slabp->nodeid != nodeid);
2725
2726 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2727 printk(KERN_ERR "slab: double free detected in cache "
2728 "'%s', objp %p\n", cachep->name, objp);
2729 BUG();
2730 }
2731#endif
2732 slab_bufctl(slabp)[objnr] = slabp->free;
2733 slabp->free = objnr;
2734 slabp->inuse--;
2735}
2736
2737
2738
2739
2740
2741
2742static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2743 void *addr)
2744{
2745 int nr_pages;
2746 struct page *page;
2747
2748 page = virt_to_page(addr);
2749
2750 nr_pages = 1;
2751 if (likely(!PageCompound(page)))
2752 nr_pages <<= cache->gfporder;
2753
2754 do {
2755 page_set_cache(page, cache);
2756 page_set_slab(page, slab);
2757 page++;
2758 } while (--nr_pages);
2759}
2760
2761
2762
2763
2764
2765static int cache_grow(struct kmem_cache *cachep,
2766 gfp_t flags, int nodeid, void *objp)
2767{
2768 struct slab *slabp;
2769 size_t offset;
2770 gfp_t local_flags;
2771 struct kmem_list3 *l3;
2772
2773
2774
2775
2776
2777 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2778 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2779
2780
2781 check_irq_off();
2782 l3 = cachep->nodelists[nodeid];
2783 spin_lock(&l3->list_lock);
2784
2785
2786 offset = l3->colour_next;
2787 l3->colour_next++;
2788 if (l3->colour_next >= cachep->colour)
2789 l3->colour_next = 0;
2790 spin_unlock(&l3->list_lock);
2791
2792 offset *= cachep->colour_off;
2793
2794 if (local_flags & __GFP_WAIT)
2795 local_irq_enable();
2796
2797
2798
2799
2800
2801
2802
2803 kmem_flagcheck(cachep, flags);
2804
2805
2806
2807
2808
2809 if (!objp)
2810 objp = kmem_getpages(cachep, local_flags, nodeid);
2811 if (!objp)
2812 goto failed;
2813
2814
2815 slabp = alloc_slabmgmt(cachep, objp, offset,
2816 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2817 if (!slabp)
2818 goto opps1;
2819
2820 slab_map_pages(cachep, slabp, objp);
2821
2822 cache_init_objs(cachep, slabp);
2823
2824 if (local_flags & __GFP_WAIT)
2825 local_irq_disable();
2826 check_irq_off();
2827 spin_lock(&l3->list_lock);
2828
2829
2830 list_add_tail(&slabp->list, &(l3->slabs_free));
2831 STATS_INC_GROWN(cachep);
2832 l3->free_objects += cachep->num;
2833 spin_unlock(&l3->list_lock);
2834 return 1;
2835opps1:
2836 kmem_freepages(cachep, objp);
2837failed:
2838 if (local_flags & __GFP_WAIT)
2839 local_irq_disable();
2840 return 0;
2841}
2842
2843#if DEBUG
2844
2845
2846
2847
2848
2849
2850static void kfree_debugcheck(const void *objp)
2851{
2852 if (!virt_addr_valid(objp)) {
2853 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2854 (unsigned long)objp);
2855 BUG();
2856 }
2857}
2858
2859static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2860{
2861 unsigned long long redzone1, redzone2;
2862
2863 redzone1 = *dbg_redzone1(cache, obj);
2864 redzone2 = *dbg_redzone2(cache, obj);
2865
2866
2867
2868
2869 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2870 return;
2871
2872 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2873 slab_error(cache, "double free detected");
2874 else
2875 slab_error(cache, "memory outside object was overwritten");
2876
2877 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2878 obj, redzone1, redzone2);
2879}
2880
2881static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2882 void *caller)
2883{
2884 struct page *page;
2885 unsigned int objnr;
2886 struct slab *slabp;
2887
2888 BUG_ON(virt_to_cache(objp) != cachep);
2889
2890 objp -= obj_offset(cachep);
2891 kfree_debugcheck(objp);
2892 page = virt_to_head_page(objp);
2893
2894 slabp = page_get_slab(page);
2895
2896 if (cachep->flags & SLAB_RED_ZONE) {
2897 verify_redzone_free(cachep, objp);
2898 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2899 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2900 }
2901 if (cachep->flags & SLAB_STORE_USER)
2902 *dbg_userword(cachep, objp) = caller;
2903
2904 objnr = obj_to_index(cachep, slabp, objp);
2905
2906 BUG_ON(objnr >= cachep->num);
2907 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2908
2909#ifdef CONFIG_DEBUG_SLAB_LEAK
2910 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2911#endif
2912 if (cachep->flags & SLAB_POISON) {
2913#ifdef CONFIG_DEBUG_PAGEALLOC
2914 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2915 store_stackinfo(cachep, objp, (unsigned long)caller);
2916 kernel_map_pages(virt_to_page(objp),
2917 cachep->buffer_size / PAGE_SIZE, 0);
2918 } else {
2919 poison_obj(cachep, objp, POISON_FREE);
2920 }
2921#else
2922 poison_obj(cachep, objp, POISON_FREE);
2923#endif
2924 }
2925 return objp;
2926}
2927
2928static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2929{
2930 kmem_bufctl_t i;
2931 int entries = 0;
2932
2933
2934 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2935 entries++;
2936 if (entries > cachep->num || i >= cachep->num)
2937 goto bad;
2938 }
2939 if (entries != cachep->num - slabp->inuse) {
2940bad:
2941 printk(KERN_ERR "slab: Internal list corruption detected in "
2942 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2943 cachep->name, cachep->num, slabp, slabp->inuse);
2944 for (i = 0;
2945 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2946 i++) {
2947 if (i % 16 == 0)
2948 printk("\n%03x:", i);
2949 printk(" %02x", ((unsigned char *)slabp)[i]);
2950 }
2951 printk("\n");
2952 BUG();
2953 }
2954}
2955#else
/* DEBUG disabled: no checks; cache_free_debugcheck() yields objp as is. */
#define kfree_debugcheck(x) do { } while (0)
#define cache_free_debugcheck(x, objp, z) (objp)
#define check_slabp(x, y) do { } while (0)
2959#endif
2960
2961static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2962{
2963 int batchcount;
2964 struct kmem_list3 *l3;
2965 struct array_cache *ac;
2966 int node;
2967
2968retry:
2969 check_irq_off();
2970 node = numa_node_id();
2971 ac = cpu_cache_get(cachep);
2972 batchcount = ac->batchcount;
2973 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2974
2975
2976
2977
2978
2979 batchcount = BATCHREFILL_LIMIT;
2980 }
2981 l3 = cachep->nodelists[node];
2982
2983 BUG_ON(ac->avail > 0 || !l3);
2984 spin_lock(&l3->list_lock);
2985
2986
2987 if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
2988 goto alloc_done;
2989
2990 while (batchcount > 0) {
2991 struct list_head *entry;
2992 struct slab *slabp;
2993
2994 entry = l3->slabs_partial.next;
2995 if (entry == &l3->slabs_partial) {
2996 l3->free_touched = 1;
2997 entry = l3->slabs_free.next;
2998 if (entry == &l3->slabs_free)
2999 goto must_grow;
3000 }
3001
3002 slabp = list_entry(entry, struct slab, list);
3003 check_slabp(cachep, slabp);
3004 check_spinlock_acquired(cachep);
3005
3006
3007
3008
3009
3010
3011 BUG_ON(slabp->inuse >= cachep->num);
3012
3013 while (slabp->inuse < cachep->num && batchcount--) {
3014 STATS_INC_ALLOCED(cachep);
3015 STATS_INC_ACTIVE(cachep);
3016 STATS_SET_HIGH(cachep);
3017
3018 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3019 node);
3020 }
3021 check_slabp(cachep, slabp);
3022
3023
3024 list_del(&slabp->list);
3025 if (slabp->free == BUFCTL_END)
3026 list_add(&slabp->list, &l3->slabs_full);
3027 else
3028 list_add(&slabp->list, &l3->slabs_partial);
3029 }
3030
3031must_grow:
3032 l3->free_objects -= ac->avail;
3033alloc_done:
3034 spin_unlock(&l3->list_lock);
3035
3036 if (unlikely(!ac->avail)) {
3037 int x;
3038 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3039
3040
3041 ac = cpu_cache_get(cachep);
3042 if (!x && ac->avail == 0)
3043 return NULL;
3044
3045 if (!ac->avail)
3046 goto retry;
3047 }
3048 ac->touched = 1;
3049 return ac->entry[--ac->avail];
3050}
3051
3052static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3053 gfp_t flags)
3054{
3055 might_sleep_if(flags & __GFP_WAIT);
3056#if DEBUG
3057 kmem_flagcheck(cachep, flags);
3058#endif
3059}
3060
#if DEBUG
/*
 * Debug processing of a freshly allocated object.
 *
 * @objp is the start of the buffer (may be NULL, which is passed
 * through).  Depending on the cache flags the free-poison pattern is
 * verified and replaced by POISON_INUSE, @caller is stored as the user
 * word, and the redzones are checked for RED_INACTIVE and set to
 * RED_ACTIVE.  With CONFIG_DEBUG_PAGEALLOC, off-slab caches whose
 * buffer_size is a multiple of PAGE_SIZE have their pages passed to
 * kernel_map_pages(.., 1) instead of the poison check.  Poisoned caches
 * have their constructor run here.
 *
 * Returns the user-visible pointer, i.e. @objp advanced by obj_offset().
 * A misaligned result is only reported, not corrected.
 */
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
				gfp_t flags, void *objp, void *caller)
{
	if (!objp)
		return objp;

	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
			kernel_map_pages(virt_to_page(objp),
					 cachep->buffer_size / PAGE_SIZE, 1);
		else
			check_poison_obj(cachep, objp);
#else
		check_poison_obj(cachep, objp);
#endif
		poison_obj(cachep, objp, POISON_INUSE);
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	if (cachep->flags & SLAB_RED_ZONE) {
		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
			slab_error(cachep, "double free, or memory outside"
						" object was overwritten");
			printk(KERN_ERR
				"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
				objp, *dbg_redzone1(cachep, objp),
				*dbg_redzone2(cachep, objp));
		}
		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
	}
#ifdef CONFIG_DEBUG_SLAB_LEAK
	{
		struct slab *slabp;
		unsigned objnr;

		slabp = page_get_slab(virt_to_head_page(objp));
		objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
	}
#endif
	objp += obj_offset(cachep);
	if (cachep->ctor && cachep->flags & SLAB_POISON)
		cachep->ctor(objp);

	/*
	 * Test the full pointer width: a u32 cast truncates the address on
	 * 64-bit.  A runtime test is used because ARCH_SLAB_MINALIGN may be
	 * defined through __alignof__, which #if cannot evaluate; the
	 * compiler removes the branch when the constant is 0.
	 */
	if (ARCH_SLAB_MINALIGN &&
	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN - 1))) {
		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
		       objp, (int)ARCH_SLAB_MINALIGN);
	}
	return objp;
}
#else
/* Without DEBUG the object is returned untouched. */
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif
3119
3120static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3121{
3122 if (cachep == &cache_cache)
3123 return false;
3124
3125 return should_failslab(obj_size(cachep), flags);
3126}
3127
3128static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3129{
3130 void *objp;
3131 struct array_cache *ac;
3132
3133 check_irq_off();
3134
3135 ac = cpu_cache_get(cachep);
3136 if (likely(ac->avail)) {
3137 STATS_INC_ALLOCHIT(cachep);
3138 ac->touched = 1;
3139 objp = ac->entry[--ac->avail];
3140 } else {
3141 STATS_INC_ALLOCMISS(cachep);
3142 objp = cache_alloc_refill(cachep, flags);
3143 }
3144 return objp;
3145}
3146
3147#ifdef CONFIG_NUMA
3148
3149
3150
3151
3152
3153
3154static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3155{
3156 int nid_alloc, nid_here;
3157
3158 if (in_interrupt() || (flags & __GFP_THISNODE))
3159 return NULL;
3160 nid_alloc = nid_here = numa_node_id();
3161 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3162 nid_alloc = cpuset_mem_spread_node();
3163 else if (current->mempolicy)
3164 nid_alloc = slab_node(current->mempolicy);
3165 if (nid_alloc != nid_here)
3166 return ____cache_alloc_node(cachep, flags, nid_alloc);
3167 return NULL;
3168}
3169
3170
3171
3172
3173
3174
3175
3176
3177
/*
 * fallback_alloc - last-resort allocation that may use any permitted node
 * @cache: cache to allocate from
 * @flags: gfp flags of the request
 *
 * First walks the zonelist derived from the current mempolicy and tries
 * every node that already has free objects on its lists.  If that yields
 * nothing, pages are requested without a node restriction (node -1) and
 * added as a new slab to whichever node they ended up on.
 *
 * Returns an object, or NULL when __GFP_THISNODE is set or no memory could
 * be obtained.
 * NOTE(review): the irq enable/disable pair below suggests the function is
 * entered with interrupts disabled - confirm against the callers.
 */
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
	struct zonelist *zonelist;
	gfp_t local_flags;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *obj = NULL;
	int nid;

	/* the caller insisted on one node, so falling back is not allowed */
	if (flags & __GFP_THISNODE)
		return NULL;

	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
	/* only constraint and reclaim bits are handed to the page allocator */
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

retry:
	/*
	 * Look through the allowed zones for a node whose lists already
	 * hold free objects and try to allocate from it.
	 */
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		nid = zone_to_nid(zone);

		if (cpuset_zone_allowed_hardwall(zone, flags) &&
			cache->nodelists[nid] &&
			cache->nodelists[nid]->free_objects) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (obj)
					break;
		}
	}

	if (!obj) {
		/*
		 * No node had objects to spare.  Get fresh pages without
		 * naming a node; interrupts are enabled around the page
		 * allocation when the request is allowed to wait.
		 */
		if (local_flags & __GFP_WAIT)
			local_irq_enable();
		kmem_flagcheck(cache, flags);
		obj = kmem_getpages(cache, local_flags, -1);
		if (local_flags & __GFP_WAIT)
			local_irq_disable();
		if (obj) {
			/*
			 * Insert the pages into the cache on the node they
			 * actually came from.
			 */
			nid = page_to_nid(virt_to_page(obj));
			if (cache_grow(cache, flags, nid, obj)) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (!obj)
					/*
					 * The slab was added but the object
					 * could not be taken from it (the
					 * allocation above returned NULL);
					 * start over.
					 */
					goto retry;
			} else {
				/*
				 * cache_grow() failed.
				 * NOTE(review): presumably it released the
				 * pages itself - confirm in cache_grow().
				 */
				obj = NULL;
			}
		}
	}
	return obj;
}
3248
3249
3250
3251
3252static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3253 int nodeid)
3254{
3255 struct list_head *entry;
3256 struct slab *slabp;
3257 struct kmem_list3 *l3;
3258 void *obj;
3259 int x;
3260
3261 l3 = cachep->nodelists[nodeid];
3262 BUG_ON(!l3);
3263
3264retry:
3265 check_irq_off();
3266 spin_lock(&l3->list_lock);
3267 entry = l3->slabs_partial.next;
3268 if (entry == &l3->slabs_partial) {
3269 l3->free_touched = 1;
3270 entry = l3->slabs_free.next;
3271 if (entry == &l3->slabs_free)
3272 goto must_grow;
3273 }
3274
3275 slabp = list_entry(entry, struct slab, list);
3276 check_spinlock_acquired_node(cachep, nodeid);
3277 check_slabp(cachep, slabp);
3278
3279 STATS_INC_NODEALLOCS(cachep);
3280 STATS_INC_ACTIVE(cachep);
3281 STATS_SET_HIGH(cachep);
3282
3283 BUG_ON(slabp->inuse == cachep->num);
3284
3285 obj = slab_get_obj(cachep, slabp, nodeid);
3286 check_slabp(cachep, slabp);
3287 l3->free_objects--;
3288
3289 list_del(&slabp->list);
3290
3291 if (slabp->free == BUFCTL_END)
3292 list_add(&slabp->list, &l3->slabs_full);
3293 else
3294 list_add(&slabp->list, &l3->slabs_partial);
3295
3296 spin_unlock(&l3->list_lock);
3297 goto done;
3298
3299must_grow:
3300 spin_unlock(&l3->list_lock);
3301 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3302 if (x)
3303 goto retry;
3304
3305 return fallback_alloc(cachep, flags);
3306
3307done:
3308 return obj;
3309}
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323static __always_inline void *
3324__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3325 void *caller)
3326{
3327 unsigned long save_flags;
3328 void *ptr;
3329
3330 lockdep_trace_alloc(flags);
3331
3332 if (slab_should_failslab(cachep, flags))
3333 return NULL;
3334
3335 cache_alloc_debugcheck_before(cachep, flags);
3336 local_irq_save(save_flags);
3337
3338 if (unlikely(nodeid == -1))
3339 nodeid = numa_node_id();
3340
3341 if (unlikely(!cachep->nodelists[nodeid])) {
3342
3343 ptr = fallback_alloc(cachep, flags);
3344 goto out;
3345 }
3346
3347 if (nodeid == numa_node_id()) {
3348
3349
3350
3351
3352
3353
3354 ptr = ____cache_alloc(cachep, flags);
3355 if (ptr)
3356 goto out;
3357 }
3358
3359 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3360 out:
3361 local_irq_restore(save_flags);
3362 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3363
3364 if (unlikely((flags & __GFP_ZERO) && ptr))
3365 memset(ptr, 0, obj_size(cachep));
3366
3367 return ptr;
3368}
3369
3370static __always_inline void *
3371__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3372{
3373 void *objp;
3374
3375 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3376 objp = alternate_node_alloc(cache, flags);
3377 if (objp)
3378 goto out;
3379 }
3380 objp = ____cache_alloc(cache, flags);
3381
3382
3383
3384
3385
3386 if (!objp)
3387 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3388
3389 out:
3390 return objp;
3391}
3392#else
3393
3394static __always_inline void *
3395__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3396{
3397 return ____cache_alloc(cachep, flags);
3398}
3399
3400#endif
3401
3402static __always_inline void *
3403__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3404{
3405 unsigned long save_flags;
3406 void *objp;
3407
3408 lockdep_trace_alloc(flags);
3409
3410 if (slab_should_failslab(cachep, flags))
3411 return NULL;
3412
3413 cache_alloc_debugcheck_before(cachep, flags);
3414 local_irq_save(save_flags);
3415 objp = __do_cache_alloc(cachep, flags);
3416 local_irq_restore(save_flags);
3417 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3418 prefetchw(objp);
3419
3420 if (unlikely((flags & __GFP_ZERO) && objp))
3421 memset(objp, 0, obj_size(cachep));
3422
3423 return objp;
3424}
3425
3426
3427
3428
3429static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3430 int node)
3431{
3432 int i;
3433 struct kmem_list3 *l3;
3434
3435 for (i = 0; i < nr_objects; i++) {
3436 void *objp = objpp[i];
3437 struct slab *slabp;
3438
3439 slabp = virt_to_slab(objp);
3440 l3 = cachep->nodelists[node];
3441 list_del(&slabp->list);
3442 check_spinlock_acquired_node(cachep, node);
3443 check_slabp(cachep, slabp);
3444 slab_put_obj(cachep, slabp, objp, node);
3445 STATS_DEC_ACTIVE(cachep);
3446 l3->free_objects++;
3447 check_slabp(cachep, slabp);
3448
3449
3450 if (slabp->inuse == 0) {
3451 if (l3->free_objects > l3->free_limit) {
3452 l3->free_objects -= cachep->num;
3453
3454
3455
3456
3457
3458
3459 slab_destroy(cachep, slabp);
3460 } else {
3461 list_add(&slabp->list, &l3->slabs_free);
3462 }
3463 } else {
3464
3465
3466
3467
3468 list_add_tail(&slabp->list, &l3->slabs_partial);
3469 }
3470 }
3471}
3472
3473static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3474{
3475 int batchcount;
3476 struct kmem_list3 *l3;
3477 int node = numa_node_id();
3478
3479 batchcount = ac->batchcount;
3480#if DEBUG
3481 BUG_ON(!batchcount || batchcount > ac->avail);
3482#endif
3483 check_irq_off();
3484 l3 = cachep->nodelists[node];
3485 spin_lock(&l3->list_lock);
3486 if (l3->shared) {
3487 struct array_cache *shared_array = l3->shared;
3488 int max = shared_array->limit - shared_array->avail;
3489 if (max) {
3490 if (batchcount > max)
3491 batchcount = max;
3492 memcpy(&(shared_array->entry[shared_array->avail]),
3493 ac->entry, sizeof(void *) * batchcount);
3494 shared_array->avail += batchcount;
3495 goto free_done;
3496 }
3497 }
3498
3499 free_block(cachep, ac->entry, batchcount, node);
3500free_done:
3501#if STATS
3502 {
3503 int i = 0;
3504 struct list_head *p;
3505
3506 p = l3->slabs_free.next;
3507 while (p != &(l3->slabs_free)) {
3508 struct slab *slabp;
3509
3510 slabp = list_entry(p, struct slab, list);
3511 BUG_ON(slabp->inuse);
3512
3513 i++;
3514 p = p->next;
3515 }
3516 STATS_SET_FREEABLE(cachep, i);
3517 }
3518#endif
3519 spin_unlock(&l3->list_lock);
3520 ac->avail -= batchcount;
3521 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3522}
3523
3524
3525
3526
3527
3528static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3529{
3530 struct array_cache *ac = cpu_cache_get(cachep);
3531
3532 check_irq_off();
3533 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3534
3535
3536
3537
3538
3539
3540
3541
3542 if (numa_platform && cache_free_alien(cachep, objp))
3543 return;
3544
3545 if (likely(ac->avail < ac->limit)) {
3546 STATS_INC_FREEHIT(cachep);
3547 ac->entry[ac->avail++] = objp;
3548 return;
3549 } else {
3550 STATS_INC_FREEMISS(cachep);
3551 cache_flusharray(cachep, ac);
3552 ac->entry[ac->avail++] = objp;
3553 }
3554}
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3565{
3566 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3567
3568 trace_kmem_cache_alloc(_RET_IP_, ret,
3569 obj_size(cachep), cachep->buffer_size, flags);
3570
3571 return ret;
3572}
3573EXPORT_SYMBOL(kmem_cache_alloc);
3574
3575#ifdef CONFIG_KMEMTRACE
3576void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3577{
3578 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3579}
3580EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3581#endif
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3597{
3598 unsigned long addr = (unsigned long)ptr;
3599 unsigned long min_addr = PAGE_OFFSET;
3600 unsigned long align_mask = BYTES_PER_WORD - 1;
3601 unsigned long size = cachep->buffer_size;
3602 struct page *page;
3603
3604 if (unlikely(addr < min_addr))
3605 goto out;
3606 if (unlikely(addr > (unsigned long)high_memory - size))
3607 goto out;
3608 if (unlikely(addr & align_mask))
3609 goto out;
3610 if (unlikely(!kern_addr_valid(addr)))
3611 goto out;
3612 if (unlikely(!kern_addr_valid(addr + size - 1)))
3613 goto out;
3614 page = virt_to_page(ptr);
3615 if (unlikely(!PageSlab(page)))
3616 goto out;
3617 if (unlikely(page_get_cache(page) != cachep))
3618 goto out;
3619 return 1;
3620out:
3621 return 0;
3622}
3623
3624#ifdef CONFIG_NUMA
3625void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3626{
3627 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3628 __builtin_return_address(0));
3629
3630 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3631 obj_size(cachep), cachep->buffer_size,
3632 flags, nodeid);
3633
3634 return ret;
3635}
3636EXPORT_SYMBOL(kmem_cache_alloc_node);
3637
3638#ifdef CONFIG_KMEMTRACE
3639void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
3640 gfp_t flags,
3641 int nodeid)
3642{
3643 return __cache_alloc_node(cachep, flags, nodeid,
3644 __builtin_return_address(0));
3645}
3646EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
3647#endif
3648
3649static __always_inline void *
3650__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3651{
3652 struct kmem_cache *cachep;
3653 void *ret;
3654
3655 cachep = kmem_find_general_cachep(size, flags);
3656 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3657 return cachep;
3658 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
3659
3660 trace_kmalloc_node((unsigned long) caller, ret,
3661 size, cachep->buffer_size, flags, node);
3662
3663 return ret;
3664}
3665
3666#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3667void *__kmalloc_node(size_t size, gfp_t flags, int node)
3668{
3669 return __do_kmalloc_node(size, flags, node,
3670 __builtin_return_address(0));
3671}
3672EXPORT_SYMBOL(__kmalloc_node);
3673
3674void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3675 int node, unsigned long caller)
3676{
3677 return __do_kmalloc_node(size, flags, node, (void *)caller);
3678}
3679EXPORT_SYMBOL(__kmalloc_node_track_caller);
3680#else
3681void *__kmalloc_node(size_t size, gfp_t flags, int node)
3682{
3683 return __do_kmalloc_node(size, flags, node, NULL);
3684}
3685EXPORT_SYMBOL(__kmalloc_node);
3686#endif
3687#endif
3688
3689
3690
3691
3692
3693
3694
3695static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3696 void *caller)
3697{
3698 struct kmem_cache *cachep;
3699 void *ret;
3700
3701
3702
3703
3704
3705
3706 cachep = __find_general_cachep(size, flags);
3707 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3708 return cachep;
3709 ret = __cache_alloc(cachep, flags, caller);
3710
3711 trace_kmalloc((unsigned long) caller, ret,
3712 size, cachep->buffer_size, flags);
3713
3714 return ret;
3715}
3716
3717
3718#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3719void *__kmalloc(size_t size, gfp_t flags)
3720{
3721 return __do_kmalloc(size, flags, __builtin_return_address(0));
3722}
3723EXPORT_SYMBOL(__kmalloc);
3724
3725void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3726{
3727 return __do_kmalloc(size, flags, (void *)caller);
3728}
3729EXPORT_SYMBOL(__kmalloc_track_caller);
3730
3731#else
3732void *__kmalloc(size_t size, gfp_t flags)
3733{
3734 return __do_kmalloc(size, flags, NULL);
3735}
3736EXPORT_SYMBOL(__kmalloc);
3737#endif
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3748{
3749 unsigned long flags;
3750
3751 local_irq_save(flags);
3752 debug_check_no_locks_freed(objp, obj_size(cachep));
3753 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3754 debug_check_no_obj_freed(objp, obj_size(cachep));
3755 __cache_free(cachep, objp);
3756 local_irq_restore(flags);
3757
3758 trace_kmem_cache_free(_RET_IP_, objp);
3759}
3760EXPORT_SYMBOL(kmem_cache_free);
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771void kfree(const void *objp)
3772{
3773 struct kmem_cache *c;
3774 unsigned long flags;
3775
3776 trace_kfree(_RET_IP_, objp);
3777
3778 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3779 return;
3780 local_irq_save(flags);
3781 kfree_debugcheck(objp);
3782 c = virt_to_cache(objp);
3783 debug_check_no_locks_freed(objp, obj_size(c));
3784 debug_check_no_obj_freed(objp, obj_size(c));
3785 __cache_free(c, (void *)objp);
3786 local_irq_restore(flags);
3787}
3788EXPORT_SYMBOL(kfree);
3789
/* Report the object size of @cachep as given by obj_size(). */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	unsigned int size = obj_size(cachep);

	return size;
}
EXPORT_SYMBOL(kmem_cache_size);
3795
3796const char *kmem_cache_name(struct kmem_cache *cachep)
3797{
3798 return cachep->name;
3799}
3800EXPORT_SYMBOL_GPL(kmem_cache_name);
3801
3802
3803
3804
/*
 * alloc_kmemlist - set up or refresh the per-node lists of a cache
 * @cachep: cache to work on
 *
 * For every online node a new shared array (sized shared * batchcount, if
 * cachep->shared is non-zero) and, when use_alien_caches is set, a new
 * alien cache are allocated.  A node that already has a kmem_list3 gets the
 * new shared array swapped in (objects in the old one are freed back to the
 * slabs) and keeps its existing alien cache; a node without one gets a
 * freshly initialised kmem_list3.
 *
 * Returns 0 on success or -ENOMEM if any allocation fails.
 */
static int alloc_kmemlist(struct kmem_cache *cachep)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {

		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			/*
			 * NOTE(review): 0xbaadf00d lands in the batchcount
			 * slot of alloc_arraycache(); presumably a marker
			 * value that is never used for shared arrays -
			 * confirm.
			 */
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		l3 = cachep->nodelists[node];
		if (l3) {
			/* node already has lists: swap in the new arrays */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);

			/* push objects held by the old shared array back */
			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			l3->shared = new_shared;
			/* an existing alien cache is kept as it is */
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			kfree(shared);
			/* frees the new alien cache if it was not installed */
			free_alien_cache(new_alien);
			continue;
		}
		l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		/* offset the first reap by a per-cache amount */
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	/*
	 * NOTE(review): a NULL next.next presumably means the cache has not
	 * been linked into cache_chain yet - confirm.  Only in that case are
	 * the lists of the nodes below the failing one torn down again.
	 */
	if (!cachep->next.next) {
		/* step back to the last node that was processed */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];

				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
3889
/*
 * Argument block for do_ccupdate_local(): the cache being retuned and one
 * array_cache pointer per CPU.  Each CPU swaps its slot with the array it
 * currently uses, so after the update the slots hold the old arrays.
 */
struct ccupdate_struct {
	struct kmem_cache *cachep;
	struct array_cache *new[NR_CPUS];
};
3894
3895static void do_ccupdate_local(void *info)
3896{
3897 struct ccupdate_struct *new = info;
3898 struct array_cache *old;
3899
3900 check_irq_off();
3901 old = cpu_cache_get(new->cachep);
3902
3903 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3904 new->new[smp_processor_id()] = old;
3905}
3906
3907
3908static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3909 int batchcount, int shared)
3910{
3911 struct ccupdate_struct *new;
3912 int i;
3913
3914 new = kzalloc(sizeof(*new), GFP_KERNEL);
3915 if (!new)
3916 return -ENOMEM;
3917
3918 for_each_online_cpu(i) {
3919 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3920 batchcount);
3921 if (!new->new[i]) {
3922 for (i--; i >= 0; i--)
3923 kfree(new->new[i]);
3924 kfree(new);
3925 return -ENOMEM;
3926 }
3927 }
3928 new->cachep = cachep;
3929
3930 on_each_cpu(do_ccupdate_local, (void *)new, 1);
3931
3932 check_irq_on();
3933 cachep->batchcount = batchcount;
3934 cachep->limit = limit;
3935 cachep->shared = shared;
3936
3937 for_each_online_cpu(i) {
3938 struct array_cache *ccold = new->new[i];
3939 if (!ccold)
3940 continue;
3941 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3942 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
3943 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3944 kfree(ccold);
3945 }
3946 kfree(new);
3947 return alloc_kmemlist(cachep);
3948}
3949
3950
3951static int enable_cpucache(struct kmem_cache *cachep)
3952{
3953 int err;
3954 int limit, shared;
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965 if (cachep->buffer_size > 131072)
3966 limit = 1;
3967 else if (cachep->buffer_size > PAGE_SIZE)
3968 limit = 8;
3969 else if (cachep->buffer_size > 1024)
3970 limit = 24;
3971 else if (cachep->buffer_size > 256)
3972 limit = 54;
3973 else
3974 limit = 120;
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985 shared = 0;
3986 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
3987 shared = 8;
3988
3989#if DEBUG
3990
3991
3992
3993
3994 if (limit > 32)
3995 limit = 32;
3996#endif
3997 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
3998 if (err)
3999 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4000 cachep->name, -err);
4001 return err;
4002}
4003
4004
4005
4006
4007
4008
4009void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4010 struct array_cache *ac, int force, int node)
4011{
4012 int tofree;
4013
4014 if (!ac || !ac->avail)
4015 return;
4016 if (ac->touched && !force) {
4017 ac->touched = 0;
4018 } else {
4019 spin_lock_irq(&l3->list_lock);
4020 if (ac->avail) {
4021 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4022 if (tofree > ac->avail)
4023 tofree = (ac->avail + 1) / 2;
4024 free_block(cachep, ac->entry, tofree, node);
4025 ac->avail -= tofree;
4026 memmove(ac->entry, &(ac->entry[tofree]),
4027 sizeof(void *) * ac->avail);
4028 }
4029 spin_unlock_irq(&l3->list_lock);
4030 }
4031}
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045static void cache_reap(struct work_struct *w)
4046{
4047 struct kmem_cache *searchp;
4048 struct kmem_list3 *l3;
4049 int node = numa_node_id();
4050 struct delayed_work *work = to_delayed_work(w);
4051
4052 if (!mutex_trylock(&cache_chain_mutex))
4053
4054 goto out;
4055
4056 list_for_each_entry(searchp, &cache_chain, next) {
4057 check_irq_on();
4058
4059
4060
4061
4062
4063
4064 l3 = searchp->nodelists[node];
4065
4066 reap_alien(searchp, l3);
4067
4068 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4069
4070
4071
4072
4073
4074 if (time_after(l3->next_reap, jiffies))
4075 goto next;
4076
4077 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4078
4079 drain_array(searchp, l3, l3->shared, 0, node);
4080
4081 if (l3->free_touched)
4082 l3->free_touched = 0;
4083 else {
4084 int freed;
4085
4086 freed = drain_freelist(searchp, l3, (l3->free_limit +
4087 5 * searchp->num - 1) / (5 * searchp->num));
4088 STATS_ADD_REAPED(searchp, freed);
4089 }
4090next:
4091 cond_resched();
4092 }
4093 check_irq_on();
4094 mutex_unlock(&cache_chain_mutex);
4095 next_reap_node();
4096out:
4097
4098 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4099}
4100
4101#ifdef CONFIG_SLABINFO
4102
/*
 * Emit the slabinfo header: a version line followed by one line naming the
 * columns.  Builds with STATS add a "(statistics)" tag and the globalstat
 * and cpustat column names.
 */
static void print_slabinfo_header(struct seq_file *m)
{
#if STATS
	static const char version[] = "slabinfo - version: 2.1 (statistics)\n";
#else
	static const char version[] = "slabinfo - version: 2.1\n";
#endif

	seq_puts(m, version);
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
4125
4126static void *s_start(struct seq_file *m, loff_t *pos)
4127{
4128 loff_t n = *pos;
4129
4130 mutex_lock(&cache_chain_mutex);
4131 if (!n)
4132 print_slabinfo_header(m);
4133
4134 return seq_list_start(&cache_chain, *pos);
4135}
4136
4137static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4138{
4139 return seq_list_next(p, &cache_chain, pos);
4140}
4141
4142static void s_stop(struct seq_file *m, void *p)
4143{
4144 mutex_unlock(&cache_chain_mutex);
4145}
4146
4147static int s_show(struct seq_file *m, void *p)
4148{
4149 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4150 struct slab *slabp;
4151 unsigned long active_objs;
4152 unsigned long num_objs;
4153 unsigned long active_slabs = 0;
4154 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4155 const char *name;
4156 char *error = NULL;
4157 int node;
4158 struct kmem_list3 *l3;
4159
4160 active_objs = 0;
4161 num_slabs = 0;
4162 for_each_online_node(node) {
4163 l3 = cachep->nodelists[node];
4164 if (!l3)
4165 continue;
4166
4167 check_irq_on();
4168 spin_lock_irq(&l3->list_lock);
4169
4170 list_for_each_entry(slabp, &l3->slabs_full, list) {
4171 if (slabp->inuse != cachep->num && !error)
4172 error = "slabs_full accounting error";
4173 active_objs += cachep->num;
4174 active_slabs++;
4175 }
4176 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4177 if (slabp->inuse == cachep->num && !error)
4178 error = "slabs_partial inuse accounting error";
4179 if (!slabp->inuse && !error)
4180 error = "slabs_partial/inuse accounting error";
4181 active_objs += slabp->inuse;
4182 active_slabs++;
4183 }
4184 list_for_each_entry(slabp, &l3->slabs_free, list) {
4185 if (slabp->inuse && !error)
4186 error = "slabs_free/inuse accounting error";
4187 num_slabs++;
4188 }
4189 free_objects += l3->free_objects;
4190 if (l3->shared)
4191 shared_avail += l3->shared->avail;
4192
4193 spin_unlock_irq(&l3->list_lock);
4194 }
4195 num_slabs += active_slabs;
4196 num_objs = num_slabs * cachep->num;
4197 if (num_objs - active_objs != free_objects && !error)
4198 error = "free_objects accounting error";
4199
4200 name = cachep->name;
4201 if (error)
4202 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4203
4204 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4205 name, active_objs, num_objs, cachep->buffer_size,
4206 cachep->num, (1 << cachep->gfporder));
4207 seq_printf(m, " : tunables %4u %4u %4u",
4208 cachep->limit, cachep->batchcount, cachep->shared);
4209 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4210 active_slabs, num_slabs, shared_avail);
4211#if STATS
4212 {
4213 unsigned long high = cachep->high_mark;
4214 unsigned long allocs = cachep->num_allocations;
4215 unsigned long grown = cachep->grown;
4216 unsigned long reaped = cachep->reaped;
4217 unsigned long errors = cachep->errors;
4218 unsigned long max_freeable = cachep->max_freeable;
4219 unsigned long node_allocs = cachep->node_allocs;
4220 unsigned long node_frees = cachep->node_frees;
4221 unsigned long overflows = cachep->node_overflow;
4222
4223 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4224 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4225 reaped, errors, max_freeable, node_allocs,
4226 node_frees, overflows);
4227 }
4228
4229 {
4230 unsigned long allochit = atomic_read(&cachep->allochit);
4231 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4232 unsigned long freehit = atomic_read(&cachep->freehit);
4233 unsigned long freemiss = atomic_read(&cachep->freemiss);
4234
4235 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4236 allochit, allocmiss, freehit, freemiss);
4237 }
4238#endif
4239 seq_putc(m, '\n');
4240 return 0;
4241}
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
/*
 * seq_file iterator used for slabinfo (see slabinfo_open()): s_start()
 * takes cache_chain_mutex, s_stop() drops it, and s_show() prints one
 * cache per step.
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
4263
4264#define MAX_SLABINFO_WRITE 128
4265
4266
4267
4268
4269
4270
4271
4272ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4273 size_t count, loff_t *ppos)
4274{
4275 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4276 int limit, batchcount, shared, res;
4277 struct kmem_cache *cachep;
4278
4279 if (count > MAX_SLABINFO_WRITE)
4280 return -EINVAL;
4281 if (copy_from_user(&kbuf, buffer, count))
4282 return -EFAULT;
4283 kbuf[MAX_SLABINFO_WRITE] = '\0';
4284
4285 tmp = strchr(kbuf, ' ');
4286 if (!tmp)
4287 return -EINVAL;
4288 *tmp = '\0';
4289 tmp++;
4290 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4291 return -EINVAL;
4292
4293
4294 mutex_lock(&cache_chain_mutex);
4295 res = -EINVAL;
4296 list_for_each_entry(cachep, &cache_chain, next) {
4297 if (!strcmp(cachep->name, kbuf)) {
4298 if (limit < 1 || batchcount < 1 ||
4299 batchcount > limit || shared < 0) {
4300 res = 0;
4301 } else {
4302 res = do_tune_cpucache(cachep, limit,
4303 batchcount, shared);
4304 }
4305 break;
4306 }
4307 }
4308 mutex_unlock(&cache_chain_mutex);
4309 if (res >= 0)
4310 res = count;
4311 return res;
4312}
4313
4314static int slabinfo_open(struct inode *inode, struct file *file)
4315{
4316 return seq_open(file, &slabinfo_op);
4317}
4318
/*
 * File operations registered for the "slabinfo" proc entry in
 * slab_proc_init(): reads go through the seq_file helpers, writes go to
 * slabinfo_write().
 */
static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};
4326
4327#ifdef CONFIG_DEBUG_SLAB_LEAK
4328
4329static void *leaks_start(struct seq_file *m, loff_t *pos)
4330{
4331 mutex_lock(&cache_chain_mutex);
4332 return seq_list_start(&cache_chain, *pos);
4333}
4334
/*
 * add_caller - account one allocation to its caller in a sorted table
 * @n: table; n[0] is the capacity in entries, n[1] the number of entries
 *     in use, followed by (address, count) pairs sorted by address
 * @v: caller address to account; 0 is ignored
 *
 * A binary search either finds the address and bumps its count, or yields
 * the insertion point for a new entry with count 1.
 *
 * Returns 1 on success (or when @v is 0).  Returns 0 when adding the entry
 * makes n[1] reach n[0]; the entry count is already bumped in that case
 * but nothing is stored.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *lo = n + 2;	/* first pair of the search window */
	int span = n[1];		/* pairs left in the search window */

	if (!v)
		return 1;

	while (span) {
		int half = span / 2;
		unsigned long *mid = lo + 2 * half;

		if (*mid == v) {
			mid[1]++;
			return 1;
		}
		if (*mid > v) {
			span = half;
		} else {
			lo = mid + 2;
			span -= half + 1;
		}
	}

	if (++n[1] == n[0])
		return 0;

	/* open a gap at the insertion point by shifting the tail up a pair */
	memmove(lo + 2, lo,
		n[1] * 2 * sizeof(unsigned long) - ((char *)lo - (char *)n));
	lo[0] = v;
	lo[1] = 1;
	return 1;
}
4364
4365static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4366{
4367 void *p;
4368 int i;
4369 if (n[0] == n[1])
4370 return;
4371 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4372 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4373 continue;
4374 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4375 return;
4376 }
4377}
4378
/*
 * Print @address symbolically as "name+offset/size", followed by the
 * module name in brackets if there is one.  Without CONFIG_KALLSYMS, or
 * when the lookup fails, the raw pointer value is printed instead.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
	unsigned long offset, size;

	if (!lookup_symbol_attrs(address, &size, &offset, modname, name)) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0] != '\0')
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4394
/*
 * leaks_show - list the callers holding active objects of one cache
 * @m: seq_file; m->private points to the caller table (n[0] capacity,
 *     n[1] entries in use, then address/count pairs, see add_caller())
 * @p: list entry of the cache inside cache_chain
 *
 * Caches without both SLAB_STORE_USER and SLAB_RED_ZONE are skipped.
 * Returns 0, or -ENOMEM if the caller table could not be enlarged.
 */
static int leaks_show(struct seq_file *m, void *p)
{
	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
	struct slab *slabp;
	struct kmem_list3 *l3;
	const char *name;
	unsigned long *n = m->private;
	int node;
	int i;

	if (!(cachep->flags & SLAB_STORE_USER))
		return 0;
	if (!(cachep->flags & SLAB_RED_ZONE))
		return 0;

	/* start with an empty table for this cache */

	n[1] = 0;

	for_each_online_node(node) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;

		check_irq_on();
		spin_lock_irq(&l3->list_lock);

		/* only full and partial slabs can hold active objects */
		list_for_each_entry(slabp, &l3->slabs_full, list)
			handle_slab(n, cachep, slabp);
		list_for_each_entry(slabp, &l3->slabs_partial, list)
			handle_slab(n, cachep, slabp);
		spin_unlock_irq(&l3->list_lock);
	}
	name = cachep->name;
	if (n[0] == n[1]) {
		/*
		 * The table filled up.  Replace it with one of twice the
		 * capacity; the mutex is dropped around the allocation.
		 */
		mutex_unlock(&cache_chain_mutex);
		m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
		if (!m->private) {
			/* allocation failed: keep using the old table */
			m->private = n;
			mutex_lock(&cache_chain_mutex);
			return -ENOMEM;
		}
		*(unsigned long *)m->private = n[0] * 2;
		kfree(n);
		mutex_lock(&cache_chain_mutex);
		/*
		 * NOTE(review): presumably marks the seq_file buffer as full
		 * so that this entry is shown again with the larger table -
		 * confirm against seq_read().
		 */
		m->count = m->size;
		return 0;
	}
	/* one line per caller: "<cache>: <count> <symbol>" */
	for (i = 0; i < n[1]; i++) {
		seq_printf(m, "%s: %lu ", name, n[2*i+3]);
		show_symbol(m, n[2*i+2]);
		seq_putc(m, '\n');
	}

	return 0;
}
4454
/*
 * seq_file iterator for the allocator listing (see slabstats_open()).  It
 * shares s_next() and s_stop() with slabinfo but uses its own start and
 * show hooks.
 */
static const struct seq_operations slabstats_op = {
	.start = leaks_start,
	.next = s_next,
	.stop = s_stop,
	.show = leaks_show,
};
4461
4462static int slabstats_open(struct inode *inode, struct file *file)
4463{
4464 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4465 int ret = -ENOMEM;
4466 if (n) {
4467 ret = seq_open(file, &slabstats_op);
4468 if (!ret) {
4469 struct seq_file *m = file->private_data;
4470 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4471 m->private = n;
4472 n = NULL;
4473 }
4474 kfree(n);
4475 }
4476 return ret;
4477}
4478
/*
 * File operations registered for the "slab_allocators" proc entry in
 * slab_proc_init().  The entry is read-only; release goes through
 * seq_release_private.
 * NOTE(review): presumably seq_release_private also frees the caller table
 * stored in m->private - confirm.
 */
static const struct file_operations proc_slabstats_operations = {
	.open = slabstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
4485#endif
4486
4487static int __init slab_proc_init(void)
4488{
4489 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4490#ifdef CONFIG_DEBUG_SLAB_LEAK
4491 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4492#endif
4493 return 0;
4494}
4495module_init(slab_proc_init);
4496#endif
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510size_t ksize(const void *objp)
4511{
4512 BUG_ON(!objp);
4513 if (unlikely(objp == ZERO_SIZE_PTR))
4514 return 0;
4515
4516 return obj_size(virt_to_cache(objp));
4517}
4518EXPORT_SYMBOL(ksize);
4519