1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include <linux/mm.h>
91#include <linux/poison.h>
92#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
97#include <linux/cpuset.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/notifier.h>
101#include <linux/kallsyms.h>
102#include <linux/cpu.h>
103#include <linux/sysctl.h>
104#include <linux/module.h>
105#include <linux/rcupdate.h>
106#include <linux/string.h>
107#include <linux/uaccess.h>
108#include <linux/nodemask.h>
109#include <linux/kmemleak.h>
110#include <linux/mempolicy.h>
111#include <linux/mutex.h>
112#include <linux/fault-inject.h>
113#include <linux/rtmutex.h>
114#include <linux/reciprocal_div.h>
115#include <linux/debugobjects.h>
116#include <linux/kmemcheck.h>
117#include <linux/memory.h>
118#include <linux/prefetch.h>
119
120#include <asm/cacheflush.h>
121#include <asm/tlbflush.h>
122#include <asm/page.h>
123
124
125
126
127
128
129
130
131
132
133
/*
 * Compile-time switches derived from CONFIG_DEBUG_SLAB.
 *
 * DEBUG selects the real debug accessors (obj_offset(), dbg_redzone*(), ...)
 * and the wider CREATE_MASK later in this file; STATS selects the counting
 * variants of the STATS_* macros instead of the no-op ones.
 */
#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif
143
144
/* Size of one machine word, taken to be the size of a pointer. */
#define	BYTES_PER_WORD		sizeof(void *)
/*
 * Alignment of the debug red zones: the larger of one word and the natural
 * alignment of unsigned long long.
 */
#define	REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

/*
 * Flags passed when the general "size-N" caches are created, unless the
 * architecture has already supplied its own definition.
 */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
151
152
/*
 * Mask of SLAB_* flags.  The DEBUG build adds SLAB_RED_ZONE, SLAB_POISON and
 * SLAB_STORE_USER to the set that is present in both variants.
 * NOTE(review): presumably this is the set of flags accepted at cache
 * creation time; the users are outside this part of the file - confirm.
 */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*
 * kmem_bufctl_t is the per-object bookkeeping word stored after struct slab
 * (slab_mgmt_size() reserves one per object).
 *
 * The four largest unsigned int values are reserved: BUFCTL_END,
 * BUFCTL_FREE and BUFCTL_ACTIVE are markers, and SLAB_LIMIT is the cap that
 * cache_estimate() applies to the number of objects per slab.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/*
 * struct slab_rcu
 *
 * Overlaid on struct slab through the union in that structure
 * (member __slab_cover_slab_rcu).
 * NOTE(review): presumably used to free a slab's pages after an RCU grace
 * period for SLAB_DESTROY_BY_RCU caches; the code doing so is not in this
 * part of the file - confirm.
 */
struct slab_rcu {
	struct rcu_head head;		/* RCU callback linkage */
	struct kmem_cache *cachep;	/* cache the slab belongs to */
	void *addr;			/* address associated with the slab */
};
213
214
215
216
217
218
219
220
/*
 * struct slab
 *
 * Management header for one slab.  slab_mgmt_size() sizes the management
 * area as this structure followed by one kmem_bufctl_t per object.
 * The anonymous struct shares storage with a struct slab_rcu.
 */
struct slab {
	union {
		struct {
			struct list_head list;	/* list linkage (presumably one of the kmem_list3 slab lists) */
			unsigned long colouroff;
			void *s_mem;		/* base address used by index_to_obj()/obj_to_index() */
			unsigned int inuse;
			kmem_bufctl_t free;
			unsigned short nodeid;	/* node compared against numa_mem_id() in cache_free_alien() */
		};
		struct slab_rcu __slab_cover_slab_rcu;
	};
};
234
235
236
237
238
239
240
241
242
243
244
245
246
/*
 * struct array_cache
 *
 * A stack of object pointers.  entry[0 .. avail-1] are the valid slots;
 * transfer_objects() moves pointers from the top of one array to the top of
 * another and never lets avail exceed limit.
 *
 * The positional initializers of initarray_cache/initarray_generic depend on
 * the order of the first four members.
 */
struct array_cache {
	unsigned int avail;		/* number of pointers currently stored */
	unsigned int limit;		/* capacity of entry[] */
	unsigned int batchcount;
	unsigned int touched;
	spinlock_t lock;		/* taken around alien-cache accesses */
	void *entry[];			/* flexible array of object pointers */




};
259
260
261
262
263
/*
 * Statically sized array_cache used during bootstrap: the header plus room
 * for BOOT_CPUCACHE_ENTRIES object pointers backing cache.entry[].
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
269
270
271
272
/*
 * struct kmem_list3
 *
 * Per-node state of a cache (reached through cachep->nodelists[node]):
 * the three slab lists plus the shared and alien array caches.
 */
struct kmem_list3 {
	struct list_head slabs_partial;
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;	/* passed as the amount to drain to drain_freelist() */
	unsigned int free_limit;	/* (1 + cpus on node) * batchcount + num, see init_cache_nodelists_node() */
	unsigned int colour_next;
	spinlock_t list_lock;		/* held around free_block() and updates of the fields above */
	struct array_cache *shared;	/* array cache shared on this node, may be NULL */
	struct array_cache **alien;	/* array indexed by node id, may be NULL */
	unsigned long next_reap;	/* jiffies-based timestamp */
	int free_touched;
};
286
287
288
289
/*
 * Static kmem_list3 storage used while bootstrapping.  The table holds three
 * groups of MAX_NUMNODES entries; CACHE_CACHE, SIZE_AC and SIZE_L3 are the
 * starting indices of the groups, and a node id is added to select an entry
 * (see kmem_cache_init()).
 */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_AC MAX_NUMNODES
#define	SIZE_L3 (2 * MAX_NUMNODES)

/* Forward declarations of helpers defined later in the file. */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
302
303
304
305
306
/*
 * index_of - index of the general cache that fits @size
 *
 * For a compile-time constant @size, <linux/kmalloc_sizes.h> is expanded with
 * CACHE(x) defined as a test, so the function returns the position of the
 * first listed size that is >= @size.  If no entry fits, or if @size is not
 * a compile-time constant, __bad_size() is called; it is only declared here.
 * NOTE(review): presumably __bad_size() is left undefined on purpose so that
 * misuse fails at link time - confirm.
 */
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

/* One test per size listed in kmalloc_sizes.h; i counts entries skipped. */
#define CACHE(x) \
	if (size <=x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}
326
/* Set to 1 at start-up; kmem_cache_init() clears it after the first general caches exist. */
static int slab_early_init = 1;

/* Indices into malloc_sizes[] of the caches that fit the bootstrap structures. */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))
331
332static void kmem_list3_init(struct kmem_list3 *parent)
333{
334 INIT_LIST_HEAD(&parent->slabs_full);
335 INIT_LIST_HEAD(&parent->slabs_partial);
336 INIT_LIST_HEAD(&parent->slabs_free);
337 parent->shared = NULL;
338 parent->alien = NULL;
339 parent->colour_next = 0;
340 spin_lock_init(&parent->list_lock);
341 parent->free_objects = 0;
342 parent->free_touched = 0;
343}
344
/*
 * MAKE_LIST - move one slab list of a node into a new list head
 * @cachep: cache whose nodelists[@nodeid] is the source
 * @listp:  destination list head; it is re-initialised first
 * @slab:   name of the kmem_list3 member to move (slabs_full, ...)
 * @nodeid: node index
 *
 * @cachep and @listp are parenthesized so that expression arguments expand
 * with the intended precedence.  @listp is evaluated twice.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD((listp));				\
		list_splice(&((cachep)->nodelists[nodeid]->slab), (listp)); \
	} while (0)

/*
 * MAKE_ALL_LISTS - apply MAKE_LIST to the full, partial and free lists,
 * moving them from @cachep's node @nodeid into the kmem_list3 at @ptr.
 */
#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)
357
/*
 * Internal cache flag.  cache_estimate() reserves no in-slab management
 * space when it is set.
 */
#define CFLGS_OFF_SLAB		(0x80000000UL)
/* Non-zero if cache @x has CFLGS_OFF_SLAB set. */
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16







/* Reap intervals in jiffies: 2 and 4 seconds respectively. */
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)
371
/*
 * Statistics helpers.  With STATS enabled they update counters in the cache
 * descriptor @x; otherwise they expand to empty statements (STATS_ADD_REAPED
 * still evaluates @y so the argument is not reported as unused).
 *
 * STATS_SET_FREEABLE parenthesizes @i so that an expression argument is
 * compared and stored as a whole; note that @i is evaluated twice.
 */
#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
/* Track the largest value num_active has reached. */
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)   ((x)->node_overflow++)
/* Raise max_freeable to @i if @i is larger. */
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < (i))				\
			(x)->max_freeable = (i);			\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { (void)(y); } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)   do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif
413
#if DEBUG

/*
 * Debug object layout helpers.  They read the debug-only fields obj_offset
 * and obj_size of the cache descriptor and locate the red zones and the
 * user word relative to an object's start address @objp.
 */

/* Offset stored in the cache descriptor's obj_offset field. */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

/* Size stored in the cache descriptor's obj_size field. */
static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

/*
 * First red zone: the unsigned long long located immediately before
 * @objp + obj_offset().  Only valid for SLAB_RED_ZONE caches.
 */
static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long*) (objp + obj_offset(cachep) -
				      sizeof(unsigned long long));
}

/*
 * Second red zone: the last unsigned long long of the buffer, or, when
 * SLAB_STORE_USER is also set, the one REDZONE_ALIGN bytes before that.
 * Only valid for SLAB_RED_ZONE caches.
 */
static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->buffer_size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->buffer_size -
				       sizeof(unsigned long long));
}

/*
 * User word: the last BYTES_PER_WORD bytes of the buffer.
 * Only valid for SLAB_STORE_USER caches.
 */
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

/*
 * Non-debug build: objects start at offset 0 and span the whole buffer.
 * The dbg_* accessors must never be reached and BUG() if they are.
 * The obj_size() argument is parenthesized so any pointer expression works.
 */
#define obj_offset(x)			0
#define obj_size(cachep)		((cachep)->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif
472
#ifdef CONFIG_TRACING
/*
 * slab_buffer_size - return the buffer_size field of @cachep.
 * Only built with CONFIG_TRACING; exported for use outside this file.
 */
size_t slab_buffer_size(struct kmem_cache *cachep)
{
	return cachep->buffer_size;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
480
481
482
483
/*
 * Page-order threshold.  It starts at BREAK_GFP_ORDER_LO and is raised to
 * BREAK_GFP_ORDER_HI by kmem_cache_init() when the machine has more than
 * 32MB worth of pages.
 */
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
487
488
489
490
491
492
493static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
494{
495 page->lru.next = (struct list_head *)cache;
496}
497
498static inline struct kmem_cache *page_get_cache(struct page *page)
499{
500 page = compound_head(page);
501 BUG_ON(!PageSlab(page));
502 return (struct kmem_cache *)page->lru.next;
503}
504
505static inline void page_set_slab(struct page *page, struct slab *slab)
506{
507 page->lru.prev = (struct list_head *)slab;
508}
509
510static inline struct slab *page_get_slab(struct page *page)
511{
512 BUG_ON(!PageSlab(page));
513 return (struct slab *)page->lru.prev;
514}
515
/* Map an object address to the cache recorded in its head page. */
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	return page_get_cache(virt_to_head_page(obj));
}
521
/* Map an object address to the slab descriptor recorded in its head page. */
static inline struct slab *virt_to_slab(const void *obj)
{
	return page_get_slab(virt_to_head_page(obj));
}
527
528static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
529 unsigned int idx)
530{
531 return slab->s_mem + cache->buffer_size * idx;
532}
533
534
535
536
537
538
539
540static inline unsigned int obj_to_index(const struct kmem_cache *cache,
541 const struct slab *slab, void *obj)
542{
543 u32 offset = (obj - slab->s_mem);
544 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
545}
546
547
548
549
/*
 * Table of the general caches, one entry per size listed in
 * <linux/kmalloc_sizes.h>, in that order.  The final entry with
 * cs_size == ULONG_MAX is the terminator that table walks stop at.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
557
558
/* Names for the regular and the DMA variant of one general cache. */
struct cache_names {
	char *name;
	char *name_dma;
};

/*
 * Name table built from the same size list as malloc_sizes[], so the two
 * arrays are walked in parallel by kmem_cache_init().  Names have the form
 * "size-N" and "size-N(DMA)"; the table ends with a NULL entry.
 */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
570
571static struct arraycache_init initarray_cache __initdata =
572 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
573static struct arraycache_init initarray_generic =
574 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
575
576
/*
 * Statically allocated cache descriptor named "kmem_cache", together with
 * its node-list pointer array.  kmem_cache_init() finishes setting it up
 * (it recomputes buffer_size and fills in the remaining fields).
 */
static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
static struct kmem_cache cache_cache = {
	.nodelists = cache_cache_nodelists,
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
586
/*
 * Dummy non-NULL value returned by the !CONFIG_NUMA alloc_alien_cache();
 * it must never be dereferenced (slab_set_lock_classes() checks for it).
 */
#define BAD_ALIEN_MAGIC 0x01020304ul
588
589
590
591
592
/*
 * Bootstrap progress of the allocator, in increasing order.
 * slab_is_available() reports true from EARLY onwards, and
 * init_node_lock_keys() does nothing before LATE.
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,
	LATE,
	FULL
} g_cpucache_up;
601
602
603
604
605int slab_is_available(void)
606{
607 return g_cpucache_up >= EARLY;
608}
609
610#ifdef CONFIG_LOCKDEP
611
612
613
614
615
616
617
618
619
620
621
622
/*
 * Lockdep class keys.  The on_slab_* pair is applied by
 * init_node_lock_keys() to the general caches that are not OFF_SLAB; the
 * debugobj_* pair is applied by slab_set_debugobj_lock_classes*().  In each
 * pair the l3 key is for kmem_list3.list_lock and the alc key for the alien
 * array caches' locks.
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static struct lock_class_key debugobj_l3_key;
static struct lock_class_key debugobj_alc_key;
628
629static void slab_set_lock_classes(struct kmem_cache *cachep,
630 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
631 int q)
632{
633 struct array_cache **alc;
634 struct kmem_list3 *l3;
635 int r;
636
637 l3 = cachep->nodelists[q];
638 if (!l3)
639 return;
640
641 lockdep_set_class(&l3->list_lock, l3_key);
642 alc = l3->alien;
643
644
645
646
647
648
649
650 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
651 return;
652 for_each_node(r) {
653 if (alc[r])
654 lockdep_set_class(&alc[r]->lock, alc_key);
655 }
656}
657
658static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
659{
660 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
661}
662
663static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
664{
665 int node;
666
667 for_each_online_node(node)
668 slab_set_debugobj_lock_classes_node(cachep, node);
669}
670
671static void init_node_lock_keys(int q)
672{
673 struct cache_sizes *s = malloc_sizes;
674
675 if (g_cpucache_up < LATE)
676 return;
677
678 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
679 struct kmem_list3 *l3;
680
681 l3 = s->cs_cachep->nodelists[q];
682 if (!l3 || OFF_SLAB(s->cs_cachep))
683 continue;
684
685 slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
686 &on_slab_alc_key, q);
687 }
688}
689
690static inline void init_lock_keys(void)
691{
692 int node;
693
694 for_each_node(node)
695 init_node_lock_keys(node);
696}
697#else
/*
 * !CONFIG_LOCKDEP: the lock-class helpers are empty stubs so callers need
 * no conditional compilation.
 */
static void init_node_lock_keys(int q)
{
}

static inline void init_lock_keys(void)
{
}

static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}

static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
}
713#endif
714
715
716
717
/*
 * cache_chain links every cache through its 'next' member.  The CPU and
 * memory hotplug callbacks in this file take cache_chain_mutex around their
 * walks of the list.
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/* Per-cpu delayed work item that runs cache_reap(); see start_cpu_timer(). */
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
722
723static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
724{
725 return cachep->array[smp_processor_id()];
726}
727
728static inline struct kmem_cache *__find_general_cachep(size_t size,
729 gfp_t gfpflags)
730{
731 struct cache_sizes *csizep = malloc_sizes;
732
733#if DEBUG
734
735
736
737
738 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
739#endif
740 if (!size)
741 return ZERO_SIZE_PTR;
742
743 while (size > csizep->cs_size)
744 csizep++;
745
746
747
748
749
750
751#ifdef CONFIG_ZONE_DMA
752 if (unlikely(gfpflags & GFP_DMA))
753 return csizep->cs_dmacachep;
754#endif
755 return csizep->cs_cachep;
756}
757
758static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
759{
760 return __find_general_cachep(size, gfpflags);
761}
762
763static size_t slab_mgmt_size(size_t nr_objs, size_t align)
764{
765 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
766}
767
768
769
770
771static void cache_estimate(unsigned long gfporder, size_t buffer_size,
772 size_t align, int flags, size_t *left_over,
773 unsigned int *num)
774{
775 int nr_objs;
776 size_t mgmt_size;
777 size_t slab_size = PAGE_SIZE << gfporder;
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794 if (flags & CFLGS_OFF_SLAB) {
795 mgmt_size = 0;
796 nr_objs = slab_size / buffer_size;
797
798 if (nr_objs > SLAB_LIMIT)
799 nr_objs = SLAB_LIMIT;
800 } else {
801
802
803
804
805
806
807
808
809 nr_objs = (slab_size - sizeof(struct slab)) /
810 (buffer_size + sizeof(kmem_bufctl_t));
811
812
813
814
815
816 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
817 > slab_size)
818 nr_objs--;
819
820 if (nr_objs > SLAB_LIMIT)
821 nr_objs = SLAB_LIMIT;
822
823 mgmt_size = slab_mgmt_size(nr_objs, align);
824 }
825 *num = nr_objs;
826 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
827}
828
829#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
830
831static void __slab_error(const char *function, struct kmem_cache *cachep,
832 char *msg)
833{
834 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
835 function, cachep->name, msg);
836 dump_stack();
837}
838
839
840
841
842
843
844
845
846
/*
 * Non-zero when alien caches are to be allocated (see cpuup_prepare()).
 * Cleared by the "noaliencache" boot parameter, and by kmem_cache_init()
 * when only one node is possible.
 */
static int use_alien_caches __read_mostly = 1;
/* Handler for the "noaliencache" boot parameter; the argument is ignored. */
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);
854
855#ifdef CONFIG_NUMA
856
857
858
859
860
861
/*
 * Per-cpu node id: the node whose alien cache reap_alien() drains next.
 * Advanced round-robin over the online nodes by next_reap_node().
 */
static DEFINE_PER_CPU(unsigned long, slab_reap_node);
863
864static void init_reap_node(int cpu)
865{
866 int node;
867
868 node = next_node(cpu_to_mem(cpu), node_online_map);
869 if (node == MAX_NUMNODES)
870 node = first_node(node_online_map);
871
872 per_cpu(slab_reap_node, cpu) = node;
873}
874
875static void next_reap_node(void)
876{
877 int node = __this_cpu_read(slab_reap_node);
878
879 node = next_node(node, node_online_map);
880 if (unlikely(node >= MAX_NUMNODES))
881 node = first_node(node_online_map);
882 __this_cpu_write(slab_reap_node, node);
883}
884
885#else
/* !CONFIG_NUMA: there is no reap node to track, so both helpers are no-ops. */
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
888#endif
889
890
891
892
893
894
895
896
897static void __cpuinit start_cpu_timer(int cpu)
898{
899 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
900
901
902
903
904
905
906 if (keventd_up() && reap_work->work.func == NULL) {
907 init_reap_node(cpu);
908 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
909 schedule_delayed_work_on(cpu, reap_work,
910 __round_jiffies_relative(HZ, cpu));
911 }
912}
913
914static struct array_cache *alloc_arraycache(int node, int entries,
915 int batchcount, gfp_t gfp)
916{
917 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
918 struct array_cache *nc = NULL;
919
920 nc = kmalloc_node(memsize, gfp, node);
921
922
923
924
925
926
927
928 kmemleak_no_scan(nc);
929 if (nc) {
930 nc->avail = 0;
931 nc->limit = entries;
932 nc->batchcount = batchcount;
933 nc->touched = 0;
934 spin_lock_init(&nc->lock);
935 }
936 return nc;
937}
938
939
940
941
942
943
944
945static int transfer_objects(struct array_cache *to,
946 struct array_cache *from, unsigned int max)
947{
948
949 int nr = min3(from->avail, max, to->limit - to->avail);
950
951 if (!nr)
952 return 0;
953
954 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
955 sizeof(void *) *nr);
956
957 from->avail -= nr;
958 to->avail += nr;
959 return nr;
960}
961
962#ifndef CONFIG_NUMA
963
/*
 * !CONFIG_NUMA: there are no remote nodes, so the alien-cache and
 * node-specific allocation helpers collapse to no-ops.
 */
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

/* Returns a dummy non-NULL marker; it must never be dereferenced. */
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	return (struct array_cache **)BAD_ALIEN_MAGIC;
}

/* Nothing was allocated, so nothing to free. */
static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

/* Always 0: the object was not handled here. */
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	return 0;
}

/* Always NULL: no alternate node to allocate from. */
static inline void *alternate_node_alloc(struct kmem_cache *cachep,
		gfp_t flags)
{
	return NULL;
}

/* Always NULL: node-specific allocation is not available. */
static inline void *____cache_alloc_node(struct kmem_cache *cachep,
		 gfp_t flags, int nodeid)
{
	return NULL;
}
992
993#else
994
995static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
996static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
997
998static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
999{
1000 struct array_cache **ac_ptr;
1001 int memsize = sizeof(void *) * nr_node_ids;
1002 int i;
1003
1004 if (limit > 1)
1005 limit = 12;
1006 ac_ptr = kzalloc_node(memsize, gfp, node);
1007 if (ac_ptr) {
1008 for_each_node(i) {
1009 if (i == node || !node_online(i))
1010 continue;
1011 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
1012 if (!ac_ptr[i]) {
1013 for (i--; i >= 0; i--)
1014 kfree(ac_ptr[i]);
1015 kfree(ac_ptr);
1016 return NULL;
1017 }
1018 }
1019 }
1020 return ac_ptr;
1021}
1022
1023static void free_alien_cache(struct array_cache **ac_ptr)
1024{
1025 int i;
1026
1027 if (!ac_ptr)
1028 return;
1029 for_each_node(i)
1030 kfree(ac_ptr[i]);
1031 kfree(ac_ptr);
1032}
1033
1034static void __drain_alien_cache(struct kmem_cache *cachep,
1035 struct array_cache *ac, int node)
1036{
1037 struct kmem_list3 *rl3 = cachep->nodelists[node];
1038
1039 if (ac->avail) {
1040 spin_lock(&rl3->list_lock);
1041
1042
1043
1044
1045
1046 if (rl3->shared)
1047 transfer_objects(rl3->shared, ac, ac->limit);
1048
1049 free_block(cachep, ac->entry, ac->avail, node);
1050 ac->avail = 0;
1051 spin_unlock(&rl3->list_lock);
1052 }
1053}
1054
1055
1056
1057
1058static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1059{
1060 int node = __this_cpu_read(slab_reap_node);
1061
1062 if (l3->alien) {
1063 struct array_cache *ac = l3->alien[node];
1064
1065 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1066 __drain_alien_cache(cachep, ac, node);
1067 spin_unlock_irq(&ac->lock);
1068 }
1069 }
1070}
1071
1072static void drain_alien_cache(struct kmem_cache *cachep,
1073 struct array_cache **alien)
1074{
1075 int i = 0;
1076 struct array_cache *ac;
1077 unsigned long flags;
1078
1079 for_each_online_node(i) {
1080 ac = alien[i];
1081 if (ac) {
1082 spin_lock_irqsave(&ac->lock, flags);
1083 __drain_alien_cache(cachep, ac, i);
1084 spin_unlock_irqrestore(&ac->lock, flags);
1085 }
1086 }
1087}
1088
1089static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1090{
1091 struct slab *slabp = virt_to_slab(objp);
1092 int nodeid = slabp->nodeid;
1093 struct kmem_list3 *l3;
1094 struct array_cache *alien = NULL;
1095 int node;
1096
1097 node = numa_mem_id();
1098
1099
1100
1101
1102
1103 if (likely(slabp->nodeid == node))
1104 return 0;
1105
1106 l3 = cachep->nodelists[node];
1107 STATS_INC_NODEFREES(cachep);
1108 if (l3->alien && l3->alien[nodeid]) {
1109 alien = l3->alien[nodeid];
1110 spin_lock(&alien->lock);
1111 if (unlikely(alien->avail == alien->limit)) {
1112 STATS_INC_ACOVERFLOW(cachep);
1113 __drain_alien_cache(cachep, alien, nodeid);
1114 }
1115 alien->entry[alien->avail++] = objp;
1116 spin_unlock(&alien->lock);
1117 } else {
1118 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1119 free_block(cachep, &objp, 1, nodeid);
1120 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1121 }
1122 return 1;
1123}
1124#endif
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135static int init_cache_nodelists_node(int node)
1136{
1137 struct kmem_cache *cachep;
1138 struct kmem_list3 *l3;
1139 const int memsize = sizeof(struct kmem_list3);
1140
1141 list_for_each_entry(cachep, &cache_chain, next) {
1142
1143
1144
1145
1146
1147 if (!cachep->nodelists[node]) {
1148 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1149 if (!l3)
1150 return -ENOMEM;
1151 kmem_list3_init(l3);
1152 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1153 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1154
1155
1156
1157
1158
1159
1160 cachep->nodelists[node] = l3;
1161 }
1162
1163 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1164 cachep->nodelists[node]->free_limit =
1165 (1 + nr_cpus_node(node)) *
1166 cachep->batchcount + cachep->num;
1167 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1168 }
1169 return 0;
1170}
1171
/*
 * cpuup_canceled - release the per-cpu slab state of @cpu
 *
 * Used when bringing a CPU up was cancelled, when a CPU has died, and as the
 * error path of cpuup_prepare().  The callers in this file hold
 * cache_chain_mutex.
 *
 * For every cache, the CPU's array cache is detached, its objects are given
 * back to the node and it is freed.  If the CPU mask of the node is empty,
 * the node's shared and alien caches are torn down as well.  A second pass
 * drains the free slabs of every cache on that node.
 */
static void __cpuinit cpuup_canceled(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_mem(cpu);
	const struct cpumask *mask = cpumask_of_node(node);

	list_for_each_entry(cachep, &cache_chain, next) {
		struct array_cache *nc;
		struct array_cache *shared;
		struct array_cache **alien;

		/* Detach the cpu's array cache before touching the node. */
		nc = cachep->array[cpu];
		cachep->array[cpu] = NULL;
		l3 = cachep->nodelists[node];

		if (!l3)
			goto free_array_cache;

		spin_lock_irq(&l3->list_lock);

		/* This cpu no longer contributes a batch to the free limit. */
		l3->free_limit -= cachep->batchcount;
		if (nc)
			free_block(cachep, nc->entry, nc->avail, node);

		/* Other cpus are still in the node's mask: keep shared/alien. */
		if (!cpumask_empty(mask)) {
			spin_unlock_irq(&l3->list_lock);
			goto free_array_cache;
		}

		shared = l3->shared;
		if (shared) {
			free_block(cachep, shared->entry,
				   shared->avail, node);
			l3->shared = NULL;
		}

		alien = l3->alien;
		l3->alien = NULL;

		spin_unlock_irq(&l3->list_lock);

		/* Detached under the lock above; free them outside of it. */
		kfree(shared);
		if (alien) {
			drain_alien_cache(cachep, alien);
			free_alien_cache(alien);
		}
free_array_cache:
		kfree(nc);
	}

	/* Second pass: release the free slabs each cache holds on the node. */
	list_for_each_entry(cachep, &cache_chain, next) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;
		drain_freelist(cachep, l3, l3->free_objects);
	}
}
1236
/*
 * cpuup_prepare - set up the per-cpu slab state for a CPU coming up
 *
 * Called with cache_chain_mutex held (see cpuup_callback()).  First makes
 * sure every cache has list structures for the CPU's memory node, then
 * allocates, per cache, the CPU's array cache plus (if not yet installed on
 * the node) the shared and alien caches.
 *
 * Returns 0 on success.  On any failure everything set up so far for @cpu
 * is undone through cpuup_canceled() and -ENOMEM is returned.
 */
static int __cpuinit cpuup_prepare(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_mem(cpu);
	int err;

	/* Every cache needs a kmem_list3 for this node before going on. */
	err = init_cache_nodelists_node(node);
	if (err < 0)
		goto bad;

	list_for_each_entry(cachep, &cache_chain, next) {
		struct array_cache *nc;
		struct array_cache *shared = NULL;
		struct array_cache **alien = NULL;

		nc = alloc_arraycache(node, cachep->limit,
					cachep->batchcount, GFP_KERNEL);
		if (!nc)
			goto bad;
		if (cachep->shared) {
			shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, GFP_KERNEL);
			if (!shared) {
				kfree(nc);
				goto bad;
			}
		}
		if (use_alien_caches) {
			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
			if (!alien) {
				kfree(shared);
				kfree(nc);
				goto bad;
			}
		}
		cachep->array[cpu] = nc;
		l3 = cachep->nodelists[node];
		BUG_ON(!l3);

		spin_lock_irq(&l3->list_lock);
		if (!l3->shared) {
			/* Node had no shared cache yet: install ours. */
			l3->shared = shared;
			shared = NULL;
		}
#ifdef CONFIG_NUMA
		if (!l3->alien) {
			l3->alien = alien;
			alien = NULL;
		}
#endif
		spin_unlock_irq(&l3->list_lock);
		/* Whatever was not installed above is surplus; free it. */
		kfree(shared);
		free_alien_cache(alien);
		if (cachep->flags & SLAB_DEBUG_OBJECTS)
			slab_set_debugobj_lock_classes_node(cachep, node);
	}
	init_node_lock_keys(node);

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}
1316
/*
 * cpuup_callback - CPU hotplug notifier for the slab allocator
 * @nfb:    notifier block (unused)
 * @action: hotplug event
 * @hcpu:   CPU number, passed as a pointer-sized value
 *
 * Returns the result of the event handling converted with
 * notifier_from_errno(); only the UP_PREPARE events can report an error.
 * Events not listed are ignored.
 */
static int __cpuinit cpuup_callback(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&cache_chain_mutex);
		err = cpuup_prepare(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		break;
#ifdef CONFIG_HOTPLUG_CPU
  	case CPU_DOWN_PREPARE:
  	case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * Stop the reap work for the CPU and clear its handler so
		 * that start_cpu_timer() will set it up again later.
		 */
		cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
		/* Now the cache_reaper is guaranteed to be not running. */
		per_cpu(slab_reap_work, cpu).work.func = NULL;
  		break;
  	case CPU_DOWN_FAILED:
  	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
  		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/*
		 * Deliberate fall through: a dead CPU is cleaned up exactly
		 * like a cancelled bring-up below.
		 */
#endif
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		mutex_lock(&cache_chain_mutex);
		cpuup_canceled(cpu);
		mutex_unlock(&cache_chain_mutex);
		break;
	}
	return notifier_from_errno(err);
}
1372
1373static struct notifier_block __cpuinitdata cpucache_notifier = {
1374 &cpuup_callback, NULL, 0
1375};
1376
1377#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1378
1379
1380
1381
1382
1383
1384
1385static int __meminit drain_cache_nodelists_node(int node)
1386{
1387 struct kmem_cache *cachep;
1388 int ret = 0;
1389
1390 list_for_each_entry(cachep, &cache_chain, next) {
1391 struct kmem_list3 *l3;
1392
1393 l3 = cachep->nodelists[node];
1394 if (!l3)
1395 continue;
1396
1397 drain_freelist(cachep, l3, l3->free_objects);
1398
1399 if (!list_empty(&l3->slabs_full) ||
1400 !list_empty(&l3->slabs_partial)) {
1401 ret = -EBUSY;
1402 break;
1403 }
1404 }
1405 return ret;
1406}
1407
1408static int __meminit slab_memory_callback(struct notifier_block *self,
1409 unsigned long action, void *arg)
1410{
1411 struct memory_notify *mnb = arg;
1412 int ret = 0;
1413 int nid;
1414
1415 nid = mnb->status_change_nid;
1416 if (nid < 0)
1417 goto out;
1418
1419 switch (action) {
1420 case MEM_GOING_ONLINE:
1421 mutex_lock(&cache_chain_mutex);
1422 ret = init_cache_nodelists_node(nid);
1423 mutex_unlock(&cache_chain_mutex);
1424 break;
1425 case MEM_GOING_OFFLINE:
1426 mutex_lock(&cache_chain_mutex);
1427 ret = drain_cache_nodelists_node(nid);
1428 mutex_unlock(&cache_chain_mutex);
1429 break;
1430 case MEM_ONLINE:
1431 case MEM_OFFLINE:
1432 case MEM_CANCEL_ONLINE:
1433 case MEM_CANCEL_OFFLINE:
1434 break;
1435 }
1436out:
1437 return notifier_from_errno(ret);
1438}
1439#endif
1440
1441
1442
1443
1444static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1445 int nodeid)
1446{
1447 struct kmem_list3 *ptr;
1448
1449 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1450 BUG_ON(!ptr);
1451
1452 memcpy(ptr, list, sizeof(struct kmem_list3));
1453
1454
1455
1456 spin_lock_init(&ptr->list_lock);
1457
1458 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1459 cachep->nodelists[nodeid] = ptr;
1460}
1461
1462
1463
1464
1465
1466static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1467{
1468 int node;
1469
1470 for_each_online_node(node) {
1471 cachep->nodelists[node] = &initkmem_list3[index + node];
1472 cachep->nodelists[node]->next_reap = jiffies +
1473 REAPTIMEOUT_LIST3 +
1474 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1475 }
1476}
1477
1478
1479
1480
1481
/*
 * kmem_cache_init - bootstrap the slab allocator.
 *
 * Brings up cache_cache from static bootstrap data, creates the general
 * caches listed in malloc_sizes[], replaces the static bootstrap array
 * caches with kmalloc'ed copies, hands the bootstrap node lists to
 * init_list(), and finally sets g_cpucache_up to EARLY.
 */
void __init kmem_cache_init(void)
{
	size_t left_over;
	struct cache_sizes *sizes;
	struct cache_names *names;
	int i;
	int order;
	int node;

	/* Disable alien caches when only a single node is possible. */
	if (num_possible_nodes() == 1)
		use_alien_caches = 0;

	/* Initialise every bootstrap list3 and clear cache_cache's node table. */
	for (i = 0; i < NUM_INIT_LISTS; i++) {
		kmem_list3_init(&initkmem_list3[i]);
		if (i < MAX_NUMNODES)
			cache_cache.nodelists[i] = NULL;
	}
	set_up_list3s(&cache_cache, CACHE_CACHE);

	/*
	 * With more than 32MB worth of pages, switch the slab order break
	 * point to BREAK_GFP_ORDER_HI.
	 */
	if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
		slab_break_gfp_order = BREAK_GFP_ORDER_HI;

	node = numa_mem_id();

	/* Step 1: wire up cache_cache by hand using the static bootstrap data. */
	INIT_LIST_HEAD(&cache_chain);
	list_add(&cache_cache.next, &cache_chain);
	cache_cache.colour_off = cache_line_size();
	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

	/*
	 * A struct kmem_cache object is sized for nr_cpu_ids array entries
	 * followed by nr_node_ids list3 pointers.
	 */
	cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
				  nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
	cache_cache.obj_size = cache_cache.buffer_size;
#endif
	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
					cache_line_size());
	cache_cache.reciprocal_buffer_size =
		reciprocal_value(cache_cache.buffer_size);

	/* Pick the smallest page order that fits at least one object. */
	for (order = 0; order < MAX_ORDER; order++) {
		cache_estimate(order, cache_cache.buffer_size,
			cache_line_size(), 0, &left_over, &cache_cache.num);
		if (cache_cache.num)
			break;
	}
	BUG_ON(!cache_cache.num);
	cache_cache.gfporder = order;
	cache_cache.colour = left_over / cache_cache.colour_off;
	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
				      sizeof(struct slab), cache_line_size());

	/* Step 2: create the general (kmalloc) caches. */
	sizes = malloc_sizes;
	names = cache_names;

	/*
	 * The INDEX_AC and INDEX_L3 caches are created before all others.
	 * NOTE(review): presumably because they back the arraycache_init and
	 * kmem_list3 allocations needed by later caches -- confirm against
	 * the INDEX_AC / INDEX_L3 definitions.
	 */
	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
					sizes[INDEX_AC].cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);

	if (INDEX_AC != INDEX_L3) {
		sizes[INDEX_L3].cs_cachep =
			kmem_cache_create(names[INDEX_L3].name,
				sizes[INDEX_L3].cs_size,
				ARCH_KMALLOC_MINALIGN,
				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
				NULL);
	}

	/* From here on kmem_cache_create() may choose off-slab management. */
	slab_early_init = 0;

	/* Walk the size table until the ULONG_MAX terminator. */
	while (sizes->cs_size != ULONG_MAX) {
		/* Skip the caches that were already created above. */
		if (!sizes->cs_cachep) {
			sizes->cs_cachep = kmem_cache_create(names->name,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
					NULL);
		}
#ifdef CONFIG_ZONE_DMA
		sizes->cs_dmacachep = kmem_cache_create(
					names->name_dma,
					sizes->cs_size,
					ARCH_KMALLOC_MINALIGN,
					ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
						SLAB_PANIC,
					NULL);
#endif
		sizes++;
		names++;
	}

	/* Step 3: replace the static bootstrap array caches with kmalloc'ed copies. */
	{
		struct array_cache *ptr;

		/*
		 * NOTE(review): the kmalloc() result is not checked before
		 * the memcpy() below.
		 */
		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
		memcpy(ptr, cpu_cache_get(&cache_cache),
		       sizeof(struct arraycache_init));

		/* The copied lock must be re-initialised in its new location. */
		spin_lock_init(&ptr->lock);

		cache_cache.array[smp_processor_id()] = ptr;

		/* NOTE(review): unchecked kmalloc() result here as well. */
		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

		BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
		       != &initarray_generic.cache);
		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
		       sizeof(struct arraycache_init));

		/* The copied lock must be re-initialised in its new location. */
		spin_lock_init(&ptr->lock);

		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
		    ptr;
	}

	/* Step 4: pass each online node's bootstrap list3 to init_list(). */
	{
		int nid;

		for_each_online_node(nid) {
			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

			init_list(malloc_sizes[INDEX_AC].cs_cachep,
				  &initkmem_list3[SIZE_AC + nid], nid);

			if (INDEX_AC != INDEX_L3) {
				init_list(malloc_sizes[INDEX_L3].cs_cachep,
					  &initkmem_list3[SIZE_L3 + nid], nid);
			}
		}
	}

	g_cpucache_up = EARLY;
}
1665
1666void __init kmem_cache_init_late(void)
1667{
1668 struct kmem_cache *cachep;
1669
1670 g_cpucache_up = LATE;
1671
1672
1673 init_lock_keys();
1674
1675
1676 mutex_lock(&cache_chain_mutex);
1677 list_for_each_entry(cachep, &cache_chain, next)
1678 if (enable_cpucache(cachep, GFP_NOWAIT))
1679 BUG();
1680 mutex_unlock(&cache_chain_mutex);
1681
1682
1683 g_cpucache_up = FULL;
1684
1685
1686
1687
1688
1689 register_cpu_notifier(&cpucache_notifier);
1690
1691#ifdef CONFIG_NUMA
1692
1693
1694
1695
1696 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1697#endif
1698
1699
1700
1701
1702
1703}
1704
1705static int __init cpucache_init(void)
1706{
1707 int cpu;
1708
1709
1710
1711
1712 for_each_online_cpu(cpu)
1713 start_cpu_timer(cpu);
1714 return 0;
1715}
1716__initcall(cpucache_init);
1717
1718
1719
1720
1721
1722
1723
1724
1725static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1726{
1727 struct page *page;
1728 int nr_pages;
1729 int i;
1730
1731#ifndef CONFIG_MMU
1732
1733
1734
1735
1736 flags |= __GFP_COMP;
1737#endif
1738
1739 flags |= cachep->gfpflags;
1740 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1741 flags |= __GFP_RECLAIMABLE;
1742
1743 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1744 if (!page)
1745 return NULL;
1746
1747 nr_pages = (1 << cachep->gfporder);
1748 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1749 add_zone_page_state(page_zone(page),
1750 NR_SLAB_RECLAIMABLE, nr_pages);
1751 else
1752 add_zone_page_state(page_zone(page),
1753 NR_SLAB_UNRECLAIMABLE, nr_pages);
1754 for (i = 0; i < nr_pages; i++)
1755 __SetPageSlab(page + i);
1756
1757 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1758 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1759
1760 if (cachep->ctor)
1761 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1762 else
1763 kmemcheck_mark_unallocated_pages(page, nr_pages);
1764 }
1765
1766 return page_address(page);
1767}
1768
1769
1770
1771
1772static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1773{
1774 unsigned long i = (1 << cachep->gfporder);
1775 struct page *page = virt_to_page(addr);
1776 const unsigned long nr_freed = i;
1777
1778 kmemcheck_free_shadow(page, cachep->gfporder);
1779
1780 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1781 sub_zone_page_state(page_zone(page),
1782 NR_SLAB_RECLAIMABLE, nr_freed);
1783 else
1784 sub_zone_page_state(page_zone(page),
1785 NR_SLAB_UNRECLAIMABLE, nr_freed);
1786 while (i--) {
1787 BUG_ON(!PageSlab(page));
1788 __ClearPageSlab(page);
1789 page++;
1790 }
1791 if (current->reclaim_state)
1792 current->reclaim_state->reclaimed_slab += nr_freed;
1793 free_pages((unsigned long)addr, cachep->gfporder);
1794}
1795
1796static void kmem_rcu_free(struct rcu_head *head)
1797{
1798 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1799 struct kmem_cache *cachep = slab_rcu->cachep;
1800
1801 kmem_freepages(cachep, slab_rcu->addr);
1802 if (OFF_SLAB(cachep))
1803 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1804}
1805
1806#if DEBUG
1807
1808#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * store_stackinfo - record caller information inside an object.
 * @cachep: cache the object belongs to
 * @addr:   start of the object buffer (before obj_offset is applied)
 * @caller: address of the caller to record
 *
 * Layout written into the object: 0x12345678, @caller, the current cpu
 * id, then every kernel text address found while scanning the stack
 * upwards from &caller (as long as room remains), then 0x87654321.
 * Objects smaller than five words are left untouched.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
			    unsigned long caller)
{
	int room = obj_size(cachep);
	unsigned long *out;
	unsigned long *stack;

	out = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

	if (room < 5 * sizeof(unsigned long))
		return;

	out[0] = 0x12345678;
	out[1] = caller;
	out[2] = smp_processor_id();
	out += 3;
	room -= 3 * sizeof(unsigned long);

	for (stack = &caller; !kstack_end(stack); ) {
		unsigned long word = *stack++;

		if (!kernel_text_address(word))
			continue;

		*out++ = word;
		room -= sizeof(unsigned long);
		/* Keep one word free for the end marker. */
		if (room <= sizeof(unsigned long))
			break;
	}

	*out++ = 0x87654321;
}
1840#endif
1841
1842static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1843{
1844 int size = obj_size(cachep);
1845 addr = &((char *)addr)[obj_offset(cachep)];
1846
1847 memset(addr, val, size);
1848 *(unsigned char *)(addr + size - 1) = POISON_END;
1849}
1850
1851static void dump_line(char *data, int offset, int limit)
1852{
1853 int i;
1854 unsigned char error = 0;
1855 int bad_count = 0;
1856
1857 printk(KERN_ERR "%03x:", offset);
1858 for (i = 0; i < limit; i++) {
1859 if (data[offset + i] != POISON_FREE) {
1860 error = data[offset + i];
1861 bad_count++;
1862 }
1863 printk(" %02x", (unsigned char)data[offset + i]);
1864 }
1865 printk("\n");
1866
1867 if (bad_count == 1) {
1868 error ^= POISON_FREE;
1869 if (!(error & (error - 1))) {
1870 printk(KERN_ERR "Single bit error detected. Probably "
1871 "bad RAM.\n");
1872#ifdef CONFIG_X86
1873 printk(KERN_ERR "Run memtest86+ or a similar memory "
1874 "test tool.\n");
1875#else
1876 printk(KERN_ERR "Run a memory test tool.\n");
1877#endif
1878 }
1879 }
1880}
1881#endif
1882
1883#if DEBUG
1884
1885static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1886{
1887 int i, size;
1888 char *realobj;
1889
1890 if (cachep->flags & SLAB_RED_ZONE) {
1891 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1892 *dbg_redzone1(cachep, objp),
1893 *dbg_redzone2(cachep, objp));
1894 }
1895
1896 if (cachep->flags & SLAB_STORE_USER) {
1897 printk(KERN_ERR "Last user: [<%p>]",
1898 *dbg_userword(cachep, objp));
1899 print_symbol("(%s)",
1900 (unsigned long)*dbg_userword(cachep, objp));
1901 printk("\n");
1902 }
1903 realobj = (char *)objp + obj_offset(cachep);
1904 size = obj_size(cachep);
1905 for (i = 0; i < size && lines; i += 16, lines--) {
1906 int limit;
1907 limit = 16;
1908 if (i + limit > size)
1909 limit = size - i;
1910 dump_line(realobj, i, limit);
1911 }
1912}
1913
1914static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1915{
1916 char *realobj;
1917 int size, i;
1918 int lines = 0;
1919
1920 realobj = (char *)objp + obj_offset(cachep);
1921 size = obj_size(cachep);
1922
1923 for (i = 0; i < size; i++) {
1924 char exp = POISON_FREE;
1925 if (i == size - 1)
1926 exp = POISON_END;
1927 if (realobj[i] != exp) {
1928 int limit;
1929
1930
1931 if (lines == 0) {
1932 printk(KERN_ERR
1933 "Slab corruption: %s start=%p, len=%d\n",
1934 cachep->name, realobj, size);
1935 print_objinfo(cachep, objp, 0);
1936 }
1937
1938 i = (i / 16) * 16;
1939 limit = 16;
1940 if (i + limit > size)
1941 limit = size - i;
1942 dump_line(realobj, i, limit);
1943 i += 16;
1944 lines++;
1945
1946 if (lines > 5)
1947 break;
1948 }
1949 }
1950 if (lines != 0) {
1951
1952
1953
1954 struct slab *slabp = virt_to_slab(objp);
1955 unsigned int objnr;
1956
1957 objnr = obj_to_index(cachep, slabp, objp);
1958 if (objnr) {
1959 objp = index_to_obj(cachep, slabp, objnr - 1);
1960 realobj = (char *)objp + obj_offset(cachep);
1961 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1962 realobj, size);
1963 print_objinfo(cachep, objp, 2);
1964 }
1965 if (objnr + 1 < cachep->num) {
1966 objp = index_to_obj(cachep, slabp, objnr + 1);
1967 realobj = (char *)objp + obj_offset(cachep);
1968 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1969 realobj, size);
1970 print_objinfo(cachep, objp, 2);
1971 }
1972 }
1973}
1974#endif
1975
#if DEBUG
/*
 * slab_destroy_debugcheck - sanity-check all objects of a slab that is
 * about to be destroyed.
 * @cachep: cache the slab belongs to
 * @slabp:  slab being destroyed
 *
 * For poisoned caches each object's poison is verified (or, with
 * CONFIG_DEBUG_PAGEALLOC and page-multiple off-slab objects, the pages
 * are passed to kernel_map_pages() with enable set).  For red-zoned
 * caches both red zones must read RED_INACTIVE.
 */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
	int idx;

	for (idx = 0; idx < cachep->num; idx++) {
		void *obj = index_to_obj(cachep, slabp, idx);

		if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
			if (cachep->buffer_size % PAGE_SIZE == 0 &&
					OFF_SLAB(cachep)) {
				kernel_map_pages(virt_to_page(obj),
					cachep->buffer_size / PAGE_SIZE, 1);
			} else {
				check_poison_obj(cachep, obj);
			}
#else
			check_poison_obj(cachep, obj);
#endif
		}

		if (!(cachep->flags & SLAB_RED_ZONE))
			continue;

		if (*dbg_redzone1(cachep, obj) != RED_INACTIVE)
			slab_error(cachep, "start of a freed object "
				   "was overwritten");
		if (*dbg_redzone2(cachep, obj) != RED_INACTIVE)
			slab_error(cachep, "end of a freed object "
				   "was overwritten");
	}
}
#else
/* Without DEBUG there is nothing to check. */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
2021{
2022 void *addr = slabp->s_mem - slabp->colouroff;
2023
2024 slab_destroy_debugcheck(cachep, slabp);
2025 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
2026 struct slab_rcu *slab_rcu;
2027
2028 slab_rcu = (struct slab_rcu *)slabp;
2029 slab_rcu->cachep = cachep;
2030 slab_rcu->addr = addr;
2031 call_rcu(&slab_rcu->head, kmem_rcu_free);
2032 } else {
2033 kmem_freepages(cachep, addr);
2034 if (OFF_SLAB(cachep))
2035 kmem_cache_free(cachep->slabp_cache, slabp);
2036 }
2037}
2038
2039static void __kmem_cache_destroy(struct kmem_cache *cachep)
2040{
2041 int i;
2042 struct kmem_list3 *l3;
2043
2044 for_each_online_cpu(i)
2045 kfree(cachep->array[i]);
2046
2047
2048 for_each_online_node(i) {
2049 l3 = cachep->nodelists[i];
2050 if (l3) {
2051 kfree(l3->shared);
2052 free_alien_cache(l3->alien);
2053 kfree(l3);
2054 }
2055 }
2056 kmem_cache_free(&cache_cache, cachep);
2057}
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073static size_t calculate_slab_order(struct kmem_cache *cachep,
2074 size_t size, size_t align, unsigned long flags)
2075{
2076 unsigned long offslab_limit;
2077 size_t left_over = 0;
2078 int gfporder;
2079
2080 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
2081 unsigned int num;
2082 size_t remainder;
2083
2084 cache_estimate(gfporder, size, align, flags, &remainder, &num);
2085 if (!num)
2086 continue;
2087
2088 if (flags & CFLGS_OFF_SLAB) {
2089
2090
2091
2092
2093
2094 offslab_limit = size - sizeof(struct slab);
2095 offslab_limit /= sizeof(kmem_bufctl_t);
2096
2097 if (num > offslab_limit)
2098 break;
2099 }
2100
2101
2102 cachep->num = num;
2103 cachep->gfporder = gfporder;
2104 left_over = remainder;
2105
2106
2107
2108
2109
2110
2111 if (flags & SLAB_RECLAIM_ACCOUNT)
2112 break;
2113
2114
2115
2116
2117
2118 if (gfporder >= slab_break_gfp_order)
2119 break;
2120
2121
2122
2123
2124 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2125 break;
2126 }
2127 return left_over;
2128}
2129
2130static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2131{
2132 if (g_cpucache_up == FULL)
2133 return enable_cpucache(cachep, gfp);
2134
2135 if (g_cpucache_up == NONE) {
2136
2137
2138
2139
2140
2141 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2142
2143
2144
2145
2146
2147
2148 set_up_list3s(cachep, SIZE_AC);
2149 if (INDEX_AC == INDEX_L3)
2150 g_cpucache_up = PARTIAL_L3;
2151 else
2152 g_cpucache_up = PARTIAL_AC;
2153 } else {
2154 cachep->array[smp_processor_id()] =
2155 kmalloc(sizeof(struct arraycache_init), gfp);
2156
2157 if (g_cpucache_up == PARTIAL_AC) {
2158 set_up_list3s(cachep, SIZE_L3);
2159 g_cpucache_up = PARTIAL_L3;
2160 } else {
2161 int node;
2162 for_each_online_node(node) {
2163 cachep->nodelists[node] =
2164 kmalloc_node(sizeof(struct kmem_list3),
2165 gfp, node);
2166 BUG_ON(!cachep->nodelists[node]);
2167 kmem_list3_init(cachep->nodelists[node]);
2168 }
2169 }
2170 }
2171 cachep->nodelists[numa_mem_id()]->next_reap =
2172 jiffies + REAPTIMEOUT_LIST3 +
2173 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2174
2175 cpu_cache_get(cachep)->avail = 0;
2176 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2177 cpu_cache_get(cachep)->batchcount = 1;
2178 cpu_cache_get(cachep)->touched = 0;
2179 cachep->batchcount = 1;
2180 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2181 return 0;
2182}
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
/*
 * kmem_cache_create - create a new object cache.
 * @name:  cache name; the pointer is stored in the cache, not copied
 * @size:  size of the objects to be created in this cache
 * @align: minimum alignment requested by the caller
 * @flags: SLAB_* creation flags; must be within CREATE_MASK
 * @ctor:  optional constructor stored in the cache
 *
 * Returns the new cache, or NULL on failure.  With SLAB_PANIC set in
 * @flags a failure panics instead.  Invalid basic arguments (no name,
 * interrupt context, size out of range) are a BUG.
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
	unsigned long flags, void (*ctor)(void *))
{
	size_t left_over, slab_size, ralign;
	struct kmem_cache *cachep = NULL, *pc;
	gfp_t gfp;

	/* Sanity checks: these are usage bugs. */
	if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
	    size > KMALLOC_MAX_SIZE) {
		printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
				name);
		BUG();
	}

	/*
	 * cache_chain_mutex and the cpu hotplug lock are only taken once
	 * slab_is_available() reports true.
	 */
	if (slab_is_available()) {
		get_online_cpus();
		mutex_lock(&cache_chain_mutex);
	}

	/* Refuse to create a second cache with an existing name. */
	list_for_each_entry(pc, &cache_chain, next) {
		char tmp;
		int res;

		/*
		 * Probe the first byte of the stored name pointer so that a
		 * name that is no longer readable is reported, not followed.
		 */
		res = probe_kernel_address(pc->name, tmp);
		if (res) {
			printk(KERN_ERR
			       "SLAB: cache with size %d has lost its name\n",
			       pc->buffer_size);
			continue;
		}

		if (!strcmp(pc->name, name)) {
			printk(KERN_ERR
			       "kmem_cache_create: duplicate cache %s\n", name);
			dump_stack();
			goto oops;
		}
	}

#if DEBUG
	WARN_ON(strchr(name, ' '));	/* names must not contain spaces */
#if FORCED_DEBUG
	/*
	 * Force red zoning and user tracking for objects below 4096 bytes,
	 * or when adding the debug fields does not change fls(size - 1).
	 * Poisoning is forced unless the cache is SLAB_DESTROY_BY_RCU.
	 */
	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
						2 * sizeof(unsigned long long)))
		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
	if (!(flags & SLAB_DESTROY_BY_RCU))
		flags |= SLAB_POISON;
#endif
	if (flags & SLAB_DESTROY_BY_RCU)
		BUG_ON(flags & SLAB_POISON);
#endif
	/* Only flags listed in CREATE_MASK are allowed. */
	BUG_ON(flags & ~CREATE_MASK);

	/* Round the object size up to a multiple of the word size. */
	if (size & (BYTES_PER_WORD - 1)) {
		size += (BYTES_PER_WORD - 1);
		size &= ~(BYTES_PER_WORD - 1);
	}

	/* 1) Alignment suggested by the flags and the object size. */
	if (flags & SLAB_HWCACHE_ALIGN) {
		/*
		 * Start from the cache line size and halve it for as long
		 * as the object still fits into half of it.
		 */
		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
	} else {
		ralign = BYTES_PER_WORD;
	}

	/* Debug features override the suggestion above. */
	if (flags & SLAB_STORE_USER)
		ralign = BYTES_PER_WORD;

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* Round the object size up to REDZONE_ALIGN as well. */
		size += REDZONE_ALIGN - 1;
		size &= ~(REDZONE_ALIGN - 1);
	}

	/* 2) Minimum alignment required by the architecture. */
	if (ralign < ARCH_SLAB_MINALIGN) {
		ralign = ARCH_SLAB_MINALIGN;
	}
	/* 3) Minimum alignment requested by the caller. */
	if (ralign < align) {
		ralign = align;
	}
	/* Alignments above that of unsigned long long drop the debug fields. */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

	/* 4) Store the final alignment. */
	align = ralign;

	if (slab_is_available())
		gfp = GFP_KERNEL;
	else
		gfp = GFP_NOWAIT;

	/* Allocate the (zeroed) cache descriptor. */
	cachep = kmem_cache_zalloc(&cache_cache, gfp);
	if (!cachep)
		goto oops;

	/* The node list pointers live directly behind the per-cpu array. */
	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
#if DEBUG
	cachep->obj_size = size;

	/* Grow the buffer by the space needed for the debug fields. */
	if (flags & SLAB_RED_ZONE) {
		/* one red zone word before and one after the object */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
	}
	if (flags & SLAB_STORE_USER) {
		/*
		 * Room for the user word; REDZONE_ALIGN bytes are reserved
		 * when red zoning is also active.
		 */
		if (flags & SLAB_RED_ZONE)
			size += REDZONE_ALIGN;
		else
			size += BYTES_PER_WORD;
	}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
	/* Pad qualifying objects to a full page, object at the page's end. */
	if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
	    && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
		cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
		size = PAGE_SIZE;
	}
#endif
#endif

	/*
	 * Objects of at least PAGE_SIZE/8 bytes get off-slab management,
	 * except during early init and for SLAB_NOLEAKTRACE caches.
	 */
	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
	    !(flags & SLAB_NOLEAKTRACE))
		flags |= CFLGS_OFF_SLAB;

	size = ALIGN(size, align);

	left_over = calculate_slab_order(cachep, size, align, flags);

	if (!cachep->num) {
		printk(KERN_ERR
		       "kmem_cache_create: couldn't create cache %s.\n", name);
		kmem_cache_free(&cache_cache, cachep);
		cachep = NULL;
		goto oops;
	}
	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
			  + sizeof(struct slab), align);

	/*
	 * If the management data fits into the left-over space, keep it
	 * on-slab after all.
	 */
	if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
		flags &= ~CFLGS_OFF_SLAB;
		left_over -= slab_size;
	}

	if (flags & CFLGS_OFF_SLAB) {
		/* Off-slab management data is sized without alignment padding. */
		slab_size =
		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

#ifdef CONFIG_PAGE_POISONING
		/*
		 * Page-multiple poisoned objects lose red zoning and user
		 * tracking under CONFIG_PAGE_POISONING.
		 */
		if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
	}

	cachep->colour_off = cache_line_size();
	/* The colour offset must be at least the object alignment. */
	if (cachep->colour_off < align)
		cachep->colour_off = align;
	cachep->colour = left_over / cachep->colour_off;
	cachep->slab_size = slab_size;
	cachep->flags = flags;
	cachep->gfpflags = 0;
	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
		cachep->gfpflags |= GFP_DMA;
	cachep->buffer_size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);

	if (flags & CFLGS_OFF_SLAB) {
		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
		/*
		 * A general cache for the management data must exist.
		 * ZERO_OR_NULL_PTR also rejects the zero-size marker pointer.
		 */
		BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
	}
	cachep->ctor = ctor;
	cachep->name = name;

	if (setup_cpu_cache(cachep, gfp)) {
		__kmem_cache_destroy(cachep);
		cachep = NULL;
		goto oops;
	}

	if (flags & SLAB_DEBUG_OBJECTS) {
		/*
		 * SLAB_DEBUG_OBJECTS combined with SLAB_DESTROY_BY_RCU is
		 * warned about once.
		 */
		WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);

		slab_set_debugobj_lock_classes(cachep);
	}

	/* The cache is set up: link it into the cache chain. */
	list_add(&cachep->next, &cache_chain);
oops:
	if (!cachep && (flags & SLAB_PANIC))
		panic("kmem_cache_create(): failed to create slab `%s'\n",
		      name);
	if (slab_is_available()) {
		mutex_unlock(&cache_chain_mutex);
		put_online_cpus();
	}
	return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);
2495
#if DEBUG
/* BUG unless interrupts are currently disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG unless interrupts are currently enabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP, assert that interrupts are off and that the list_lock of
 * @cachep's list3 for @node is held.
 */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[node]->list_lock);
#endif
}

/* Same check as above, for the node returned by numa_mem_id(). */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
	check_spinlock_acquired_node(cachep, numa_mem_id());
#endif
}

#else
/* Without DEBUG all checks compile away. */
#define check_irq_off()	do { } while(0)
#define check_irq_on()	do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
2529
/* Forward declaration: drain_array() is used by drain_cpu_caches() below. */
static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
			struct array_cache *ac,
			int force, int node);
2533
2534static void do_drain(void *arg)
2535{
2536 struct kmem_cache *cachep = arg;
2537 struct array_cache *ac;
2538 int node = numa_mem_id();
2539
2540 check_irq_off();
2541 ac = cpu_cache_get(cachep);
2542 spin_lock(&cachep->nodelists[node]->list_lock);
2543 free_block(cachep, ac->entry, ac->avail, node);
2544 spin_unlock(&cachep->nodelists[node]->list_lock);
2545 ac->avail = 0;
2546}
2547
2548static void drain_cpu_caches(struct kmem_cache *cachep)
2549{
2550 struct kmem_list3 *l3;
2551 int node;
2552
2553 on_each_cpu(do_drain, cachep, 1);
2554 check_irq_on();
2555 for_each_online_node(node) {
2556 l3 = cachep->nodelists[node];
2557 if (l3 && l3->alien)
2558 drain_alien_cache(cachep, l3->alien);
2559 }
2560
2561 for_each_online_node(node) {
2562 l3 = cachep->nodelists[node];
2563 if (l3)
2564 drain_array(cachep, l3, l3->shared, 1, node);
2565 }
2566}
2567
2568
2569
2570
2571
2572
2573
2574static int drain_freelist(struct kmem_cache *cache,
2575 struct kmem_list3 *l3, int tofree)
2576{
2577 struct list_head *p;
2578 int nr_freed;
2579 struct slab *slabp;
2580
2581 nr_freed = 0;
2582 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2583
2584 spin_lock_irq(&l3->list_lock);
2585 p = l3->slabs_free.prev;
2586 if (p == &l3->slabs_free) {
2587 spin_unlock_irq(&l3->list_lock);
2588 goto out;
2589 }
2590
2591 slabp = list_entry(p, struct slab, list);
2592#if DEBUG
2593 BUG_ON(slabp->inuse);
2594#endif
2595 list_del(&slabp->list);
2596
2597
2598
2599
2600 l3->free_objects -= cache->num;
2601 spin_unlock_irq(&l3->list_lock);
2602 slab_destroy(cache, slabp);
2603 nr_freed++;
2604 }
2605out:
2606 return nr_freed;
2607}
2608
2609
2610static int __cache_shrink(struct kmem_cache *cachep)
2611{
2612 int ret = 0, i = 0;
2613 struct kmem_list3 *l3;
2614
2615 drain_cpu_caches(cachep);
2616
2617 check_irq_on();
2618 for_each_online_node(i) {
2619 l3 = cachep->nodelists[i];
2620 if (!l3)
2621 continue;
2622
2623 drain_freelist(cachep, l3, l3->free_objects);
2624
2625 ret += !list_empty(&l3->slabs_full) ||
2626 !list_empty(&l3->slabs_partial);
2627 }
2628 return (ret ? 1 : 0);
2629}
2630
2631
2632
2633
2634
2635
2636
2637
2638int kmem_cache_shrink(struct kmem_cache *cachep)
2639{
2640 int ret;
2641 BUG_ON(!cachep || in_interrupt());
2642
2643 get_online_cpus();
2644 mutex_lock(&cache_chain_mutex);
2645 ret = __cache_shrink(cachep);
2646 mutex_unlock(&cache_chain_mutex);
2647 put_online_cpus();
2648 return ret;
2649}
2650EXPORT_SYMBOL(kmem_cache_shrink);
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668void kmem_cache_destroy(struct kmem_cache *cachep)
2669{
2670 BUG_ON(!cachep || in_interrupt());
2671
2672
2673 get_online_cpus();
2674 mutex_lock(&cache_chain_mutex);
2675
2676
2677
2678 list_del(&cachep->next);
2679 if (__cache_shrink(cachep)) {
2680 slab_error(cachep, "Can't free all objects");
2681 list_add(&cachep->next, &cache_chain);
2682 mutex_unlock(&cache_chain_mutex);
2683 put_online_cpus();
2684 return;
2685 }
2686
2687 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2688 rcu_barrier();
2689
2690 __kmem_cache_destroy(cachep);
2691 mutex_unlock(&cache_chain_mutex);
2692 put_online_cpus();
2693}
2694EXPORT_SYMBOL(kmem_cache_destroy);
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2708 int colour_off, gfp_t local_flags,
2709 int nodeid)
2710{
2711 struct slab *slabp;
2712
2713 if (OFF_SLAB(cachep)) {
2714
2715 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2716 local_flags, nodeid);
2717
2718
2719
2720
2721
2722
2723 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2724 local_flags);
2725 if (!slabp)
2726 return NULL;
2727 } else {
2728 slabp = objp + colour_off;
2729 colour_off += cachep->slab_size;
2730 }
2731 slabp->inuse = 0;
2732 slabp->colouroff = colour_off;
2733 slabp->s_mem = objp + colour_off;
2734 slabp->nodeid = nodeid;
2735 slabp->free = 0;
2736 return slabp;
2737}
2738
2739static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2740{
2741 return (kmem_bufctl_t *) (slabp + 1);
2742}
2743
2744static void cache_init_objs(struct kmem_cache *cachep,
2745 struct slab *slabp)
2746{
2747 int i;
2748
2749 for (i = 0; i < cachep->num; i++) {
2750 void *objp = index_to_obj(cachep, slabp, i);
2751#if DEBUG
2752
2753 if (cachep->flags & SLAB_POISON)
2754 poison_obj(cachep, objp, POISON_FREE);
2755 if (cachep->flags & SLAB_STORE_USER)
2756 *dbg_userword(cachep, objp) = NULL;
2757
2758 if (cachep->flags & SLAB_RED_ZONE) {
2759 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2760 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2761 }
2762
2763
2764
2765
2766
2767 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2768 cachep->ctor(objp + obj_offset(cachep));
2769
2770 if (cachep->flags & SLAB_RED_ZONE) {
2771 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2772 slab_error(cachep, "constructor overwrote the"
2773 " end of an object");
2774 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2775 slab_error(cachep, "constructor overwrote the"
2776 " start of an object");
2777 }
2778 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2779 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2780 kernel_map_pages(virt_to_page(objp),
2781 cachep->buffer_size / PAGE_SIZE, 0);
2782#else
2783 if (cachep->ctor)
2784 cachep->ctor(objp);
2785#endif
2786 slab_bufctl(slabp)[i] = i + 1;
2787 }
2788 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2789}
2790
2791static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2792{
2793 if (CONFIG_ZONE_DMA_FLAG) {
2794 if (flags & GFP_DMA)
2795 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2796 else
2797 BUG_ON(cachep->gfpflags & GFP_DMA);
2798 }
2799}
2800
2801static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2802 int nodeid)
2803{
2804 void *objp = index_to_obj(cachep, slabp, slabp->free);
2805 kmem_bufctl_t next;
2806
2807 slabp->inuse++;
2808 next = slab_bufctl(slabp)[slabp->free];
2809#if DEBUG
2810 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2811 WARN_ON(slabp->nodeid != nodeid);
2812#endif
2813 slabp->free = next;
2814
2815 return objp;
2816}
2817
2818static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2819 void *objp, int nodeid)
2820{
2821 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2822
2823#if DEBUG
2824
2825 WARN_ON(slabp->nodeid != nodeid);
2826
2827 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2828 printk(KERN_ERR "slab: double free detected in cache "
2829 "'%s', objp %p\n", cachep->name, objp);
2830 BUG();
2831 }
2832#endif
2833 slab_bufctl(slabp)[objnr] = slabp->free;
2834 slabp->free = objnr;
2835 slabp->inuse--;
2836}
2837
2838
2839
2840
2841
2842
2843static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2844 void *addr)
2845{
2846 int nr_pages;
2847 struct page *page;
2848
2849 page = virt_to_page(addr);
2850
2851 nr_pages = 1;
2852 if (likely(!PageCompound(page)))
2853 nr_pages <<= cache->gfporder;
2854
2855 do {
2856 page_set_cache(page, cache);
2857 page_set_slab(page, slab);
2858 page++;
2859 } while (--nr_pages);
2860}
2861
2862
2863
2864
2865
2866static int cache_grow(struct kmem_cache *cachep,
2867 gfp_t flags, int nodeid, void *objp)
2868{
2869 struct slab *slabp;
2870 size_t offset;
2871 gfp_t local_flags;
2872 struct kmem_list3 *l3;
2873
2874
2875
2876
2877
2878 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2879 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2880
2881
2882 check_irq_off();
2883 l3 = cachep->nodelists[nodeid];
2884 spin_lock(&l3->list_lock);
2885
2886
2887 offset = l3->colour_next;
2888 l3->colour_next++;
2889 if (l3->colour_next >= cachep->colour)
2890 l3->colour_next = 0;
2891 spin_unlock(&l3->list_lock);
2892
2893 offset *= cachep->colour_off;
2894
2895 if (local_flags & __GFP_WAIT)
2896 local_irq_enable();
2897
2898
2899
2900
2901
2902
2903
2904 kmem_flagcheck(cachep, flags);
2905
2906
2907
2908
2909
2910 if (!objp)
2911 objp = kmem_getpages(cachep, local_flags, nodeid);
2912 if (!objp)
2913 goto failed;
2914
2915
2916 slabp = alloc_slabmgmt(cachep, objp, offset,
2917 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2918 if (!slabp)
2919 goto opps1;
2920
2921 slab_map_pages(cachep, slabp, objp);
2922
2923 cache_init_objs(cachep, slabp);
2924
2925 if (local_flags & __GFP_WAIT)
2926 local_irq_disable();
2927 check_irq_off();
2928 spin_lock(&l3->list_lock);
2929
2930
2931 list_add_tail(&slabp->list, &(l3->slabs_free));
2932 STATS_INC_GROWN(cachep);
2933 l3->free_objects += cachep->num;
2934 spin_unlock(&l3->list_lock);
2935 return 1;
2936opps1:
2937 kmem_freepages(cachep, objp);
2938failed:
2939 if (local_flags & __GFP_WAIT)
2940 local_irq_disable();
2941 return 0;
2942}
2943
2944#if DEBUG
2945
2946
2947
2948
2949
2950
2951static void kfree_debugcheck(const void *objp)
2952{
2953 if (!virt_addr_valid(objp)) {
2954 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2955 (unsigned long)objp);
2956 BUG();
2957 }
2958}
2959
2960static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2961{
2962 unsigned long long redzone1, redzone2;
2963
2964 redzone1 = *dbg_redzone1(cache, obj);
2965 redzone2 = *dbg_redzone2(cache, obj);
2966
2967
2968
2969
2970 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2971 return;
2972
2973 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2974 slab_error(cache, "double free detected");
2975 else
2976 slab_error(cache, "memory outside object was overwritten");
2977
2978 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2979 obj, redzone1, redzone2);
2980}
2981
2982static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2983 void *caller)
2984{
2985 struct page *page;
2986 unsigned int objnr;
2987 struct slab *slabp;
2988
2989 BUG_ON(virt_to_cache(objp) != cachep);
2990
2991 objp -= obj_offset(cachep);
2992 kfree_debugcheck(objp);
2993 page = virt_to_head_page(objp);
2994
2995 slabp = page_get_slab(page);
2996
2997 if (cachep->flags & SLAB_RED_ZONE) {
2998 verify_redzone_free(cachep, objp);
2999 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
3000 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
3001 }
3002 if (cachep->flags & SLAB_STORE_USER)
3003 *dbg_userword(cachep, objp) = caller;
3004
3005 objnr = obj_to_index(cachep, slabp, objp);
3006
3007 BUG_ON(objnr >= cachep->num);
3008 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
3009
3010#ifdef CONFIG_DEBUG_SLAB_LEAK
3011 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
3012#endif
3013 if (cachep->flags & SLAB_POISON) {
3014#ifdef CONFIG_DEBUG_PAGEALLOC
3015 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
3016 store_stackinfo(cachep, objp, (unsigned long)caller);
3017 kernel_map_pages(virt_to_page(objp),
3018 cachep->buffer_size / PAGE_SIZE, 0);
3019 } else {
3020 poison_obj(cachep, objp, POISON_FREE);
3021 }
3022#else
3023 poison_obj(cachep, objp, POISON_FREE);
3024#endif
3025 }
3026 return objp;
3027}
3028
3029static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
3030{
3031 kmem_bufctl_t i;
3032 int entries = 0;
3033
3034
3035 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
3036 entries++;
3037 if (entries > cachep->num || i >= cachep->num)
3038 goto bad;
3039 }
3040 if (entries != cachep->num - slabp->inuse) {
3041bad:
3042 printk(KERN_ERR "slab: Internal list corruption detected in "
3043 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
3044 cachep->name, cachep->num, slabp, slabp->inuse);
3045 for (i = 0;
3046 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
3047 i++) {
3048 if (i % 16 == 0)
3049 printk("\n%03x:", i);
3050 printk(" %02x", ((unsigned char *)slabp)[i]);
3051 }
3052 printk("\n");
3053 BUG();
3054 }
3055}
3056#else
/*
 * Stubs used when the debug implementations are compiled out:
 * the check macros expand to empty statements and
 * cache_free_debugcheck() evaluates to its object argument unchanged.
 */
#define kfree_debugcheck(x) do { } while(0)
#define cache_free_debugcheck(x,objp,z) (objp)
#define check_slabp(x,y) do { } while(0)
3060#endif
3061
/*
 * cache_alloc_refill - refill the current CPU's empty array cache and
 * return one object from it.
 * @cachep: cache to allocate from
 * @flags:  gfp flags, passed on to cache_grow() if a new slab is needed
 *
 * Must be entered with interrupts off and with the array cache empty
 * (BUG otherwise).  Objects are taken first from the node's shared array,
 * then from partial and free slabs of the local node; if nothing could be
 * obtained the cache is grown and the whole sequence retried.
 *
 * Returns an object pointer, or NULL when growing failed and the array
 * cache is still empty.
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;
	int node;

retry:
	check_irq_off();
	node = numa_mem_id();
	ac = cpu_cache_get(cachep);
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * The array cache has not been marked as touched, so do
		 * only a partial refill of at most BATCHREFILL_LIMIT
		 * objects instead of a full batch.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	l3 = cachep->nodelists[node];

	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/* See if we can refill from the shared array */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
		l3->shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;

		/* Pick a slab: partial slabs first, then completely free ones. */
		entry = l3->slabs_partial.next;
		if (entry == &l3->slabs_partial) {
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * The slab came from the partial or the free list, so it
		 * must have at least one unused object.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
							    node);
		}
		check_slabp(cachep, slabp);

		/* move slabp to the list matching its new fill state */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &l3->slabs_full);
		else
			list_add(&slabp->list, &l3->slabs_partial);
	}

must_grow:
	/* objects now sitting in the array cache no longer count as free */
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/*
		 * Re-read the array cache after cache_grow().
		 * NOTE(review): presumably cache_grow() may re-enable
		 * interrupts so the per-cpu array can change - confirm.
		 */
		ac = cpu_cache_get(cachep);
		if (!x && ac->avail == 0)	/* grow failed, still empty */
			return NULL;

		if (!ac->avail)		/* grown but array still empty: redo */
			goto retry;
	}
	ac->touched = 1;
	return ac->entry[--ac->avail];
}
3154
3155static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3156 gfp_t flags)
3157{
3158 might_sleep_if(flags & __GFP_WAIT);
3159#if DEBUG
3160 kmem_flagcheck(cachep, flags);
3161#endif
3162}
3163
3164#if DEBUG
3165static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3166 gfp_t flags, void *objp, void *caller)
3167{
3168 if (!objp)
3169 return objp;
3170 if (cachep->flags & SLAB_POISON) {
3171#ifdef CONFIG_DEBUG_PAGEALLOC
3172 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3173 kernel_map_pages(virt_to_page(objp),
3174 cachep->buffer_size / PAGE_SIZE, 1);
3175 else
3176 check_poison_obj(cachep, objp);
3177#else
3178 check_poison_obj(cachep, objp);
3179#endif
3180 poison_obj(cachep, objp, POISON_INUSE);
3181 }
3182 if (cachep->flags & SLAB_STORE_USER)
3183 *dbg_userword(cachep, objp) = caller;
3184
3185 if (cachep->flags & SLAB_RED_ZONE) {
3186 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3187 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3188 slab_error(cachep, "double free, or memory outside"
3189 " object was overwritten");
3190 printk(KERN_ERR
3191 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3192 objp, *dbg_redzone1(cachep, objp),
3193 *dbg_redzone2(cachep, objp));
3194 }
3195 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3196 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3197 }
3198#ifdef CONFIG_DEBUG_SLAB_LEAK
3199 {
3200 struct slab *slabp;
3201 unsigned objnr;
3202
3203 slabp = page_get_slab(virt_to_head_page(objp));
3204 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3205 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3206 }
3207#endif
3208 objp += obj_offset(cachep);
3209 if (cachep->ctor && cachep->flags & SLAB_POISON)
3210 cachep->ctor(objp);
3211 if (ARCH_SLAB_MINALIGN &&
3212 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
3213 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3214 objp, (int)ARCH_SLAB_MINALIGN);
3215 }
3216 return objp;
3217}
3218#else
/* Non-debug stub: evaluates to the object pointer argument unchanged. */
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3220#endif
3221
3222static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3223{
3224 if (cachep == &cache_cache)
3225 return false;
3226
3227 return should_failslab(obj_size(cachep), flags, cachep->flags);
3228}
3229
3230static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3231{
3232 void *objp;
3233 struct array_cache *ac;
3234
3235 check_irq_off();
3236
3237 ac = cpu_cache_get(cachep);
3238 if (likely(ac->avail)) {
3239 STATS_INC_ALLOCHIT(cachep);
3240 ac->touched = 1;
3241 objp = ac->entry[--ac->avail];
3242 } else {
3243 STATS_INC_ALLOCMISS(cachep);
3244 objp = cache_alloc_refill(cachep, flags);
3245
3246
3247
3248
3249 ac = cpu_cache_get(cachep);
3250 }
3251
3252
3253
3254
3255
3256 if (objp)
3257 kmemleak_erase(&ac->entry[ac->avail]);
3258 return objp;
3259}
3260
3261#ifdef CONFIG_NUMA
3262
3263
3264
3265
3266
3267
3268static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3269{
3270 int nid_alloc, nid_here;
3271
3272 if (in_interrupt() || (flags & __GFP_THISNODE))
3273 return NULL;
3274 nid_alloc = nid_here = numa_mem_id();
3275 get_mems_allowed();
3276 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3277 nid_alloc = cpuset_slab_spread_node();
3278 else if (current->mempolicy)
3279 nid_alloc = slab_node(current->mempolicy);
3280 put_mems_allowed();
3281 if (nid_alloc != nid_here)
3282 return ____cache_alloc_node(cachep, flags, nid_alloc);
3283 return NULL;
3284}
3285
3286
3287
3288
3289
3290
3291
3292
3293
/*
 * fallback_alloc - allocate an object from whichever node can supply one.
 * @cache: cache to allocate from
 * @flags: gfp flags of the request
 *
 * Refuses (returns NULL) when __GFP_THISNODE is set.  Otherwise it first
 * walks the zonelist looking for an allowed node whose list already has
 * free objects; failing that it obtains fresh pages, grows the cache on
 * the node those pages belong to and allocates from there.
 *
 * Returns the object or NULL.
 */
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
	struct zonelist *zonelist;
	gfp_t local_flags;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *obj = NULL;
	int nid;

	if (flags & __GFP_THISNODE)
		return NULL;

	get_mems_allowed();
	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

retry:
	/*
	 * Look through the zones in the zonelist for a node that is
	 * allowed by the cpuset and already has free objects queued.
	 */
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		nid = zone_to_nid(zone);

		if (cpuset_zone_allowed_hardwall(zone, flags) &&
			cache->nodelists[nid] &&
			cache->nodelists[nid]->free_objects) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (obj)
					break;
		}
	}

	if (!obj) {
		/*
		 * No node had objects available.  Get pages without
		 * forcing a node (local_flags carries no __GFP_THISNODE
		 * added here) and grow the cache on whatever node the
		 * pages came from.  Interrupts are enabled around the
		 * page allocation when the request may wait.
		 */
		if (local_flags & __GFP_WAIT)
			local_irq_enable();
		kmem_flagcheck(cache, flags);
		obj = kmem_getpages(cache, local_flags, numa_mem_id());
		if (local_flags & __GFP_WAIT)
			local_irq_disable();
		if (obj) {
			/*
			 * Insert the pages into the cache as a new slab on
			 * the node they actually belong to.
			 */
			nid = page_to_nid(virt_to_page(obj));
			if (cache_grow(cache, flags, nid, obj)) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (!obj)
					/*
					 * The new slab yielded nothing
					 * (presumably its objects were
					 * taken by someone else - TODO
					 * confirm); start over.
					 */
					goto retry;
			} else {
				/*
				 * NOTE(review): presumably cache_grow()
				 * released the pages on failure - confirm.
				 */
				obj = NULL;
			}
		}
	}
	put_mems_allowed();
	return obj;
}
3366
3367
3368
3369
3370static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3371 int nodeid)
3372{
3373 struct list_head *entry;
3374 struct slab *slabp;
3375 struct kmem_list3 *l3;
3376 void *obj;
3377 int x;
3378
3379 l3 = cachep->nodelists[nodeid];
3380 BUG_ON(!l3);
3381
3382retry:
3383 check_irq_off();
3384 spin_lock(&l3->list_lock);
3385 entry = l3->slabs_partial.next;
3386 if (entry == &l3->slabs_partial) {
3387 l3->free_touched = 1;
3388 entry = l3->slabs_free.next;
3389 if (entry == &l3->slabs_free)
3390 goto must_grow;
3391 }
3392
3393 slabp = list_entry(entry, struct slab, list);
3394 check_spinlock_acquired_node(cachep, nodeid);
3395 check_slabp(cachep, slabp);
3396
3397 STATS_INC_NODEALLOCS(cachep);
3398 STATS_INC_ACTIVE(cachep);
3399 STATS_SET_HIGH(cachep);
3400
3401 BUG_ON(slabp->inuse == cachep->num);
3402
3403 obj = slab_get_obj(cachep, slabp, nodeid);
3404 check_slabp(cachep, slabp);
3405 l3->free_objects--;
3406
3407 list_del(&slabp->list);
3408
3409 if (slabp->free == BUFCTL_END)
3410 list_add(&slabp->list, &l3->slabs_full);
3411 else
3412 list_add(&slabp->list, &l3->slabs_partial);
3413
3414 spin_unlock(&l3->list_lock);
3415 goto done;
3416
3417must_grow:
3418 spin_unlock(&l3->list_lock);
3419 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3420 if (x)
3421 goto retry;
3422
3423 return fallback_alloc(cachep, flags);
3424
3425done:
3426 return obj;
3427}
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441static __always_inline void *
3442__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3443 void *caller)
3444{
3445 unsigned long save_flags;
3446 void *ptr;
3447 int slab_node = numa_mem_id();
3448
3449 flags &= gfp_allowed_mask;
3450
3451 lockdep_trace_alloc(flags);
3452
3453 if (slab_should_failslab(cachep, flags))
3454 return NULL;
3455
3456 cache_alloc_debugcheck_before(cachep, flags);
3457 local_irq_save(save_flags);
3458
3459 if (nodeid == NUMA_NO_NODE)
3460 nodeid = slab_node;
3461
3462 if (unlikely(!cachep->nodelists[nodeid])) {
3463
3464 ptr = fallback_alloc(cachep, flags);
3465 goto out;
3466 }
3467
3468 if (nodeid == slab_node) {
3469
3470
3471
3472
3473
3474
3475 ptr = ____cache_alloc(cachep, flags);
3476 if (ptr)
3477 goto out;
3478 }
3479
3480 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3481 out:
3482 local_irq_restore(save_flags);
3483 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3484 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
3485 flags);
3486
3487 if (likely(ptr))
3488 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
3489
3490 if (unlikely((flags & __GFP_ZERO) && ptr))
3491 memset(ptr, 0, obj_size(cachep));
3492
3493 return ptr;
3494}
3495
3496static __always_inline void *
3497__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3498{
3499 void *objp;
3500
3501 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3502 objp = alternate_node_alloc(cache, flags);
3503 if (objp)
3504 goto out;
3505 }
3506 objp = ____cache_alloc(cache, flags);
3507
3508
3509
3510
3511
3512 if (!objp)
3513 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3514
3515 out:
3516 return objp;
3517}
3518#else
3519
3520static __always_inline void *
3521__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3522{
3523 return ____cache_alloc(cachep, flags);
3524}
3525
3526#endif
3527
3528static __always_inline void *
3529__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3530{
3531 unsigned long save_flags;
3532 void *objp;
3533
3534 flags &= gfp_allowed_mask;
3535
3536 lockdep_trace_alloc(flags);
3537
3538 if (slab_should_failslab(cachep, flags))
3539 return NULL;
3540
3541 cache_alloc_debugcheck_before(cachep, flags);
3542 local_irq_save(save_flags);
3543 objp = __do_cache_alloc(cachep, flags);
3544 local_irq_restore(save_flags);
3545 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3546 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
3547 flags);
3548 prefetchw(objp);
3549
3550 if (likely(objp))
3551 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
3552
3553 if (unlikely((flags & __GFP_ZERO) && objp))
3554 memset(objp, 0, obj_size(cachep));
3555
3556 return objp;
3557}
3558
3559
3560
3561
3562static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3563 int node)
3564{
3565 int i;
3566 struct kmem_list3 *l3;
3567
3568 for (i = 0; i < nr_objects; i++) {
3569 void *objp = objpp[i];
3570 struct slab *slabp;
3571
3572 slabp = virt_to_slab(objp);
3573 l3 = cachep->nodelists[node];
3574 list_del(&slabp->list);
3575 check_spinlock_acquired_node(cachep, node);
3576 check_slabp(cachep, slabp);
3577 slab_put_obj(cachep, slabp, objp, node);
3578 STATS_DEC_ACTIVE(cachep);
3579 l3->free_objects++;
3580 check_slabp(cachep, slabp);
3581
3582
3583 if (slabp->inuse == 0) {
3584 if (l3->free_objects > l3->free_limit) {
3585 l3->free_objects -= cachep->num;
3586
3587
3588
3589
3590
3591
3592 slab_destroy(cachep, slabp);
3593 } else {
3594 list_add(&slabp->list, &l3->slabs_free);
3595 }
3596 } else {
3597
3598
3599
3600
3601 list_add_tail(&slabp->list, &l3->slabs_partial);
3602 }
3603 }
3604}
3605
3606static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3607{
3608 int batchcount;
3609 struct kmem_list3 *l3;
3610 int node = numa_mem_id();
3611
3612 batchcount = ac->batchcount;
3613#if DEBUG
3614 BUG_ON(!batchcount || batchcount > ac->avail);
3615#endif
3616 check_irq_off();
3617 l3 = cachep->nodelists[node];
3618 spin_lock(&l3->list_lock);
3619 if (l3->shared) {
3620 struct array_cache *shared_array = l3->shared;
3621 int max = shared_array->limit - shared_array->avail;
3622 if (max) {
3623 if (batchcount > max)
3624 batchcount = max;
3625 memcpy(&(shared_array->entry[shared_array->avail]),
3626 ac->entry, sizeof(void *) * batchcount);
3627 shared_array->avail += batchcount;
3628 goto free_done;
3629 }
3630 }
3631
3632 free_block(cachep, ac->entry, batchcount, node);
3633free_done:
3634#if STATS
3635 {
3636 int i = 0;
3637 struct list_head *p;
3638
3639 p = l3->slabs_free.next;
3640 while (p != &(l3->slabs_free)) {
3641 struct slab *slabp;
3642
3643 slabp = list_entry(p, struct slab, list);
3644 BUG_ON(slabp->inuse);
3645
3646 i++;
3647 p = p->next;
3648 }
3649 STATS_SET_FREEABLE(cachep, i);
3650 }
3651#endif
3652 spin_unlock(&l3->list_lock);
3653 ac->avail -= batchcount;
3654 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3655}
3656
3657
3658
3659
3660
3661static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3662 void *caller)
3663{
3664 struct array_cache *ac = cpu_cache_get(cachep);
3665
3666 check_irq_off();
3667 kmemleak_free_recursive(objp, cachep->flags);
3668 objp = cache_free_debugcheck(cachep, objp, caller);
3669
3670 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
3671
3672
3673
3674
3675
3676
3677
3678
3679 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3680 return;
3681
3682 if (likely(ac->avail < ac->limit)) {
3683 STATS_INC_FREEHIT(cachep);
3684 ac->entry[ac->avail++] = objp;
3685 return;
3686 } else {
3687 STATS_INC_FREEMISS(cachep);
3688 cache_flusharray(cachep, ac);
3689 ac->entry[ac->avail++] = objp;
3690 }
3691}
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3702{
3703 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3704
3705 trace_kmem_cache_alloc(_RET_IP_, ret,
3706 obj_size(cachep), cachep->buffer_size, flags);
3707
3708 return ret;
3709}
3710EXPORT_SYMBOL(kmem_cache_alloc);
3711
3712#ifdef CONFIG_TRACING
3713void *
3714kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
3715{
3716 void *ret;
3717
3718 ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3719
3720 trace_kmalloc(_RET_IP_, ret,
3721 size, slab_buffer_size(cachep), flags);
3722 return ret;
3723}
3724EXPORT_SYMBOL(kmem_cache_alloc_trace);
3725#endif
3726
3727#ifdef CONFIG_NUMA
3728void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3729{
3730 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3731 __builtin_return_address(0));
3732
3733 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3734 obj_size(cachep), cachep->buffer_size,
3735 flags, nodeid);
3736
3737 return ret;
3738}
3739EXPORT_SYMBOL(kmem_cache_alloc_node);
3740
3741#ifdef CONFIG_TRACING
3742void *kmem_cache_alloc_node_trace(size_t size,
3743 struct kmem_cache *cachep,
3744 gfp_t flags,
3745 int nodeid)
3746{
3747 void *ret;
3748
3749 ret = __cache_alloc_node(cachep, flags, nodeid,
3750 __builtin_return_address(0));
3751 trace_kmalloc_node(_RET_IP_, ret,
3752 size, slab_buffer_size(cachep),
3753 flags, nodeid);
3754 return ret;
3755}
3756EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3757#endif
3758
3759static __always_inline void *
3760__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3761{
3762 struct kmem_cache *cachep;
3763
3764 cachep = kmem_find_general_cachep(size, flags);
3765 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3766 return cachep;
3767 return kmem_cache_alloc_node_trace(size, cachep, flags, node);
3768}
3769
3770#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3771void *__kmalloc_node(size_t size, gfp_t flags, int node)
3772{
3773 return __do_kmalloc_node(size, flags, node,
3774 __builtin_return_address(0));
3775}
3776EXPORT_SYMBOL(__kmalloc_node);
3777
3778void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3779 int node, unsigned long caller)
3780{
3781 return __do_kmalloc_node(size, flags, node, (void *)caller);
3782}
3783EXPORT_SYMBOL(__kmalloc_node_track_caller);
3784#else
3785void *__kmalloc_node(size_t size, gfp_t flags, int node)
3786{
3787 return __do_kmalloc_node(size, flags, node, NULL);
3788}
3789EXPORT_SYMBOL(__kmalloc_node);
3790#endif
3791#endif
3792
3793
3794
3795
3796
3797
3798
3799static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3800 void *caller)
3801{
3802 struct kmem_cache *cachep;
3803 void *ret;
3804
3805
3806
3807
3808
3809
3810 cachep = __find_general_cachep(size, flags);
3811 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3812 return cachep;
3813 ret = __cache_alloc(cachep, flags, caller);
3814
3815 trace_kmalloc((unsigned long) caller, ret,
3816 size, cachep->buffer_size, flags);
3817
3818 return ret;
3819}
3820
3821
3822#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3823void *__kmalloc(size_t size, gfp_t flags)
3824{
3825 return __do_kmalloc(size, flags, __builtin_return_address(0));
3826}
3827EXPORT_SYMBOL(__kmalloc);
3828
3829void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3830{
3831 return __do_kmalloc(size, flags, (void *)caller);
3832}
3833EXPORT_SYMBOL(__kmalloc_track_caller);
3834
3835#else
3836void *__kmalloc(size_t size, gfp_t flags)
3837{
3838 return __do_kmalloc(size, flags, NULL);
3839}
3840EXPORT_SYMBOL(__kmalloc);
3841#endif
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3852{
3853 unsigned long flags;
3854
3855 local_irq_save(flags);
3856 debug_check_no_locks_freed(objp, obj_size(cachep));
3857 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3858 debug_check_no_obj_freed(objp, obj_size(cachep));
3859 __cache_free(cachep, objp, __builtin_return_address(0));
3860 local_irq_restore(flags);
3861
3862 trace_kmem_cache_free(_RET_IP_, objp);
3863}
3864EXPORT_SYMBOL(kmem_cache_free);
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875void kfree(const void *objp)
3876{
3877 struct kmem_cache *c;
3878 unsigned long flags;
3879
3880 trace_kfree(_RET_IP_, objp);
3881
3882 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3883 return;
3884 local_irq_save(flags);
3885 kfree_debugcheck(objp);
3886 c = virt_to_cache(objp);
3887 debug_check_no_locks_freed(objp, obj_size(c));
3888 debug_check_no_obj_freed(objp, obj_size(c));
3889 __cache_free(c, (void *)objp, __builtin_return_address(0));
3890 local_irq_restore(flags);
3891}
3892EXPORT_SYMBOL(kfree);
3893
/*
 * kmem_cache_size - report the object size of a cache, as given by
 * obj_size().
 */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	unsigned int size = obj_size(cachep);

	return size;
}
EXPORT_SYMBOL(kmem_cache_size);
3899
3900
3901
3902
/*
 * alloc_kmemlist - set up (or refresh) the per-node lists of a cache.
 * @cachep: cache to operate on
 * @gfp:    gfp flags for the allocations done here
 *
 * For every online node a new shared array (if cachep->shared is set)
 * and, when alien caches are in use, a new alien cache are allocated.
 * An existing kmem_list3 gets its shared array replaced (objects of the
 * old one are released first) and its free_limit recomputed; a missing
 * kmem_list3 is allocated and initialised.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {

		if (use_alien_caches) {
			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
			if (!new_alien)
				goto fail;
		}

		new_shared = NULL;
		if (cachep->shared) {
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		l3 = cachep->nodelists[node];
		if (l3) {
			/* node already has lists: swap in the new arrays */
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);

			/* give the old shared array's objects back to the slabs */
			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			l3->shared = new_shared;
			/* keep an existing alien cache; only install if missing */
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/* old shared array and any unused new alien cache go away */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;
		}
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	/*
	 * Undo the nodes handled so far, but only when cachep->next.next
	 * is NULL.  NOTE(review): presumably that means the cache is not
	 * yet linked into the cache chain - confirm.
	 */
	if (!cachep->next.next) {
		/* Cache is not active yet. Roll back what we did */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];

				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
3987
/*
 * Argument block handed to do_ccupdate_local() on every CPU.
 * do_tune_cpucache() allocates it with nr_cpu_ids trailing slots.
 */
struct ccupdate_struct {
	struct kmem_cache *cachep;	/* cache whose per-cpu arrays are swapped */
	struct array_cache *new[0];	/* per-cpu: new array in, old array out */
};
3992
3993static void do_ccupdate_local(void *info)
3994{
3995 struct ccupdate_struct *new = info;
3996 struct array_cache *old;
3997
3998 check_irq_off();
3999 old = cpu_cache_get(new->cachep);
4000
4001 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
4002 new->new[smp_processor_id()] = old;
4003}
4004
4005
4006static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4007 int batchcount, int shared, gfp_t gfp)
4008{
4009 struct ccupdate_struct *new;
4010 int i;
4011
4012 new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
4013 gfp);
4014 if (!new)
4015 return -ENOMEM;
4016
4017 for_each_online_cpu(i) {
4018 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
4019 batchcount, gfp);
4020 if (!new->new[i]) {
4021 for (i--; i >= 0; i--)
4022 kfree(new->new[i]);
4023 kfree(new);
4024 return -ENOMEM;
4025 }
4026 }
4027 new->cachep = cachep;
4028
4029 on_each_cpu(do_ccupdate_local, (void *)new, 1);
4030
4031 check_irq_on();
4032 cachep->batchcount = batchcount;
4033 cachep->limit = limit;
4034 cachep->shared = shared;
4035
4036 for_each_online_cpu(i) {
4037 struct array_cache *ccold = new->new[i];
4038 if (!ccold)
4039 continue;
4040 spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
4041 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
4042 spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
4043 kfree(ccold);
4044 }
4045 kfree(new);
4046 return alloc_kmemlist(cachep, gfp);
4047}
4048
4049
4050static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4051{
4052 int err;
4053 int limit, shared;
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064 if (cachep->buffer_size > 131072)
4065 limit = 1;
4066 else if (cachep->buffer_size > PAGE_SIZE)
4067 limit = 8;
4068 else if (cachep->buffer_size > 1024)
4069 limit = 24;
4070 else if (cachep->buffer_size > 256)
4071 limit = 54;
4072 else
4073 limit = 120;
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084 shared = 0;
4085 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4086 shared = 8;
4087
4088#if DEBUG
4089
4090
4091
4092
4093 if (limit > 32)
4094 limit = 32;
4095#endif
4096 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
4097 if (err)
4098 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4099 cachep->name, -err);
4100 return err;
4101}
4102
4103
4104
4105
4106
4107
4108static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4109 struct array_cache *ac, int force, int node)
4110{
4111 int tofree;
4112
4113 if (!ac || !ac->avail)
4114 return;
4115 if (ac->touched && !force) {
4116 ac->touched = 0;
4117 } else {
4118 spin_lock_irq(&l3->list_lock);
4119 if (ac->avail) {
4120 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4121 if (tofree > ac->avail)
4122 tofree = (ac->avail + 1) / 2;
4123 free_block(cachep, ac->entry, tofree, node);
4124 ac->avail -= tofree;
4125 memmove(ac->entry, &(ac->entry[tofree]),
4126 sizeof(void *) * ac->avail);
4127 }
4128 spin_unlock_irq(&l3->list_lock);
4129 }
4130}
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144static void cache_reap(struct work_struct *w)
4145{
4146 struct kmem_cache *searchp;
4147 struct kmem_list3 *l3;
4148 int node = numa_mem_id();
4149 struct delayed_work *work = to_delayed_work(w);
4150
4151 if (!mutex_trylock(&cache_chain_mutex))
4152
4153 goto out;
4154
4155 list_for_each_entry(searchp, &cache_chain, next) {
4156 check_irq_on();
4157
4158
4159
4160
4161
4162
4163 l3 = searchp->nodelists[node];
4164
4165 reap_alien(searchp, l3);
4166
4167 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4168
4169
4170
4171
4172
4173 if (time_after(l3->next_reap, jiffies))
4174 goto next;
4175
4176 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4177
4178 drain_array(searchp, l3, l3->shared, 0, node);
4179
4180 if (l3->free_touched)
4181 l3->free_touched = 0;
4182 else {
4183 int freed;
4184
4185 freed = drain_freelist(searchp, l3, (l3->free_limit +
4186 5 * searchp->num - 1) / (5 * searchp->num));
4187 STATS_ADD_REAPED(searchp, freed);
4188 }
4189next:
4190 cond_resched();
4191 }
4192 check_irq_on();
4193 mutex_unlock(&cache_chain_mutex);
4194 next_reap_node();
4195out:
4196
4197 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4198}
4199
4200#ifdef CONFIG_SLABINFO
4201
/*
 * print_slabinfo_header - emit the version line and the column legend of
 * the slabinfo output.  Builds with STATS add the statistics columns.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/* version line */
#if STATS
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif

	/* column legend, emitted as one output line */
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
4224
4225static void *s_start(struct seq_file *m, loff_t *pos)
4226{
4227 loff_t n = *pos;
4228
4229 mutex_lock(&cache_chain_mutex);
4230 if (!n)
4231 print_slabinfo_header(m);
4232
4233 return seq_list_start(&cache_chain, *pos);
4234}
4235
4236static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4237{
4238 return seq_list_next(p, &cache_chain, pos);
4239}
4240
4241static void s_stop(struct seq_file *m, void *p)
4242{
4243 mutex_unlock(&cache_chain_mutex);
4244}
4245
4246static int s_show(struct seq_file *m, void *p)
4247{
4248 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4249 struct slab *slabp;
4250 unsigned long active_objs;
4251 unsigned long num_objs;
4252 unsigned long active_slabs = 0;
4253 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4254 const char *name;
4255 char *error = NULL;
4256 int node;
4257 struct kmem_list3 *l3;
4258
4259 active_objs = 0;
4260 num_slabs = 0;
4261 for_each_online_node(node) {
4262 l3 = cachep->nodelists[node];
4263 if (!l3)
4264 continue;
4265
4266 check_irq_on();
4267 spin_lock_irq(&l3->list_lock);
4268
4269 list_for_each_entry(slabp, &l3->slabs_full, list) {
4270 if (slabp->inuse != cachep->num && !error)
4271 error = "slabs_full accounting error";
4272 active_objs += cachep->num;
4273 active_slabs++;
4274 }
4275 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4276 if (slabp->inuse == cachep->num && !error)
4277 error = "slabs_partial inuse accounting error";
4278 if (!slabp->inuse && !error)
4279 error = "slabs_partial/inuse accounting error";
4280 active_objs += slabp->inuse;
4281 active_slabs++;
4282 }
4283 list_for_each_entry(slabp, &l3->slabs_free, list) {
4284 if (slabp->inuse && !error)
4285 error = "slabs_free/inuse accounting error";
4286 num_slabs++;
4287 }
4288 free_objects += l3->free_objects;
4289 if (l3->shared)
4290 shared_avail += l3->shared->avail;
4291
4292 spin_unlock_irq(&l3->list_lock);
4293 }
4294 num_slabs += active_slabs;
4295 num_objs = num_slabs * cachep->num;
4296 if (num_objs - active_objs != free_objects && !error)
4297 error = "free_objects accounting error";
4298
4299 name = cachep->name;
4300 if (error)
4301 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4302
4303 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4304 name, active_objs, num_objs, cachep->buffer_size,
4305 cachep->num, (1 << cachep->gfporder));
4306 seq_printf(m, " : tunables %4u %4u %4u",
4307 cachep->limit, cachep->batchcount, cachep->shared);
4308 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4309 active_slabs, num_slabs, shared_avail);
4310#if STATS
4311 {
4312 unsigned long high = cachep->high_mark;
4313 unsigned long allocs = cachep->num_allocations;
4314 unsigned long grown = cachep->grown;
4315 unsigned long reaped = cachep->reaped;
4316 unsigned long errors = cachep->errors;
4317 unsigned long max_freeable = cachep->max_freeable;
4318 unsigned long node_allocs = cachep->node_allocs;
4319 unsigned long node_frees = cachep->node_frees;
4320 unsigned long overflows = cachep->node_overflow;
4321
4322 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4323 "%4lu %4lu %4lu %4lu %4lu",
4324 allocs, high, grown,
4325 reaped, errors, max_freeable, node_allocs,
4326 node_frees, overflows);
4327 }
4328
4329 {
4330 unsigned long allochit = atomic_read(&cachep->allochit);
4331 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4332 unsigned long freehit = atomic_read(&cachep->freehit);
4333 unsigned long freemiss = atomic_read(&cachep->freemiss);
4334
4335 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4336 allochit, allocmiss, freehit, freemiss);
4337 }
4338#endif
4339 seq_putc(m, '\n');
4340 return 0;
4341}
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
/*
 * seq_file iterator for slabinfo: s_start() takes cache_chain_mutex and
 * s_stop() releases it, s_next() walks the cache chain and s_show()
 * prints one line per cache.
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
4363
/* Longest write, in bytes, that slabinfo_write() accepts. */
#define MAX_SLABINFO_WRITE 128
4365
4366
4367
4368
4369
4370
4371
4372static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4373 size_t count, loff_t *ppos)
4374{
4375 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4376 int limit, batchcount, shared, res;
4377 struct kmem_cache *cachep;
4378
4379 if (count > MAX_SLABINFO_WRITE)
4380 return -EINVAL;
4381 if (copy_from_user(&kbuf, buffer, count))
4382 return -EFAULT;
4383 kbuf[MAX_SLABINFO_WRITE] = '\0';
4384
4385 tmp = strchr(kbuf, ' ');
4386 if (!tmp)
4387 return -EINVAL;
4388 *tmp = '\0';
4389 tmp++;
4390 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4391 return -EINVAL;
4392
4393
4394 mutex_lock(&cache_chain_mutex);
4395 res = -EINVAL;
4396 list_for_each_entry(cachep, &cache_chain, next) {
4397 if (!strcmp(cachep->name, kbuf)) {
4398 if (limit < 1 || batchcount < 1 ||
4399 batchcount > limit || shared < 0) {
4400 res = 0;
4401 } else {
4402 res = do_tune_cpucache(cachep, limit,
4403 batchcount, shared,
4404 GFP_KERNEL);
4405 }
4406 break;
4407 }
4408 }
4409 mutex_unlock(&cache_chain_mutex);
4410 if (res >= 0)
4411 res = count;
4412 return res;
4413}
4414
4415static int slabinfo_open(struct inode *inode, struct file *file)
4416{
4417 return seq_open(file, &slabinfo_op);
4418}
4419
/*
 * File operations for the "slabinfo" proc entry created in
 * slab_proc_init(): reads go through seq_file, writes are parsed by
 * slabinfo_write().
 */
static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};
4427
4428#ifdef CONFIG_DEBUG_SLAB_LEAK
4429
4430static void *leaks_start(struct seq_file *m, loff_t *pos)
4431{
4432 mutex_lock(&cache_chain_mutex);
4433 return seq_list_start(&cache_chain, *pos);
4434}
4435
/*
 * Account one more hit for value @v in the table @n.
 *
 * Table layout, in unsigned longs:
 *   n[0]       capacity in two-word slots (the header occupies one slot)
 *   n[1]       number of entries currently stored
 *   n[2..]     entries as (value, hit count) pairs, sorted by value
 *
 * A zero @v is ignored.  An existing entry has its count bumped; a new
 * value is inserted at its sorted position with a count of 1.
 *
 * Returns 1 on success (or when @v is 0).  Returns 0 when the table is
 * full; in that case n[1] has been raised to n[0] and nothing was stored.
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *slot = n + 2;	/* start of the range still searched */
	int remaining = n[1];		/* entries left in that range */
	size_t tail_bytes;

	if (!v)
		return 1;

	/* Binary search over the sorted (value, count) pairs. */
	while (remaining) {
		int half = remaining / 2;
		unsigned long *probe = slot + 2 * half;

		if (*probe == v) {
			probe[1]++;
			return 1;
		}

		if (*probe > v) {
			remaining = half;
		} else {
			slot = probe + 2;
			remaining -= half + 1;
		}
	}

	/* Not present: claim a slot, or report that none is left. */
	n[1]++;
	if (n[1] == n[0])
		return 0;

	/* Shift everything from the insertion point up by one pair. */
	tail_bytes = n[1] * 2 * sizeof(unsigned long) -
		     ((char *)slot - (char *)n);
	memmove(slot + 2, slot, tail_bytes);

	slot[0] = v;
	slot[1] = 1;
	return 1;
}
4465
4466static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4467{
4468 void *p;
4469 int i;
4470 if (n[0] == n[1])
4471 return;
4472 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4473 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4474 continue;
4475 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4476 return;
4477 }
4478}
4479
/*
 * Print @address to @m.  With CONFIG_KALLSYMS, a successful
 * lookup_symbol_attrs() yields "name+offset/size", followed by
 * " [module]" when a module name was filled in.  Otherwise, or when the
 * lookup fails, the raw pointer value is printed with %p.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char name[KSYM_NAME_LEN];
	char modname[MODULE_NAME_LEN];
	unsigned long size;
	unsigned long offset;
	int lookup_err;

	lookup_err = lookup_symbol_attrs(address, &size, &offset,
					 modname, name);
	if (!lookup_err) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0])
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4495
/*
 * seq_file ->show for the leak report: print one line per distinct user
 * word recorded in the active objects of the cache that @p points into.
 *
 * m->private holds the table filled by handle_slab()/add_caller():
 * n[0] is its capacity, n[1] the number of entries, and the entries
 * follow as (value, count) pairs starting at n[2].
 *
 * Returns 0 normally (including when the cache is skipped or the table
 * had to be enlarged), -ENOMEM if enlarging the table failed.
 */
static int leaks_show(struct seq_file *m, void *p)
{
	struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
	struct slab *slabp;
	struct kmem_list3 *l3;
	const char *name;
	unsigned long *n = m->private;
	int node;
	int i;

	/* Only caches with both SLAB_STORE_USER and SLAB_RED_ZONE are reported. */
	if (!(cachep->flags & SLAB_STORE_USER))
		return 0;
	if (!(cachep->flags & SLAB_RED_ZONE))
		return 0;

	/* Start with an empty table for this cache. */
	n[1] = 0;

	for_each_online_node(node) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;

		/* NOTE(review): check_irq_on() presumably asserts IRQs are enabled - confirm. */
		check_irq_on();
		spin_lock_irq(&l3->list_lock);

		/* Full and partial slab lists are walked; no other list is visited here. */
		list_for_each_entry(slabp, &l3->slabs_full, list)
			handle_slab(n, cachep, slabp);
		list_for_each_entry(slabp, &l3->slabs_partial, list)
			handle_slab(n, cachep, slabp);
		spin_unlock_irq(&l3->list_lock);
	}
	name = cachep->name;
	if (n[0] == n[1]) {
		/*
		 * The table filled up.  Replace it with one twice as large:
		 * n[0] slots of two words each, doubled, is n[0] * 4 words.
		 * cache_chain_mutex is dropped around the GFP_KERNEL
		 * allocation and retaken before returning.
		 */
		mutex_unlock(&cache_chain_mutex);
		m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
		if (!m->private) {
			/* Allocation failed: keep the old table and give up. */
			m->private = n;
			mutex_lock(&cache_chain_mutex);
			return -ENOMEM;
		}
		/* Record the doubled capacity in the new table's n[0]. */
		*(unsigned long *)m->private = n[0] * 2;
		kfree(n);
		mutex_lock(&cache_chain_mutex);
		/*
		 * Mark the seq_file buffer as completely used.
		 * NOTE(review): presumably this makes the seq_file core treat
		 * the record as overflowed and call ->show again for this
		 * cache, now with the larger table - confirm against seq_file.
		 */
		m->count = m->size;
		return 0;
	}
	/* Emit "<cache name>: <count> <symbol>" for every entry. */
	for (i = 0; i < n[1]; i++) {
		seq_printf(m, "%s: %lu ", name, n[2*i+3]);
		show_symbol(m, n[2*i+2]);
		seq_putc(m, '\n');
	}

	return 0;
}
4555
/*
 * seq_file iterator callbacks handed to seq_open() by slabstats_open().
 * ->next and ->stop are the same functions used by slabinfo_op; ->start
 * and ->show are specific to the leak report.
 */
static const struct seq_operations slabstats_op = {
	.start = leaks_start,
	.next = s_next,
	.stop = s_stop,
	.show = leaks_show,
};
4562
4563static int slabstats_open(struct inode *inode, struct file *file)
4564{
4565 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4566 int ret = -ENOMEM;
4567 if (n) {
4568 ret = seq_open(file, &slabstats_op);
4569 if (!ret) {
4570 struct seq_file *m = file->private_data;
4571 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4572 m->private = n;
4573 n = NULL;
4574 }
4575 kfree(n);
4576 }
4577 return ret;
4578}
4579
/*
 * File operations for the "slab_allocators" proc entry created in
 * slab_proc_init().  Read-only: there is no ->write handler.
 * NOTE(review): seq_release_private is presumably what frees the table
 * stored in m->private by slabstats_open() - confirm against seq_file.
 */
static const struct file_operations proc_slabstats_operations = {
	.open = slabstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_private,
};
4586#endif
4587
4588static int __init slab_proc_init(void)
4589{
4590 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4591#ifdef CONFIG_DEBUG_SLAB_LEAK
4592 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4593#endif
4594 return 0;
4595}
4596module_init(slab_proc_init);
4597#endif
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611size_t ksize(const void *objp)
4612{
4613 BUG_ON(!objp);
4614 if (unlikely(objp == ZERO_SIZE_PTR))
4615 return 0;
4616
4617 return obj_size(virt_to_cache(objp));
4618}
4619EXPORT_SYMBOL(ksize);
4620