1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include "slab.h"
91#include <linux/mm.h>
92#include <linux/poison.h>
93#include <linux/swap.h>
94#include <linux/cache.h>
95#include <linux/interrupt.h>
96#include <linux/init.h>
97#include <linux/compiler.h>
98#include <linux/cpuset.h>
99#include <linux/proc_fs.h>
100#include <linux/seq_file.h>
101#include <linux/notifier.h>
102#include <linux/kallsyms.h>
103#include <linux/cpu.h>
104#include <linux/sysctl.h>
105#include <linux/module.h>
106#include <linux/rcupdate.h>
107#include <linux/string.h>
108#include <linux/uaccess.h>
109#include <linux/nodemask.h>
110#include <linux/kmemleak.h>
111#include <linux/mempolicy.h>
112#include <linux/mutex.h>
113#include <linux/fault-inject.h>
114#include <linux/rtmutex.h>
115#include <linux/reciprocal_div.h>
116#include <linux/debugobjects.h>
117#include <linux/kmemcheck.h>
118#include <linux/memory.h>
119#include <linux/prefetch.h>
120
121#include <net/sock.h>
122
123#include <asm/cacheflush.h>
124#include <asm/tlbflush.h>
125#include <asm/page.h>
126
127#include <trace/events/kmem.h>
128
129#include "internal.h"
130
131
132
133
134
135
136
137
138
139
140
/*
 * Compile-time switches for this file.  When CONFIG_DEBUG_SLAB is set,
 * DEBUG, STATS and FORCED_DEBUG are all 1; otherwise all three are 0.
 * DEBUG and STATS select the "#if DEBUG" / "#if STATS" sections below.
 */
#ifdef CONFIG_DEBUG_SLAB
#define DEBUG 1
#define STATS 1
#define FORCED_DEBUG 1
#else
#define DEBUG 0
#define STATS 0
#define FORCED_DEBUG 0
#endif
150
151
/* Size of one machine word, taken to be the size of a pointer. */
#define BYTES_PER_WORD sizeof(void *)
/* The larger of a word and the alignment of unsigned long long. */
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))

/*
 * Falls back to SLAB_HWCACHE_ALIGN when no definition was provided earlier.
 * NOTE(review): presumably the flags used for the kmalloc caches; the use
 * site is outside this view.
 */
#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
158
159
160
161
162
/*
 * Global flag read by __ac_put_obj() before it tags a freed object, and
 * cleared by recheck_pfmemalloc_active() when no slab on the local node's
 * lists is marked pfmemalloc.  Where it gets set is outside this view.
 */
static bool pfmemalloc_active __read_mostly;
164
165
/*
 * CREATE_MASK: a set of SLAB_* flags.  The DEBUG variant additionally
 * contains SLAB_RED_ZONE, SLAB_POISON and SLAB_STORE_USER.
 * NOTE(review): presumably the flags accepted at cache creation; the use
 * site is outside this view.
 */
#if DEBUG
# define CREATE_MASK (SLAB_RED_ZONE | \
	SLAB_POISON | SLAB_HWCACHE_ALIGN | \
	SLAB_CACHE_DMA | \
	SLAB_STORE_USER | \
	SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
	SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
	SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
	SLAB_CACHE_DMA | \
	SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
	SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
	SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
 * kmem_bufctl_t is an unsigned int.  The three largest values of the type
 * are reserved as the BUFCTL_END, BUFCTL_FREE and BUFCTL_ACTIVE markers;
 * SLAB_LIMIT is the next value down and is what cache_estimate() uses as
 * the upper bound on objects per slab.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
/*
 * struct slab_rcu - an rcu_head together with a cache pointer and an address.
 *
 * It is overlaid on struct slab through the union in that structure.
 * NOTE(review): presumably used to free a slab after an RCU grace period;
 * the code that fills it in is outside this view.
 */
struct slab_rcu {
	struct rcu_head head;
	struct kmem_cache *cachep;
	void *addr;
};
226
227
228
229
230
231
232
233
/*
 * struct slab - per-slab management data.
 *
 * The anonymous struct holds the bookkeeping fields; the union lets a
 * struct slab_rcu occupy the same storage.
 */
struct slab {
	union {
		struct {
			struct list_head list;	/* link on a kmem_list3 slabs_* list */
			unsigned long colouroff;
			void *s_mem;		/* base used by index_to_obj()/obj_to_index() */
			unsigned int inuse;
			kmem_bufctl_t free;
			unsigned short nodeid;	/* compared with numa_mem_id() in cache_free_alien() */
		};
		struct slab_rcu __slab_cover_slab_rcu;
	};
};
247
248
249
250
251
252
253
254
255
256
257
258
259
/*
 * struct array_cache - a LIFO array of object pointers.
 *
 * ac_get_obj() pops with entry[--avail] and ac_put_obj() pushes with
 * entry[avail++]; transfer_objects() moves pointers between two of these.
 */
struct array_cache {
	unsigned int avail;		/* number of valid pointers in entry[] */
	unsigned int limit;		/* capacity honoured by transfer_objects() */
	unsigned int batchcount;
	unsigned int touched;
	spinlock_t lock;		/* taken around alien-cache accesses */
	void *entry[];			/* flexible array of object pointers */
};
276
/*
 * Bit 0 of an object pointer stored in an array_cache is used as a
 * "pfmemalloc" tag.  The helpers below test, set and clear that bit.
 */
#define SLAB_OBJ_PFMEMALLOC 1

/* Return true when the tag bit of @objp is set. */
static inline bool is_obj_pfmemalloc(void *objp)
{
	unsigned long bits = (unsigned long)objp;

	return (bits & SLAB_OBJ_PFMEMALLOC) != 0;
}

/* Set the tag bit in the pointer stored at @objp. */
static inline void set_obj_pfmemalloc(void **objp)
{
	unsigned long bits = (unsigned long)*objp;

	bits |= SLAB_OBJ_PFMEMALLOC;
	*objp = (void *)bits;
}

/* Clear the tag bit in the pointer stored at @objp. */
static inline void clear_obj_pfmemalloc(void **objp)
{
	unsigned long bits = (unsigned long)*objp;

	bits &= ~SLAB_OBJ_PFMEMALLOC;
	*objp = (void *)bits;
}
293
294
295
296
297
/*
 * An array_cache followed by storage for BOOT_CPUCACHE_ENTRIES pointers,
 * so it can be defined statically (see initarray_cache/initarray_generic).
 */
#define BOOT_CPUCACHE_ENTRIES 1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
303
304
305
306
/*
 * struct kmem_list3 - per-node state of a cache (indexed through
 * cachep->nodelists[node]): three slab lists plus bookkeeping.
 */
struct kmem_list3 {
	struct list_head slabs_partial;
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;	/* passed to drain_freelist() as the amount to free */
	unsigned int free_limit;	/* set in init_cache_nodelists_node() */
	unsigned int colour_next;
	spinlock_t list_lock;		/* held around list walks and free_block() calls */
	struct array_cache *shared;	/* optional; target of transfer_objects() */
	struct array_cache **alien;	/* optional; indexed by node id */
	unsigned long next_reap;	/* jiffies-based value, see set_up_list3s() */
	int free_touched;
};
320
321
322
323
/*
 * Statically allocated kmem_list3 structures used during initialisation:
 * three groups of MAX_NUMNODES entries.  CACHE_CACHE, SIZE_AC and SIZE_L3
 * are the starting indices of the groups; set_up_list3s() adds the node id
 * to the chosen start.
 */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES
#define SIZE_L3 (2 * MAX_NUMNODES)
329
/* Forward declarations of functions defined later in the file. */
static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
336
337
338
339
340
/*
 * index_of - index of the first size class that can hold @size.
 *
 * For a compile-time constant @size, <linux/kmalloc_sizes.h> is included
 * with CACHE(x) defined as a step of a comparison chain: the index of the
 * first x with size <= x is returned.  If no class matches, or @size is not
 * a compile-time constant, __bad_size() is called and 0 is returned.
 * NOTE(review): __bad_size() is only declared here; presumably it is left
 * undefined so that misuse shows up at link time - confirm.
 */
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <=x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}
360
/* Starts at 1; the code that reads or clears it is outside this view. */
static int slab_early_init = 1;

/* Size-class indices for struct arraycache_init and struct kmem_list3. */
#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))
365
366static void kmem_list3_init(struct kmem_list3 *parent)
367{
368 INIT_LIST_HEAD(&parent->slabs_full);
369 INIT_LIST_HEAD(&parent->slabs_partial);
370 INIT_LIST_HEAD(&parent->slabs_free);
371 parent->shared = NULL;
372 parent->alien = NULL;
373 parent->colour_next = 0;
374 spin_lock_init(&parent->list_lock);
375 parent->free_objects = 0;
376 parent->free_touched = 0;
377}
378
/*
 * MAKE_LIST: initialise @listp and splice the named list of
 * cachep->nodelists[nodeid] onto it.
 */
#define MAKE_LIST(cachep, listp, slab, nodeid) \
	do { \
		INIT_LIST_HEAD(listp); \
		list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
	} while (0)

/* MAKE_ALL_LISTS: apply MAKE_LIST to the full, partial and free lists of @ptr. */
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
	do { \
		MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
		MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
		MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
	} while (0)
391
/*
 * Cache flag bit tested by OFF_SLAB() and cache_estimate(); when set,
 * cache_estimate() reserves no management space inside the slab.
 */
#define CFLGS_OFF_SLAB (0x80000000UL)
#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)

/* NOTE(review): presumably caps per-refill batch size; use site not in view. */
#define BATCHREFILL_LIMIT 16

/*
 * Reap intervals expressed in jiffies.  REAPTIMEOUT_LIST3 is used when
 * computing kmem_list3.next_reap.
 */
#define REAPTIMEOUT_CPUC (2*HZ)
#define REAPTIMEOUT_LIST3 (4*HZ)
405
/*
 * Statistics helpers.  With STATS enabled they update counters stored in
 * the cache structure; otherwise each expands to an empty statement
 * (STATS_ADD_REAPED still evaluates its second argument once).
 */
#if STATS
#define STATS_INC_ACTIVE(x) ((x)->num_active++)
#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
#define STATS_INC_GROWN(x) ((x)->grown++)
#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
/* Track the highest num_active value seen. */
#define STATS_SET_HIGH(x) \
	do { \
		if ((x)->num_active > (x)->high_mark) \
			(x)->high_mark = (x)->num_active; \
	} while (0)
#define STATS_INC_ERR(x) ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
/* Track the largest value of i passed in. */
#define STATS_SET_FREEABLE(x, i) \
	do { \
		if ((x)->max_freeable < i) \
			(x)->max_freeable = i; \
	} while (0)
#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ACTIVE(x) do { } while (0)
#define STATS_DEC_ACTIVE(x) do { } while (0)
#define STATS_INC_ALLOCED(x) do { } while (0)
#define STATS_INC_GROWN(x) do { } while (0)
#define STATS_ADD_REAPED(x,y) do { (void)(y); } while (0)
#define STATS_SET_HIGH(x) do { } while (0)
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x) do { } while (0)
#define STATS_INC_ACOVERFLOW(x) do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x) do { } while (0)
#define STATS_INC_ALLOCMISS(x) do { } while (0)
#define STATS_INC_FREEHIT(x) do { } while (0)
#define STATS_INC_FREEMISS(x) do { } while (0)
#endif
447
448#if DEBUG
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463static int obj_offset(struct kmem_cache *cachep)
464{
465 return cachep->obj_offset;
466}
467
468static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
469{
470 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
471 return (unsigned long long*) (objp + obj_offset(cachep) -
472 sizeof(unsigned long long));
473}
474
475static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
476{
477 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
478 if (cachep->flags & SLAB_STORE_USER)
479 return (unsigned long long *)(objp + cachep->size -
480 sizeof(unsigned long long) -
481 REDZONE_ALIGN);
482 return (unsigned long long *) (objp + cachep->size -
483 sizeof(unsigned long long));
484}
485
486static void **dbg_userword(struct kmem_cache *cachep, void *objp)
487{
488 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
489 return (void **)(objp + cachep->size - BYTES_PER_WORD);
490}
491
492#else
493
/*
 * Non-DEBUG replacements: the object offset is always 0, and the debug
 * accessors BUG() if they are ever evaluated (each still yields a typed
 * NULL so that callers compile).
 */
#define obj_offset(x) 0
#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
498
499#endif
500
#ifdef CONFIG_TRACING
/* Exported accessor: return the size field of @cachep. */
size_t slab_buffer_size(struct kmem_cache *cachep)
{
	const size_t bytes = cachep->size;

	return bytes;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
508
509
510
511
512
/*
 * slab_max_order starts at SLAB_MAX_ORDER_LO.  The "slab_max_order=" boot
 * option overrides it and sets slab_max_order_set; kmem_cache_init() raises
 * it to SLAB_MAX_ORDER_HI on larger machines unless that option was given.
 */
#define SLAB_MAX_ORDER_HI 1
#define SLAB_MAX_ORDER_LO 0
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;
517
518static inline struct kmem_cache *page_get_cache(struct page *page)
519{
520 page = compound_head(page);
521 BUG_ON(!PageSlab(page));
522 return page->slab_cache;
523}
524
525static inline struct kmem_cache *virt_to_cache(const void *obj)
526{
527 struct page *page = virt_to_head_page(obj);
528 return page->slab_cache;
529}
530
531static inline struct slab *virt_to_slab(const void *obj)
532{
533 struct page *page = virt_to_head_page(obj);
534
535 VM_BUG_ON(!PageSlab(page));
536 return page->slab_page;
537}
538
539static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
540 unsigned int idx)
541{
542 return slab->s_mem + cache->size * idx;
543}
544
545
546
547
548
549
550
551static inline unsigned int obj_to_index(const struct kmem_cache *cache,
552 const struct slab *slab, void *obj)
553{
554 u32 offset = (obj - slab->s_mem);
555 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
556}
557
558
559
560
/*
 * Table of general-purpose size classes: one entry per CACHE(x) line of
 * <linux/kmalloc_sizes.h>, terminated by an entry whose cs_size is
 * ULONG_MAX (the sentinel init_node_lock_keys() stops on).
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
568
569
/* Pair of names for one size class: the normal cache and its DMA variant. */
struct cache_names {
	char *name;
	char *name_dma;
};
574
/*
 * Names generated from <linux/kmalloc_sizes.h>: "size-<x>" and
 * "size-<x>(DMA)" for every CACHE(x), followed by a NULL-name terminator.
 */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};
581
/*
 * Statically allocated array caches: avail 0, limit BOOT_CPUCACHE_ENTRIES,
 * batchcount 1, touched 0.  kmem_cache_init() installs initarray_cache as
 * the boot CPU's array cache of cache_cache.
 */
static struct arraycache_init initarray_cache __initdata =
	{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
	{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
586
587
/*
 * cache_cache: the statically defined cache named "kmem_cache", whose
 * objects are struct kmem_cache instances.  Its nodelists pointer refers to
 * the static cache_cache_nodelists array.
 */
static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
static struct kmem_cache cache_cache = {
	.nodelists = cache_cache_nodelists,
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};
597
/*
 * Dummy non-NULL value returned by the !CONFIG_NUMA alloc_alien_cache();
 * slab_set_lock_classes() checks for it before dereferencing an alien array.
 */
#define BAD_ALIEN_MAGIC 0x01020304ul
599
600#ifdef CONFIG_LOCKDEP
601
602
603
604
605
606
607
608
609
610
611
612
/*
 * Lockdep classes.  The on_slab_* keys are applied by init_node_lock_keys()
 * to the malloc_sizes caches that are not OFF_SLAB; the debugobj_* keys are
 * applied by the slab_set_debugobj_lock_classes*() helpers.
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static struct lock_class_key debugobj_l3_key;
static struct lock_class_key debugobj_alc_key;
618
619static void slab_set_lock_classes(struct kmem_cache *cachep,
620 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
621 int q)
622{
623 struct array_cache **alc;
624 struct kmem_list3 *l3;
625 int r;
626
627 l3 = cachep->nodelists[q];
628 if (!l3)
629 return;
630
631 lockdep_set_class(&l3->list_lock, l3_key);
632 alc = l3->alien;
633
634
635
636
637
638
639
640 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
641 return;
642 for_each_node(r) {
643 if (alc[r])
644 lockdep_set_class(&alc[r]->lock, alc_key);
645 }
646}
647
/* Apply the debugobj lock classes to node @node of @cachep. */
static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
	slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
}
652
653static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
654{
655 int node;
656
657 for_each_online_node(node)
658 slab_set_debugobj_lock_classes_node(cachep, node);
659}
660
661static void init_node_lock_keys(int q)
662{
663 struct cache_sizes *s = malloc_sizes;
664
665 if (slab_state < UP)
666 return;
667
668 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
669 struct kmem_list3 *l3;
670
671 l3 = s->cs_cachep->nodelists[q];
672 if (!l3 || OFF_SLAB(s->cs_cachep))
673 continue;
674
675 slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
676 &on_slab_alc_key, q);
677 }
678}
679
680static inline void init_lock_keys(void)
681{
682 int node;
683
684 for_each_node(node)
685 init_node_lock_keys(node);
686}
687#else
/*
 * Without CONFIG_LOCKDEP the lock-class helpers are empty stubs so that
 * callers need no conditional compilation.
 */
static void init_node_lock_keys(int q)
{
}

static inline void init_lock_keys(void)
{
}

static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}

static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
}
703#endif
704
/*
 * Per-CPU delayed work item; start_cpu_timer() binds it to cache_reap() and
 * schedules it, cpuup_callback() cancels it on CPU_DOWN_PREPARE.
 */
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
706
707static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
708{
709 return cachep->array[smp_processor_id()];
710}
711
712static inline struct kmem_cache *__find_general_cachep(size_t size,
713 gfp_t gfpflags)
714{
715 struct cache_sizes *csizep = malloc_sizes;
716
717#if DEBUG
718
719
720
721
722 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
723#endif
724 if (!size)
725 return ZERO_SIZE_PTR;
726
727 while (size > csizep->cs_size)
728 csizep++;
729
730
731
732
733
734
735#ifdef CONFIG_ZONE_DMA
736 if (unlikely(gfpflags & GFP_DMA))
737 return csizep->cs_dmacachep;
738#endif
739 return csizep->cs_cachep;
740}
741
/* Out-of-line wrapper around the inline __find_general_cachep(). */
static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}
746
747static size_t slab_mgmt_size(size_t nr_objs, size_t align)
748{
749 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
750}
751
752
753
754
755static void cache_estimate(unsigned long gfporder, size_t buffer_size,
756 size_t align, int flags, size_t *left_over,
757 unsigned int *num)
758{
759 int nr_objs;
760 size_t mgmt_size;
761 size_t slab_size = PAGE_SIZE << gfporder;
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778 if (flags & CFLGS_OFF_SLAB) {
779 mgmt_size = 0;
780 nr_objs = slab_size / buffer_size;
781
782 if (nr_objs > SLAB_LIMIT)
783 nr_objs = SLAB_LIMIT;
784 } else {
785
786
787
788
789
790
791
792
793 nr_objs = (slab_size - sizeof(struct slab)) /
794 (buffer_size + sizeof(kmem_bufctl_t));
795
796
797
798
799
800 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
801 > slab_size)
802 nr_objs--;
803
804 if (nr_objs > SLAB_LIMIT)
805 nr_objs = SLAB_LIMIT;
806
807 mgmt_size = slab_mgmt_size(nr_objs, align);
808 }
809 *num = nr_objs;
810 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
811}
812
813#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
814
815static void __slab_error(const char *function, struct kmem_cache *cachep,
816 char *msg)
817{
818 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
819 function, cachep->name, msg);
820 dump_stack();
821}
822
823
824
825
826
827
828
829
830
/*
 * use_alien_caches: non-zero means cpuup_prepare() allocates alien caches.
 * Cleared by the "noaliencache" boot option, and by kmem_cache_init() when
 * only one node is possible.
 */
static int use_alien_caches __read_mostly = 1;
/* Handler for the "noaliencache" boot option; the argument is ignored. */
static int __init noaliencache_setup(char *s)
{
	use_alien_caches = 0;
	return 1;
}
__setup("noaliencache", noaliencache_setup);
838
839static int __init slab_max_order_setup(char *str)
840{
841 get_option(&str, &slab_max_order);
842 slab_max_order = slab_max_order < 0 ? 0 :
843 min(slab_max_order, MAX_ORDER - 1);
844 slab_max_order_set = true;
845
846 return 1;
847}
848__setup("slab_max_order=", slab_max_order_setup);
849
850#ifdef CONFIG_NUMA
851
852
853
854
855
856
/*
 * Per-CPU node id: initialised by init_reap_node(), advanced round-robin by
 * next_reap_node() and read by reap_alien() to choose the alien cache.
 */
static DEFINE_PER_CPU(unsigned long, slab_reap_node);
858
859static void init_reap_node(int cpu)
860{
861 int node;
862
863 node = next_node(cpu_to_mem(cpu), node_online_map);
864 if (node == MAX_NUMNODES)
865 node = first_node(node_online_map);
866
867 per_cpu(slab_reap_node, cpu) = node;
868}
869
870static void next_reap_node(void)
871{
872 int node = __this_cpu_read(slab_reap_node);
873
874 node = next_node(node, node_online_map);
875 if (unlikely(node >= MAX_NUMNODES))
876 node = first_node(node_online_map);
877 __this_cpu_write(slab_reap_node, node);
878}
879
880#else
/* Without CONFIG_NUMA there is no reap node to track: both are no-ops. */
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
883#endif
884
885
886
887
888
889
890
891
892static void __cpuinit start_cpu_timer(int cpu)
893{
894 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
895
896
897
898
899
900
901 if (keventd_up() && reap_work->work.func == NULL) {
902 init_reap_node(cpu);
903 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
904 schedule_delayed_work_on(cpu, reap_work,
905 __round_jiffies_relative(HZ, cpu));
906 }
907}
908
909static struct array_cache *alloc_arraycache(int node, int entries,
910 int batchcount, gfp_t gfp)
911{
912 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
913 struct array_cache *nc = NULL;
914
915 nc = kmalloc_node(memsize, gfp, node);
916
917
918
919
920
921
922
923 kmemleak_no_scan(nc);
924 if (nc) {
925 nc->avail = 0;
926 nc->limit = entries;
927 nc->batchcount = batchcount;
928 nc->touched = 0;
929 spin_lock_init(&nc->lock);
930 }
931 return nc;
932}
933
934static inline bool is_slab_pfmemalloc(struct slab *slabp)
935{
936 struct page *page = virt_to_page(slabp->s_mem);
937
938 return PageSlabPfmemalloc(page);
939}
940
941
942static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
943 struct array_cache *ac)
944{
945 struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
946 struct slab *slabp;
947 unsigned long flags;
948
949 if (!pfmemalloc_active)
950 return;
951
952 spin_lock_irqsave(&l3->list_lock, flags);
953 list_for_each_entry(slabp, &l3->slabs_full, list)
954 if (is_slab_pfmemalloc(slabp))
955 goto out;
956
957 list_for_each_entry(slabp, &l3->slabs_partial, list)
958 if (is_slab_pfmemalloc(slabp))
959 goto out;
960
961 list_for_each_entry(slabp, &l3->slabs_free, list)
962 if (is_slab_pfmemalloc(slabp))
963 goto out;
964
965 pfmemalloc_active = false;
966out:
967 spin_unlock_irqrestore(&l3->list_lock, flags);
968}
969
/*
 * __ac_get_obj - pop an object from @ac, honouring the pfmemalloc tag.
 * @cachep:       cache that @ac belongs to
 * @ac:           array cache to pop from
 * @flags:        allocation flags, checked with gfp_pfmemalloc_allowed()
 * @force_refill: allows a tagged object to be handed out, see below
 *
 * Returns the object pointer with the tag bit cleared, or NULL when the
 * top entry is tagged and none of the fallbacks applies (the pop is then
 * undone).
 */
static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
						gfp_t flags, bool force_refill)
{
	int i;
	void *objp = ac->entry[--ac->avail];

	/* Only tagged objects need special treatment */
	if (unlikely(is_obj_pfmemalloc(objp))) {
		struct kmem_list3 *l3;

		/* The caller is allowed to use it: strip the tag and return */
		if (gfp_pfmemalloc_allowed(flags)) {
			clear_obj_pfmemalloc(&objp);
			return objp;
		}

		/* Otherwise look for an untagged entry below the popped slot */
		for (i = 0; i < ac->avail; i++) {
			/* Found one: swap it with the popped slot and return it */
			if (!is_obj_pfmemalloc(ac->entry[i])) {
				objp = ac->entry[i];
				ac->entry[i] = ac->entry[ac->avail];
				ac->entry[ac->avail] = objp;
				return objp;
			}
		}

		/*
		 * Every entry is tagged.  If the local node has free slabs and
		 * the caller forces a refill, clear the flag on the slab's
		 * page, strip the tag, recheck the global flag and hand the
		 * object out anyway.
		 */
		l3 = cachep->nodelists[numa_mem_id()];
		if (!list_empty(&l3->slabs_free) && force_refill) {
			struct slab *slabp = virt_to_slab(objp);
			ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
			clear_obj_pfmemalloc(&objp);
			recheck_pfmemalloc_active(cachep, ac);
			return objp;
		}

		/* Nothing usable: undo the pop and report failure */
		ac->avail++;
		objp = NULL;
	}

	return objp;
}
1016
1017static inline void *ac_get_obj(struct kmem_cache *cachep,
1018 struct array_cache *ac, gfp_t flags, bool force_refill)
1019{
1020 void *objp;
1021
1022 if (unlikely(sk_memalloc_socks()))
1023 objp = __ac_get_obj(cachep, ac, flags, force_refill);
1024 else
1025 objp = ac->entry[--ac->avail];
1026
1027 return objp;
1028}
1029
1030static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
1031 void *objp)
1032{
1033 if (unlikely(pfmemalloc_active)) {
1034
1035 struct page *page = virt_to_head_page(objp);
1036 if (PageSlabPfmemalloc(page))
1037 set_obj_pfmemalloc(&objp);
1038 }
1039
1040 return objp;
1041}
1042
1043static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
1044 void *objp)
1045{
1046 if (unlikely(sk_memalloc_socks()))
1047 objp = __ac_put_obj(cachep, ac, objp);
1048
1049 ac->entry[ac->avail++] = objp;
1050}
1051
1052
1053
1054
1055
1056
1057
1058static int transfer_objects(struct array_cache *to,
1059 struct array_cache *from, unsigned int max)
1060{
1061
1062 int nr = min3(from->avail, max, to->limit - to->avail);
1063
1064 if (!nr)
1065 return 0;
1066
1067 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
1068 sizeof(void *) *nr);
1069
1070 from->avail -= nr;
1071 to->avail += nr;
1072 return nr;
1073}
1074
1075#ifndef CONFIG_NUMA
1076
/* !CONFIG_NUMA: there are no alien caches, so draining and reaping are no-ops. */
#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

/* Returns the BAD_ALIEN_MAGIC placeholder instead of a real array. */
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
	return (struct array_cache **)BAD_ALIEN_MAGIC;
}

/* Nothing was allocated, so there is nothing to free. */
static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

/* Always 0: the object was not handled as an alien free. */
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
	return 0;
}

/* Always NULL: there is no alternate node to allocate from. */
static inline void *alternate_node_alloc(struct kmem_cache *cachep,
		gfp_t flags)
{
	return NULL;
}

/* Always NULL: node-specific allocation is not available. */
static inline void *____cache_alloc_node(struct kmem_cache *cachep,
		 gfp_t flags, int nodeid)
{
	return NULL;
}
1105
1106#else
1107
/* CONFIG_NUMA: forward declarations of allocators defined later in the file. */
static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1110
1111static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
1112{
1113 struct array_cache **ac_ptr;
1114 int memsize = sizeof(void *) * nr_node_ids;
1115 int i;
1116
1117 if (limit > 1)
1118 limit = 12;
1119 ac_ptr = kzalloc_node(memsize, gfp, node);
1120 if (ac_ptr) {
1121 for_each_node(i) {
1122 if (i == node || !node_online(i))
1123 continue;
1124 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
1125 if (!ac_ptr[i]) {
1126 for (i--; i >= 0; i--)
1127 kfree(ac_ptr[i]);
1128 kfree(ac_ptr);
1129 return NULL;
1130 }
1131 }
1132 }
1133 return ac_ptr;
1134}
1135
1136static void free_alien_cache(struct array_cache **ac_ptr)
1137{
1138 int i;
1139
1140 if (!ac_ptr)
1141 return;
1142 for_each_node(i)
1143 kfree(ac_ptr[i]);
1144 kfree(ac_ptr);
1145}
1146
1147static void __drain_alien_cache(struct kmem_cache *cachep,
1148 struct array_cache *ac, int node)
1149{
1150 struct kmem_list3 *rl3 = cachep->nodelists[node];
1151
1152 if (ac->avail) {
1153 spin_lock(&rl3->list_lock);
1154
1155
1156
1157
1158
1159 if (rl3->shared)
1160 transfer_objects(rl3->shared, ac, ac->limit);
1161
1162 free_block(cachep, ac->entry, ac->avail, node);
1163 ac->avail = 0;
1164 spin_unlock(&rl3->list_lock);
1165 }
1166}
1167
1168
1169
1170
1171static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1172{
1173 int node = __this_cpu_read(slab_reap_node);
1174
1175 if (l3->alien) {
1176 struct array_cache *ac = l3->alien[node];
1177
1178 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1179 __drain_alien_cache(cachep, ac, node);
1180 spin_unlock_irq(&ac->lock);
1181 }
1182 }
1183}
1184
1185static void drain_alien_cache(struct kmem_cache *cachep,
1186 struct array_cache **alien)
1187{
1188 int i = 0;
1189 struct array_cache *ac;
1190 unsigned long flags;
1191
1192 for_each_online_node(i) {
1193 ac = alien[i];
1194 if (ac) {
1195 spin_lock_irqsave(&ac->lock, flags);
1196 __drain_alien_cache(cachep, ac, i);
1197 spin_unlock_irqrestore(&ac->lock, flags);
1198 }
1199 }
1200}
1201
1202static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1203{
1204 struct slab *slabp = virt_to_slab(objp);
1205 int nodeid = slabp->nodeid;
1206 struct kmem_list3 *l3;
1207 struct array_cache *alien = NULL;
1208 int node;
1209
1210 node = numa_mem_id();
1211
1212
1213
1214
1215
1216 if (likely(slabp->nodeid == node))
1217 return 0;
1218
1219 l3 = cachep->nodelists[node];
1220 STATS_INC_NODEFREES(cachep);
1221 if (l3->alien && l3->alien[nodeid]) {
1222 alien = l3->alien[nodeid];
1223 spin_lock(&alien->lock);
1224 if (unlikely(alien->avail == alien->limit)) {
1225 STATS_INC_ACOVERFLOW(cachep);
1226 __drain_alien_cache(cachep, alien, nodeid);
1227 }
1228 ac_put_obj(cachep, alien, objp);
1229 spin_unlock(&alien->lock);
1230 } else {
1231 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1232 free_block(cachep, &objp, 1, nodeid);
1233 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1234 }
1235 return 1;
1236}
1237#endif
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248static int init_cache_nodelists_node(int node)
1249{
1250 struct kmem_cache *cachep;
1251 struct kmem_list3 *l3;
1252 const int memsize = sizeof(struct kmem_list3);
1253
1254 list_for_each_entry(cachep, &slab_caches, list) {
1255
1256
1257
1258
1259
1260 if (!cachep->nodelists[node]) {
1261 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1262 if (!l3)
1263 return -ENOMEM;
1264 kmem_list3_init(l3);
1265 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1266 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1267
1268
1269
1270
1271
1272
1273 cachep->nodelists[node] = l3;
1274 }
1275
1276 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1277 cachep->nodelists[node]->free_limit =
1278 (1 + nr_cpus_node(node)) *
1279 cachep->batchcount + cachep->num;
1280 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1281 }
1282 return 0;
1283}
1284
/*
 * cpuup_canceled - release the per-CPU slab state of @cpu.
 *
 * Called (with slab_mutex held by cpuup_callback()) when bringing a CPU up
 * is cancelled or the CPU is dead, and by cpuup_prepare() on its error path.
 */
static void __cpuinit cpuup_canceled(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_mem(cpu);
	const struct cpumask *mask = cpumask_of_node(node);

	list_for_each_entry(cachep, &slab_caches, list) {
		struct array_cache *nc;
		struct array_cache *shared;
		struct array_cache **alien;

		/* Detach the CPU's array cache from the cache */
		nc = cachep->array[cpu];
		cachep->array[cpu] = NULL;
		l3 = cachep->nodelists[node];

		if (!l3)
			goto free_array_cache;

		spin_lock_irq(&l3->list_lock);

		/* This CPU no longer contributes to the node's free limit */
		l3->free_limit -= cachep->batchcount;
		if (nc)
			free_block(cachep, nc->entry, nc->avail, node);

		/* The node still has CPUs: keep its shared and alien caches */
		if (!cpumask_empty(mask)) {
			spin_unlock_irq(&l3->list_lock);
			goto free_array_cache;
		}

		shared = l3->shared;
		if (shared) {
			free_block(cachep, shared->entry,
				   shared->avail, node);
			l3->shared = NULL;
		}

		alien = l3->alien;
		l3->alien = NULL;

		spin_unlock_irq(&l3->list_lock);

		/* Free the detached caches outside the lock */
		kfree(shared);
		if (alien) {
			drain_alien_cache(cachep, alien);
			free_alien_cache(alien);
		}
free_array_cache:
		kfree(nc);
	}

	/*
	 * Second pass: ask drain_freelist() to release the free objects of
	 * every cache on this node.
	 */
	list_for_each_entry(cachep, &slab_caches, list) {
		l3 = cachep->nodelists[node];
		if (!l3)
			continue;
		drain_freelist(cachep, l3, l3->free_objects);
	}
}
1349
/*
 * cpuup_prepare - set up the per-CPU slab state before @cpu comes online.
 *
 * Returns 0 on success.  On any allocation failure the partially built
 * state is torn down through cpuup_canceled() and -ENOMEM is returned.
 * Called with slab_mutex held by cpuup_callback().
 */
static int __cpuinit cpuup_prepare(long cpu)
{
	struct kmem_cache *cachep;
	struct kmem_list3 *l3 = NULL;
	int node = cpu_to_mem(cpu);
	int err;

	/* Make sure every cache has a kmem_list3 for the CPU's memory node */
	err = init_cache_nodelists_node(node);
	if (err < 0)
		goto bad;

	/*
	 * Allocate the array cache for this CPU and, where still missing,
	 * the shared and alien caches of the node.
	 */
	list_for_each_entry(cachep, &slab_caches, list) {
		struct array_cache *nc;
		struct array_cache *shared = NULL;
		struct array_cache **alien = NULL;

		nc = alloc_arraycache(node, cachep->limit,
					cachep->batchcount, GFP_KERNEL);
		if (!nc)
			goto bad;
		if (cachep->shared) {
			shared = alloc_arraycache(node,
				cachep->shared * cachep->batchcount,
				0xbaadf00d, GFP_KERNEL);
			if (!shared) {
				kfree(nc);
				goto bad;
			}
		}
		if (use_alien_caches) {
			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
			if (!alien) {
				kfree(shared);
				kfree(nc);
				goto bad;
			}
		}
		cachep->array[cpu] = nc;
		l3 = cachep->nodelists[node];
		BUG_ON(!l3);

		spin_lock_irq(&l3->list_lock);
		if (!l3->shared) {
			/* Install our shared cache; NULL marks it as consumed */
			l3->shared = shared;
			shared = NULL;
		}
#ifdef CONFIG_NUMA
		if (!l3->alien) {
			l3->alien = alien;
			alien = NULL;
		}
#endif
		spin_unlock_irq(&l3->list_lock);
		/* Free whatever was not installed above */
		kfree(shared);
		free_alien_cache(alien);
		if (cachep->flags & SLAB_DEBUG_OBJECTS)
			slab_set_debugobj_lock_classes_node(cachep, node);
	}
	init_node_lock_keys(node);

	return 0;
bad:
	cpuup_canceled(cpu);
	return -ENOMEM;
}
1429
/*
 * cpuup_callback - CPU hotplug notifier for the slab allocator.
 * @nfb:    notifier block (unused)
 * @action: hotplug event
 * @hcpu:   CPU number, passed as a pointer-sized integer
 *
 * Returns the result of the prepare step converted with
 * notifier_from_errno(); every other event reports success.
 */
static int __cpuinit cpuup_callback(struct notifier_block *nfb,
				    unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&slab_mutex);
		err = cpuup_prepare(cpu);
		mutex_unlock(&slab_mutex);
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		start_cpu_timer(cpu);
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * Stop the reaper for this CPU and wait for a running
		 * instance to finish.
		 */
		cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
		/* A NULL func lets start_cpu_timer() arm the work again */
		per_cpu(slab_reap_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* fall through: a dead CPU is cleaned up like a cancelled one */
#endif
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		mutex_lock(&slab_mutex);
		cpuup_canceled(cpu);
		mutex_unlock(&slab_mutex);
		break;
	}
	return notifier_from_errno(err);
}
1485
/*
 * Notifier block whose first member is cpuup_callback; the remaining
 * members are NULL and 0.  Registration is outside this view.
 */
static struct notifier_block __cpuinitdata cpucache_notifier = {
	&cpuup_callback, NULL, 0
};
1489
1490#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1491
1492
1493
1494
1495
1496
1497
1498static int __meminit drain_cache_nodelists_node(int node)
1499{
1500 struct kmem_cache *cachep;
1501 int ret = 0;
1502
1503 list_for_each_entry(cachep, &slab_caches, list) {
1504 struct kmem_list3 *l3;
1505
1506 l3 = cachep->nodelists[node];
1507 if (!l3)
1508 continue;
1509
1510 drain_freelist(cachep, l3, l3->free_objects);
1511
1512 if (!list_empty(&l3->slabs_full) ||
1513 !list_empty(&l3->slabs_partial)) {
1514 ret = -EBUSY;
1515 break;
1516 }
1517 }
1518 return ret;
1519}
1520
1521static int __meminit slab_memory_callback(struct notifier_block *self,
1522 unsigned long action, void *arg)
1523{
1524 struct memory_notify *mnb = arg;
1525 int ret = 0;
1526 int nid;
1527
1528 nid = mnb->status_change_nid;
1529 if (nid < 0)
1530 goto out;
1531
1532 switch (action) {
1533 case MEM_GOING_ONLINE:
1534 mutex_lock(&slab_mutex);
1535 ret = init_cache_nodelists_node(nid);
1536 mutex_unlock(&slab_mutex);
1537 break;
1538 case MEM_GOING_OFFLINE:
1539 mutex_lock(&slab_mutex);
1540 ret = drain_cache_nodelists_node(nid);
1541 mutex_unlock(&slab_mutex);
1542 break;
1543 case MEM_ONLINE:
1544 case MEM_OFFLINE:
1545 case MEM_CANCEL_ONLINE:
1546 case MEM_CANCEL_OFFLINE:
1547 break;
1548 }
1549out:
1550 return notifier_from_errno(ret);
1551}
1552#endif
1553
1554
1555
1556
1557static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1558 int nodeid)
1559{
1560 struct kmem_list3 *ptr;
1561
1562 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1563 BUG_ON(!ptr);
1564
1565 memcpy(ptr, list, sizeof(struct kmem_list3));
1566
1567
1568
1569 spin_lock_init(&ptr->list_lock);
1570
1571 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1572 cachep->nodelists[nodeid] = ptr;
1573}
1574
1575
1576
1577
1578
1579static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1580{
1581 int node;
1582
1583 for_each_online_node(node) {
1584 cachep->nodelists[node] = &initkmem_list3[index + node];
1585 cachep->nodelists[node]->next_reap = jiffies +
1586 REAPTIMEOUT_LIST3 +
1587 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1588 }
1589}
1590
1591
1592
1593
1594
1595void __init kmem_cache_init(void)
1596{
1597 size_t left_over;
1598 struct cache_sizes *sizes;
1599 struct cache_names *names;
1600 int i;
1601 int order;
1602 int node;
1603
1604 if (num_possible_nodes() == 1)
1605 use_alien_caches = 0;
1606
1607 for (i = 0; i < NUM_INIT_LISTS; i++) {
1608 kmem_list3_init(&initkmem_list3[i]);
1609 if (i < MAX_NUMNODES)
1610 cache_cache.nodelists[i] = NULL;
1611 }
1612 set_up_list3s(&cache_cache, CACHE_CACHE);
1613
1614
1615
1616
1617
1618
1619 if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
1620 slab_max_order = SLAB_MAX_ORDER_HI;
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642 node = numa_mem_id();
1643
1644
1645 INIT_LIST_HEAD(&slab_caches);
1646 list_add(&cache_cache.list, &slab_caches);
1647 cache_cache.colour_off = cache_line_size();
1648 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1649 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1650
1651
1652
1653
1654 cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1655 nr_node_ids * sizeof(struct kmem_list3 *);
1656 cache_cache.object_size = cache_cache.size;
1657 cache_cache.size = ALIGN(cache_cache.size,
1658 cache_line_size());
1659 cache_cache.reciprocal_buffer_size =
1660 reciprocal_value(cache_cache.size);
1661
1662 for (order = 0; order < MAX_ORDER; order++) {
1663 cache_estimate(order, cache_cache.size,
1664 cache_line_size(), 0, &left_over, &cache_cache.num);
1665 if (cache_cache.num)
1666 break;
1667 }
1668 BUG_ON(!cache_cache.num);
1669 cache_cache.gfporder = order;
1670 cache_cache.colour = left_over / cache_cache.colour_off;
1671 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1672 sizeof(struct slab), cache_line_size());
1673
1674
1675 sizes = malloc_sizes;
1676 names = cache_names;
1677
1678
1679
1680
1681
1682
1683
1684 sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name,
1685 sizes[INDEX_AC].cs_size,
1686 ARCH_KMALLOC_MINALIGN,
1687 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1688 NULL);
1689
1690 if (INDEX_AC != INDEX_L3) {
1691 sizes[INDEX_L3].cs_cachep =
1692 __kmem_cache_create(names[INDEX_L3].name,
1693 sizes[INDEX_L3].cs_size,
1694 ARCH_KMALLOC_MINALIGN,
1695 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1696 NULL);
1697 }
1698
1699 slab_early_init = 0;
1700
1701 while (sizes->cs_size != ULONG_MAX) {
1702
1703
1704
1705
1706
1707
1708
1709 if (!sizes->cs_cachep) {
1710 sizes->cs_cachep = __kmem_cache_create(names->name,
1711 sizes->cs_size,
1712 ARCH_KMALLOC_MINALIGN,
1713 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1714 NULL);
1715 }
1716#ifdef CONFIG_ZONE_DMA
1717 sizes->cs_dmacachep = __kmem_cache_create(
1718 names->name_dma,
1719 sizes->cs_size,
1720 ARCH_KMALLOC_MINALIGN,
1721 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1722 SLAB_PANIC,
1723 NULL);
1724#endif
1725 sizes++;
1726 names++;
1727 }
1728
1729 {
1730 struct array_cache *ptr;
1731
1732 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1733
1734 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1735 memcpy(ptr, cpu_cache_get(&cache_cache),
1736 sizeof(struct arraycache_init));
1737
1738
1739
1740 spin_lock_init(&ptr->lock);
1741
1742 cache_cache.array[smp_processor_id()] = ptr;
1743
1744 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1745
1746 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1747 != &initarray_generic.cache);
1748 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1749 sizeof(struct arraycache_init));
1750
1751
1752
1753 spin_lock_init(&ptr->lock);
1754
1755 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1756 ptr;
1757 }
1758
1759 {
1760 int nid;
1761
1762 for_each_online_node(nid) {
1763 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1764
1765 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1766 &initkmem_list3[SIZE_AC + nid], nid);
1767
1768 if (INDEX_AC != INDEX_L3) {
1769 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1770 &initkmem_list3[SIZE_L3 + nid], nid);
1771 }
1772 }
1773 }
1774
1775 slab_state = UP;
1776}
1777
/*
 * Second stage of slab initialisation.
 *
 * Runs enable_cpucache() on every cache registered on slab_caches (any
 * failure is fatal), calls init_lock_keys(), marks the allocator FULL and
 * then registers the CPU hotplug notifier and, on NUMA builds, the memory
 * hotplug notifier.
 */
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	slab_state = UP;

	/* Walk all caches under slab_mutex; a non-zero return is a BUG. */
	mutex_lock(&slab_mutex);
	list_for_each_entry(cachep, &slab_caches, list)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&slab_mutex);

	/* Done only after the caches above have been processed. */
	init_lock_keys();

	/* From here on setup_cpu_cache() takes the enable_cpucache() path. */
	slab_state = FULL;

	/* Start receiving CPU hotplug events via cpuup_callback(). */
	register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
	/* Start receiving memory hotplug events via slab_memory_callback(). */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

}
1816
1817static int __init cpucache_init(void)
1818{
1819 int cpu;
1820
1821
1822
1823
1824 for_each_online_cpu(cpu)
1825 start_cpu_timer(cpu);
1826
1827
1828 slab_state = FULL;
1829 return 0;
1830}
1831__initcall(cpucache_init);
1832
1833static noinline void
1834slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1835{
1836 struct kmem_list3 *l3;
1837 struct slab *slabp;
1838 unsigned long flags;
1839 int node;
1840
1841 printk(KERN_WARNING
1842 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1843 nodeid, gfpflags);
1844 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1845 cachep->name, cachep->size, cachep->gfporder);
1846
1847 for_each_online_node(node) {
1848 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1849 unsigned long active_slabs = 0, num_slabs = 0;
1850
1851 l3 = cachep->nodelists[node];
1852 if (!l3)
1853 continue;
1854
1855 spin_lock_irqsave(&l3->list_lock, flags);
1856 list_for_each_entry(slabp, &l3->slabs_full, list) {
1857 active_objs += cachep->num;
1858 active_slabs++;
1859 }
1860 list_for_each_entry(slabp, &l3->slabs_partial, list) {
1861 active_objs += slabp->inuse;
1862 active_slabs++;
1863 }
1864 list_for_each_entry(slabp, &l3->slabs_free, list)
1865 num_slabs++;
1866
1867 free_objects += l3->free_objects;
1868 spin_unlock_irqrestore(&l3->list_lock, flags);
1869
1870 num_slabs += active_slabs;
1871 num_objs = num_slabs * cachep->num;
1872 printk(KERN_WARNING
1873 " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
1874 node, active_slabs, num_slabs, active_objs, num_objs,
1875 free_objects);
1876 }
1877}
1878
1879
1880
1881
1882
1883
1884
1885
/*
 * Allocate the pages that back one slab of @cachep on node @nodeid.
 *
 * @flags is combined with the cache's own allocflags (and with
 * __GFP_RECLAIMABLE for SLAB_RECLAIM_ACCOUNT caches) before
 * 2^gfporder pages are requested.  On success every page is marked PageSlab,
 * the zone's slab page counter is increased and, when kmemcheck is enabled
 * for the cache, shadow memory is set up.
 *
 * Returns the kernel virtual address of the first page, or NULL when the
 * page allocator failed (a rate-limited report is printed unless
 * __GFP_NOWARN was given).
 */
static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
	struct page *page;
	int nr_pages;
	int i;

#ifndef CONFIG_MMU
	/* On !MMU builds the allocation is always requested as compound. */
	flags |= __GFP_COMP;
#endif

	flags |= cachep->allocflags;
	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		flags |= __GFP_RECLAIMABLE;

	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
	if (!page) {
		if (!(flags & __GFP_NOWARN) && printk_ratelimit())
			slab_out_of_memory(cachep, flags, nodeid);
		return NULL;
	}

	/* Latch the global flag once any slab page came back pfmemalloc. */
	if (unlikely(page->pfmemalloc))
		pfmemalloc_active = true;

	nr_pages = (1 << cachep->gfporder);
	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
		add_zone_page_state(page_zone(page),
			NR_SLAB_RECLAIMABLE, nr_pages);
	else
		add_zone_page_state(page_zone(page),
			NR_SLAB_UNRECLAIMABLE, nr_pages);
	for (i = 0; i < nr_pages; i++) {
		__SetPageSlab(page + i);

		/* The first page's pfmemalloc state is applied to every page. */
		if (page->pfmemalloc)
			SetPageSlabPfmemalloc(page + i);
	}

	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);

		/* Caches with a constructor get a different initial marking. */
		if (cachep->ctor)
			kmemcheck_mark_uninitialized_pages(page, nr_pages);
		else
			kmemcheck_mark_unallocated_pages(page, nr_pages);
	}

	return page_address(page);
}
1940
1941
1942
1943
1944static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1945{
1946 unsigned long i = (1 << cachep->gfporder);
1947 struct page *page = virt_to_page(addr);
1948 const unsigned long nr_freed = i;
1949
1950 kmemcheck_free_shadow(page, cachep->gfporder);
1951
1952 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1953 sub_zone_page_state(page_zone(page),
1954 NR_SLAB_RECLAIMABLE, nr_freed);
1955 else
1956 sub_zone_page_state(page_zone(page),
1957 NR_SLAB_UNRECLAIMABLE, nr_freed);
1958 while (i--) {
1959 BUG_ON(!PageSlab(page));
1960 __ClearPageSlabPfmemalloc(page);
1961 __ClearPageSlab(page);
1962 page++;
1963 }
1964 if (current->reclaim_state)
1965 current->reclaim_state->reclaimed_slab += nr_freed;
1966 free_pages((unsigned long)addr, cachep->gfporder);
1967}
1968
1969static void kmem_rcu_free(struct rcu_head *head)
1970{
1971 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1972 struct kmem_cache *cachep = slab_rcu->cachep;
1973
1974 kmem_freepages(cachep, slab_rcu->addr);
1975 if (OFF_SLAB(cachep))
1976 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1977}
1978
1979#if DEBUG
1980
1981#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Record debugging information inside a (free) object of @cachep.
 *
 * Starting at the object's payload (@addr + obj_offset) this writes, as
 * unsigned longs: the marker 0x12345678, @caller, the current CPU id, then
 * kernel text addresses found by scanning the stack upwards from &caller,
 * and finally the marker 0x87654321.  Objects smaller than five unsigned
 * longs are left untouched.
 */
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
			    unsigned long caller)
{
	int size = cachep->object_size;

	/* Skip whatever precedes the payload inside the object. */
	addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

	/* Need room for both markers, caller, cpu and at least one entry. */
	if (size < 5 * sizeof(unsigned long))
		return;

	*addr++ = 0x12345678;
	*addr++ = caller;
	*addr++ = smp_processor_id();
	size -= 3 * sizeof(unsigned long);
	{
		unsigned long *sptr = &caller;
		unsigned long svalue;

		/* Scan stack words until kstack_end() reports the end. */
		while (!kstack_end(sptr)) {
			svalue = *sptr++;
			if (kernel_text_address(svalue)) {
				*addr++ = svalue;
				size -= sizeof(unsigned long);
				/* Keep one slot for the closing marker. */
				if (size <= sizeof(unsigned long))
					break;
			}
		}

	}
	*addr++ = 0x87654321;
}
2013#endif
2014
2015static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
2016{
2017 int size = cachep->object_size;
2018 addr = &((char *)addr)[obj_offset(cachep)];
2019
2020 memset(addr, val, size);
2021 *(unsigned char *)(addr + size - 1) = POISON_END;
2022}
2023
2024static void dump_line(char *data, int offset, int limit)
2025{
2026 int i;
2027 unsigned char error = 0;
2028 int bad_count = 0;
2029
2030 printk(KERN_ERR "%03x: ", offset);
2031 for (i = 0; i < limit; i++) {
2032 if (data[offset + i] != POISON_FREE) {
2033 error = data[offset + i];
2034 bad_count++;
2035 }
2036 }
2037 print_hex_dump(KERN_CONT, "", 0, 16, 1,
2038 &data[offset], limit, 1);
2039
2040 if (bad_count == 1) {
2041 error ^= POISON_FREE;
2042 if (!(error & (error - 1))) {
2043 printk(KERN_ERR "Single bit error detected. Probably "
2044 "bad RAM.\n");
2045#ifdef CONFIG_X86
2046 printk(KERN_ERR "Run memtest86+ or a similar memory "
2047 "test tool.\n");
2048#else
2049 printk(KERN_ERR "Run a memory test tool.\n");
2050#endif
2051 }
2052 }
2053}
2054#endif
2055
2056#if DEBUG
2057
2058static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
2059{
2060 int i, size;
2061 char *realobj;
2062
2063 if (cachep->flags & SLAB_RED_ZONE) {
2064 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
2065 *dbg_redzone1(cachep, objp),
2066 *dbg_redzone2(cachep, objp));
2067 }
2068
2069 if (cachep->flags & SLAB_STORE_USER) {
2070 printk(KERN_ERR "Last user: [<%p>]",
2071 *dbg_userword(cachep, objp));
2072 print_symbol("(%s)",
2073 (unsigned long)*dbg_userword(cachep, objp));
2074 printk("\n");
2075 }
2076 realobj = (char *)objp + obj_offset(cachep);
2077 size = cachep->object_size;
2078 for (i = 0; i < size && lines; i += 16, lines--) {
2079 int limit;
2080 limit = 16;
2081 if (i + limit > size)
2082 limit = size - i;
2083 dump_line(realobj, i, limit);
2084 }
2085}
2086
/*
 * Verify that a free object still carries its poison pattern.
 *
 * Every payload byte must be POISON_FREE except the last, which must be
 * POISON_END.  On the first mismatch a "Slab corruption" header is printed;
 * each 16-byte line containing a mismatch is then dumped, up to six lines.
 * If anything was reported, the neighbouring objects in the same slab
 * (previous and next, when they exist) are printed as well.
 */
static void check_poison_obj(struct kmem_cache *cachep, void *objp)
{
	char *realobj;
	int size, i;
	int lines = 0;

	realobj = (char *)objp + obj_offset(cachep);
	size = cachep->object_size;

	for (i = 0; i < size; i++) {
		char exp = POISON_FREE;
		if (i == size - 1)
			exp = POISON_END;
		if (realobj[i] != exp) {
			int limit;

			/* Print the header once, before the first dumped line. */
			if (lines == 0) {
				printk(KERN_ERR
					"Slab corruption (%s): %s start=%p, len=%d\n",
					print_tainted(), cachep->name, realobj, size);
				print_objinfo(cachep, objp, 0);
			}

			/* Round down to the start of the 16-byte line. */
			i = (i / 16) * 16;
			limit = 16;
			if (i + limit > size)
				limit = size - i;
			dump_line(realobj, i, limit);
			/*
			 * Advance past the dumped line; together with the
			 * loop's i++ the scan resumes at offset i + 1 of the
			 * next line.
			 */
			i += 16;
			lines++;

			/* Limit the report to six dumped lines. */
			if (lines > 5)
				break;
		}
	}
	if (lines != 0) {
		/* Corruption was reported: show the adjacent objects too. */
		struct slab *slabp = virt_to_slab(objp);
		unsigned int objnr;

		objnr = obj_to_index(cachep, slabp, objp);
		if (objnr) {
			objp = index_to_obj(cachep, slabp, objnr - 1);
			realobj = (char *)objp + obj_offset(cachep);
			printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
			       realobj, size);
			print_objinfo(cachep, objp, 2);
		}
		if (objnr + 1 < cachep->num) {
			objp = index_to_obj(cachep, slabp, objnr + 1);
			realobj = (char *)objp + obj_offset(cachep);
			printk(KERN_ERR "Next obj: start=%p, len=%d\n",
			       realobj, size);
			print_objinfo(cachep, objp, 2);
		}
	}
}
2147#endif
2148
#if DEBUG
/*
 * Debug-build consistency check run on every object of a slab that is
 * about to be destroyed: poisoned objects are verified (or, with
 * CONFIG_DEBUG_PAGEALLOC, page-sized off-slab objects are passed to
 * kernel_map_pages() with enable = 1 instead), and both red zones must
 * still read RED_INACTIVE.
 */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
	int i;
	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, slabp, i);

		if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
			/*
			 * Mirrors the kernel_map_pages(..., 0) call made for
			 * the same kind of object in cache_init_objs().
			 */
			if (cachep->size % PAGE_SIZE == 0 &&
					OFF_SLAB(cachep))
				kernel_map_pages(virt_to_page(objp),
					cachep->size / PAGE_SIZE, 1);
			else
				check_poison_obj(cachep, objp);
#else
			check_poison_obj(cachep, objp);
#endif
		}
		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "start of a freed object "
					   "was overwritten");
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "end of a freed object "
					   "was overwritten");
		}
	}
}
#else
/* Non-debug builds: nothing to check. */
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
2194{
2195 void *addr = slabp->s_mem - slabp->colouroff;
2196
2197 slab_destroy_debugcheck(cachep, slabp);
2198 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
2199 struct slab_rcu *slab_rcu;
2200
2201 slab_rcu = (struct slab_rcu *)slabp;
2202 slab_rcu->cachep = cachep;
2203 slab_rcu->addr = addr;
2204 call_rcu(&slab_rcu->head, kmem_rcu_free);
2205 } else {
2206 kmem_freepages(cachep, addr);
2207 if (OFF_SLAB(cachep))
2208 kmem_cache_free(cachep->slabp_cache, slabp);
2209 }
2210}
2211
2212static void __kmem_cache_destroy(struct kmem_cache *cachep)
2213{
2214 int i;
2215 struct kmem_list3 *l3;
2216
2217 for_each_online_cpu(i)
2218 kfree(cachep->array[i]);
2219
2220
2221 for_each_online_node(i) {
2222 l3 = cachep->nodelists[i];
2223 if (l3) {
2224 kfree(l3->shared);
2225 free_alien_cache(l3->alien);
2226 kfree(l3);
2227 }
2228 }
2229 kmem_cache_free(&cache_cache, cachep);
2230}
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
/*
 * Choose the page order and objects-per-slab for @cachep.
 *
 * Tries page orders from 0 up to KMALLOC_MAX_ORDER and stores the accepted
 * order and object count in cachep->gfporder and cachep->num.  If no order
 * is accepted, cachep->num is left unchanged (the caller treats 0 as
 * failure).
 *
 * @size:  object size, @align: required alignment, @flags: cache flags
 *         (CFLGS_OFF_SLAB and SLAB_RECLAIM_ACCOUNT are examined here).
 *
 * Returns the number of bytes left over in a slab of the chosen order.
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
			size_t size, size_t align, unsigned long flags)
{
	unsigned long offslab_limit;
	size_t left_over = 0;
	int gfporder;

	for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
		unsigned int num;
		size_t remainder;

		cache_estimate(gfporder, size, align, flags, &remainder, &num);
		/* Not even one object fits at this order; try a larger one. */
		if (!num)
			continue;

		if (flags & CFLGS_OFF_SLAB) {
			/*
			 * Upper bound on the object count for off-slab
			 * management: the number of kmem_bufctl_t entries that
			 * fit in @size bytes after a struct slab.
			 */
			offslab_limit = size - sizeof(struct slab);
			offslab_limit /= sizeof(kmem_bufctl_t);

			if (num > offslab_limit)
				break;
		}

		/* This order works; record it. */
		cachep->num = num;
		cachep->gfporder = gfporder;
		left_over = remainder;

		/* SLAB_RECLAIM_ACCOUNT caches take the first order that fits. */
		if (flags & SLAB_RECLAIM_ACCOUNT)
			break;

		/* Do not go beyond the configured maximum order. */
		if (gfporder >= slab_max_order)
			break;

		/* Accept once the waste is at most 1/8 of the slab. */
		if (left_over * 8 <= (PAGE_SIZE << gfporder))
			break;
	}
	return left_over;
}
2302
2303static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2304{
2305 if (slab_state >= FULL)
2306 return enable_cpucache(cachep, gfp);
2307
2308 if (slab_state == DOWN) {
2309
2310
2311
2312
2313
2314 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2315
2316
2317
2318
2319
2320
2321 set_up_list3s(cachep, SIZE_AC);
2322 if (INDEX_AC == INDEX_L3)
2323 slab_state = PARTIAL_L3;
2324 else
2325 slab_state = PARTIAL_ARRAYCACHE;
2326 } else {
2327 cachep->array[smp_processor_id()] =
2328 kmalloc(sizeof(struct arraycache_init), gfp);
2329
2330 if (slab_state == PARTIAL_ARRAYCACHE) {
2331 set_up_list3s(cachep, SIZE_L3);
2332 slab_state = PARTIAL_L3;
2333 } else {
2334 int node;
2335 for_each_online_node(node) {
2336 cachep->nodelists[node] =
2337 kmalloc_node(sizeof(struct kmem_list3),
2338 gfp, node);
2339 BUG_ON(!cachep->nodelists[node]);
2340 kmem_list3_init(cachep->nodelists[node]);
2341 }
2342 }
2343 }
2344 cachep->nodelists[numa_mem_id()]->next_reap =
2345 jiffies + REAPTIMEOUT_LIST3 +
2346 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2347
2348 cpu_cache_get(cachep)->avail = 0;
2349 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2350 cpu_cache_get(cachep)->batchcount = 1;
2351 cpu_cache_get(cachep)->touched = 0;
2352 cachep->batchcount = 1;
2353 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2354 return 0;
2355}
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
/*
 * Create a new slab cache.
 *
 * @name:  name stored in the cache (the pointer is kept, not copied)
 * @size:  size of the objects; rounded up to word size
 * @align: minimum alignment requested by the caller
 * @flags: SLAB_* flags; anything outside CREATE_MASK is a BUG
 * @ctor:  optional object constructor
 *
 * Computes the final alignment and object size (including debug padding on
 * DEBUG builds), decides between on-slab and off-slab management, picks a
 * page order via calculate_slab_order(), sets up the per-cpu and per-node
 * structures and adds the cache to slab_caches.
 *
 * Returns the new cache, or NULL if the descriptor could not be allocated,
 * no page order could hold an object, or setup_cpu_cache() failed.
 */
struct kmem_cache *
__kmem_cache_create (const char *name, size_t size, size_t align,
	unsigned long flags, void (*ctor)(void *))
{
	size_t left_over, slab_size, ralign;
	struct kmem_cache *cachep = NULL;
	gfp_t gfp;

#if DEBUG
#if FORCED_DEBUG
	/*
	 * Force red zoning and user tracking for objects below 4096 bytes,
	 * or for larger ones when the extra debug bytes do not change the
	 * fls() of the size.
	 */
	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
						2 * sizeof(unsigned long long)))
		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
	if (!(flags & SLAB_DESTROY_BY_RCU))
		flags |= SLAB_POISON;
#endif
	/* Poisoning and SLAB_DESTROY_BY_RCU must not be combined. */
	if (flags & SLAB_DESTROY_BY_RCU)
		BUG_ON(flags & SLAB_POISON);
#endif
	/* Reject flags outside the set valid for this build. */
	BUG_ON(flags & ~CREATE_MASK);

	/* Round the object size up to a multiple of the word size. */
	if (size & (BYTES_PER_WORD - 1)) {
		size += (BYTES_PER_WORD - 1);
		size &= ~(BYTES_PER_WORD - 1);
	}

	/* Step 1: alignment derived from the flags. */
	if (flags & SLAB_HWCACHE_ALIGN) {
		/*
		 * Start from the cache line size and halve it while the
		 * object still fits into half of it.
		 */
		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
	} else {
		ralign = BYTES_PER_WORD;
	}

	/* The debug options override the alignment chosen above. */
	if (flags & SLAB_STORE_USER)
		ralign = BYTES_PER_WORD;

	if (flags & SLAB_RED_ZONE) {
		ralign = REDZONE_ALIGN;
		/* Round the size up to the red zone alignment as well. */
		size += REDZONE_ALIGN - 1;
		size &= ~(REDZONE_ALIGN - 1);
	}

	/* Step 2: enforce the architecture's minimum. */
	if (ralign < ARCH_SLAB_MINALIGN) {
		ralign = ARCH_SLAB_MINALIGN;
	}
	/* Step 3: enforce the caller's minimum. */
	if (ralign < align) {
		ralign = align;
	}
	/* Alignments above that of unsigned long long disable these debug options. */
	if (ralign > __alignof__(unsigned long long))
		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

	/* Step 4: this is the final alignment. */
	align = ralign;

	if (slab_is_available())
		gfp = GFP_KERNEL;
	else
		gfp = GFP_NOWAIT;

	/* Allocate the (zeroed) descriptor from cache_cache. */
	cachep = kmem_cache_zalloc(&cache_cache, gfp);
	if (!cachep)
		return NULL;

	/* The node list pointers live directly behind the per-cpu array. */
	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
	cachep->object_size = size;
	cachep->align = align;
#if DEBUG
	/* Account for the debug words surrounding the payload. */
	if (flags & SLAB_RED_ZONE) {
		/* One red zone word before the payload, one after. */
		cachep->obj_offset += sizeof(unsigned long long);
		size += 2 * sizeof(unsigned long long);
	}
	if (flags & SLAB_STORE_USER) {
		/* Extra room for the user word; larger when red zoning is on. */
		if (flags & SLAB_RED_ZONE)
			size += REDZONE_ALIGN;
		else
			size += BYTES_PER_WORD;
	}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
	/* Grow suitable objects to a full page, payload placed at the end. */
	if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
	    && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
		cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
		size = PAGE_SIZE;
	}
#endif
#endif

	/*
	 * Objects of at least PAGE_SIZE/8 bytes default to off-slab
	 * management, except during early init and for SLAB_NOLEAKTRACE
	 * caches.
	 */
	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
	    !(flags & SLAB_NOLEAKTRACE))
		flags |= CFLGS_OFF_SLAB;

	size = ALIGN(size, align);

	left_over = calculate_slab_order(cachep, size, align, flags);

	/* cachep->num is still 0 (from zalloc) if no order was accepted. */
	if (!cachep->num) {
		printk(KERN_ERR
		       "kmem_cache_create: couldn't create cache %s.\n", name);
		kmem_cache_free(&cache_cache, cachep);
		return NULL;
	}
	slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
			  + sizeof(struct slab), align);

	/*
	 * If the management data fits into the space left over in the slab,
	 * keep it on-slab after all.
	 */
	if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
		flags &= ~CFLGS_OFF_SLAB;
		left_over -= slab_size;
	}

	if (flags & CFLGS_OFF_SLAB) {
		/* Off-slab management data is not padded to @align. */
		slab_size =
		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

#ifdef CONFIG_PAGE_POISONING
		/* Page-multiple poisoned objects lose red zone and user word. */
		if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
	}

	/* Colouring granularity: the larger of cache line size and alignment. */
	cachep->colour_off = cache_line_size();
	if (cachep->colour_off < align)
		cachep->colour_off = align;
	cachep->colour = left_over / cachep->colour_off;
	cachep->slab_size = slab_size;
	cachep->flags = flags;
	cachep->allocflags = 0;
	if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
		cachep->allocflags |= GFP_DMA;
	cachep->size = size;
	cachep->reciprocal_buffer_size = reciprocal_value(size);

	if (flags & CFLGS_OFF_SLAB) {
		/* Off-slab management data comes from a general cache. */
		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
		/* A missing or zero-size general cache here is a BUG. */
		BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
	}
	cachep->ctor = ctor;
	cachep->name = name;

	if (setup_cpu_cache(cachep, gfp)) {
		__kmem_cache_destroy(cachep);
		return NULL;
	}

	if (flags & SLAB_DEBUG_OBJECTS) {
		/* Warn (once) if combined with SLAB_DESTROY_BY_RCU. */
		WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);

		slab_set_debugobj_lock_classes(cachep);
	}

	/* The cache is complete; make it visible on the global list. */
	list_add(&cachep->list, &slab_caches);
	return cachep;
}
2613
#if DEBUG
/* BUG unless local interrupts are disabled. */
static void check_irq_off(void)
{
	BUG_ON(!irqs_disabled());
}

/* BUG unless local interrupts are enabled. */
static void check_irq_on(void)
{
	BUG_ON(irqs_disabled());
}

/*
 * On SMP builds: assert that interrupts are off and that the list_lock of
 * @cachep's lists for @node is held.
 */
static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
	check_irq_off();
	assert_spin_locked(&cachep->nodelists[node]->list_lock);
#endif
}

/* Same check as above, for the node returned by numa_mem_id(). */
static void check_spinlock_acquired(struct kmem_cache *cachep)
{
	check_spinlock_acquired_node(cachep, numa_mem_id());
}

#else
/* Non-debug builds: all checks compile away. */
#define check_irq_off()	do { } while(0)
#define check_irq_on()	do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
2647
2648static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2649 struct array_cache *ac,
2650 int force, int node);
2651
2652static void do_drain(void *arg)
2653{
2654 struct kmem_cache *cachep = arg;
2655 struct array_cache *ac;
2656 int node = numa_mem_id();
2657
2658 check_irq_off();
2659 ac = cpu_cache_get(cachep);
2660 spin_lock(&cachep->nodelists[node]->list_lock);
2661 free_block(cachep, ac->entry, ac->avail, node);
2662 spin_unlock(&cachep->nodelists[node]->list_lock);
2663 ac->avail = 0;
2664}
2665
2666static void drain_cpu_caches(struct kmem_cache *cachep)
2667{
2668 struct kmem_list3 *l3;
2669 int node;
2670
2671 on_each_cpu(do_drain, cachep, 1);
2672 check_irq_on();
2673 for_each_online_node(node) {
2674 l3 = cachep->nodelists[node];
2675 if (l3 && l3->alien)
2676 drain_alien_cache(cachep, l3->alien);
2677 }
2678
2679 for_each_online_node(node) {
2680 l3 = cachep->nodelists[node];
2681 if (l3)
2682 drain_array(cachep, l3, l3->shared, 1, node);
2683 }
2684}
2685
2686
2687
2688
2689
2690
2691
2692static int drain_freelist(struct kmem_cache *cache,
2693 struct kmem_list3 *l3, int tofree)
2694{
2695 struct list_head *p;
2696 int nr_freed;
2697 struct slab *slabp;
2698
2699 nr_freed = 0;
2700 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2701
2702 spin_lock_irq(&l3->list_lock);
2703 p = l3->slabs_free.prev;
2704 if (p == &l3->slabs_free) {
2705 spin_unlock_irq(&l3->list_lock);
2706 goto out;
2707 }
2708
2709 slabp = list_entry(p, struct slab, list);
2710#if DEBUG
2711 BUG_ON(slabp->inuse);
2712#endif
2713 list_del(&slabp->list);
2714
2715
2716
2717
2718 l3->free_objects -= cache->num;
2719 spin_unlock_irq(&l3->list_lock);
2720 slab_destroy(cache, slabp);
2721 nr_freed++;
2722 }
2723out:
2724 return nr_freed;
2725}
2726
2727
2728static int __cache_shrink(struct kmem_cache *cachep)
2729{
2730 int ret = 0, i = 0;
2731 struct kmem_list3 *l3;
2732
2733 drain_cpu_caches(cachep);
2734
2735 check_irq_on();
2736 for_each_online_node(i) {
2737 l3 = cachep->nodelists[i];
2738 if (!l3)
2739 continue;
2740
2741 drain_freelist(cachep, l3, l3->free_objects);
2742
2743 ret += !list_empty(&l3->slabs_full) ||
2744 !list_empty(&l3->slabs_partial);
2745 }
2746 return (ret ? 1 : 0);
2747}
2748
2749
2750
2751
2752
2753
2754
2755
2756int kmem_cache_shrink(struct kmem_cache *cachep)
2757{
2758 int ret;
2759 BUG_ON(!cachep || in_interrupt());
2760
2761 get_online_cpus();
2762 mutex_lock(&slab_mutex);
2763 ret = __cache_shrink(cachep);
2764 mutex_unlock(&slab_mutex);
2765 put_online_cpus();
2766 return ret;
2767}
2768EXPORT_SYMBOL(kmem_cache_shrink);
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786void kmem_cache_destroy(struct kmem_cache *cachep)
2787{
2788 BUG_ON(!cachep || in_interrupt());
2789
2790
2791 get_online_cpus();
2792 mutex_lock(&slab_mutex);
2793
2794
2795
2796 list_del(&cachep->list);
2797 if (__cache_shrink(cachep)) {
2798 slab_error(cachep, "Can't free all objects");
2799 list_add(&cachep->list, &slab_caches);
2800 mutex_unlock(&slab_mutex);
2801 put_online_cpus();
2802 return;
2803 }
2804
2805 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2806 rcu_barrier();
2807
2808 __kmem_cache_destroy(cachep);
2809 mutex_unlock(&slab_mutex);
2810 put_online_cpus();
2811}
2812EXPORT_SYMBOL(kmem_cache_destroy);
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2826 int colour_off, gfp_t local_flags,
2827 int nodeid)
2828{
2829 struct slab *slabp;
2830
2831 if (OFF_SLAB(cachep)) {
2832
2833 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2834 local_flags, nodeid);
2835
2836
2837
2838
2839
2840
2841 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2842 local_flags);
2843 if (!slabp)
2844 return NULL;
2845 } else {
2846 slabp = objp + colour_off;
2847 colour_off += cachep->slab_size;
2848 }
2849 slabp->inuse = 0;
2850 slabp->colouroff = colour_off;
2851 slabp->s_mem = objp + colour_off;
2852 slabp->nodeid = nodeid;
2853 slabp->free = 0;
2854 return slabp;
2855}
2856
2857static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2858{
2859 return (kmem_bufctl_t *) (slabp + 1);
2860}
2861
/*
 * Initialise every object of a freshly allocated slab and chain all
 * objects into the slab's free list (bufctl[i] = i + 1, terminated by
 * BUFCTL_END).
 *
 * On DEBUG builds the objects are additionally poisoned, their user word
 * and red zones are reset and the red zones are re-checked after the
 * constructor ran.
 */
static void cache_init_objs(struct kmem_cache *cachep,
			    struct slab *slabp)
{
	int i;

	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, slabp, i);
#if DEBUG
		/* Set up the debug state of the object before constructing it. */
		if (cachep->flags & SLAB_POISON)
			poison_obj(cachep, objp, POISON_FREE);
		if (cachep->flags & SLAB_STORE_USER)
			*dbg_userword(cachep, objp) = NULL;

		if (cachep->flags & SLAB_RED_ZONE) {
			*dbg_redzone1(cachep, objp) = RED_INACTIVE;
			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
		}
		/*
		 * The constructor is skipped here for poisoned caches; it is
		 * handed the payload address, not the start of the object.
		 */
		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
			cachep->ctor(objp + obj_offset(cachep));

		/* The constructor must not have touched the red zones. */
		if (cachep->flags & SLAB_RED_ZONE) {
			if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "constructor overwrote the"
					   " end of an object");
			if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
				slab_error(cachep, "constructor overwrote the"
					   " start of an object");
		}
		/*
		 * Page-sized, off-slab, poisoned objects are passed to
		 * kernel_map_pages() with enable = 0.
		 */
		if ((cachep->size % PAGE_SIZE) == 0 &&
			    OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
			kernel_map_pages(virt_to_page(objp),
					 cachep->size / PAGE_SIZE, 0);
#else
		if (cachep->ctor)
			cachep->ctor(objp);
#endif
		/* Each free-list entry points at the next object index. */
		slab_bufctl(slabp)[i] = i + 1;
	}
	/* Terminate the chain at the last object. */
	slab_bufctl(slabp)[i - 1] = BUFCTL_END;
}
2908
2909static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2910{
2911 if (CONFIG_ZONE_DMA_FLAG) {
2912 if (flags & GFP_DMA)
2913 BUG_ON(!(cachep->allocflags & GFP_DMA));
2914 else
2915 BUG_ON(cachep->allocflags & GFP_DMA);
2916 }
2917}
2918
2919static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2920 int nodeid)
2921{
2922 void *objp = index_to_obj(cachep, slabp, slabp->free);
2923 kmem_bufctl_t next;
2924
2925 slabp->inuse++;
2926 next = slab_bufctl(slabp)[slabp->free];
2927#if DEBUG
2928 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2929 WARN_ON(slabp->nodeid != nodeid);
2930#endif
2931 slabp->free = next;
2932
2933 return objp;
2934}
2935
2936static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2937 void *objp, int nodeid)
2938{
2939 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2940
2941#if DEBUG
2942
2943 WARN_ON(slabp->nodeid != nodeid);
2944
2945 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2946 printk(KERN_ERR "slab: double free detected in cache "
2947 "'%s', objp %p\n", cachep->name, objp);
2948 BUG();
2949 }
2950#endif
2951 slab_bufctl(slabp)[objnr] = slabp->free;
2952 slabp->free = objnr;
2953 slabp->inuse--;
2954}
2955
2956
2957
2958
2959
2960
2961static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2962 void *addr)
2963{
2964 int nr_pages;
2965 struct page *page;
2966
2967 page = virt_to_page(addr);
2968
2969 nr_pages = 1;
2970 if (likely(!PageCompound(page)))
2971 nr_pages <<= cache->gfporder;
2972
2973 do {
2974 page->slab_cache = cache;
2975 page->slab_page = slab;
2976 page++;
2977 } while (--nr_pages);
2978}
2979
2980
2981
2982
2983
/*
 * Add one slab to @cachep on node @nodeid.
 *
 * @flags: gfp flags of the triggering allocation
 * @objp:  pages to build the slab from, or NULL to allocate them here
 *
 * Entered with interrupts disabled; they are enabled around the
 * allocations when __GFP_WAIT is set and disabled again before returning.
 *
 * Returns 1 if a slab was added to the node's free list, 0 on failure.
 */
static int cache_grow(struct kmem_cache *cachep,
		gfp_t flags, int nodeid, void *objp)
{
	struct slab *slabp;
	size_t offset;
	gfp_t local_flags;
	struct kmem_list3 *l3;

	/* Flags in GFP_SLAB_BUG_MASK must never reach the slab allocator. */
	BUG_ON(flags & GFP_SLAB_BUG_MASK);
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	/* Take the node's list_lock to update the colouring state. */
	check_irq_off();
	l3 = cachep->nodelists[nodeid];
	spin_lock(&l3->list_lock);

	/* Use the current colour for this slab and advance it, wrapping. */
	offset = l3->colour_next;
	l3->colour_next++;
	if (l3->colour_next >= cachep->colour)
		l3->colour_next = 0;
	spin_unlock(&l3->list_lock);

	/* Convert the colour index into a byte offset. */
	offset *= cachep->colour_off;

	if (local_flags & __GFP_WAIT)
		local_irq_enable();

	/* BUGs if the request's DMA flag does not match the cache. */
	kmem_flagcheck(cachep, flags);

	/* Allocate the slab's pages unless the caller supplied them. */
	if (!objp)
		objp = kmem_getpages(cachep, local_flags, nodeid);
	if (!objp)
		goto failed;

	/* Get the management structure (node constraints are dropped here). */
	slabp = alloc_slabmgmt(cachep, objp, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
	if (!slabp)
		goto opps1;

	slab_map_pages(cachep, slabp, objp);

	cache_init_objs(cachep, slabp);

	if (local_flags & __GFP_WAIT)
		local_irq_disable();
	check_irq_off();
	spin_lock(&l3->list_lock);

	/* Publish the new slab on the node's free list. */
	list_add_tail(&slabp->list, &(l3->slabs_free));
	STATS_INC_GROWN(cachep);
	l3->free_objects += cachep->num;
	spin_unlock(&l3->list_lock);
	return 1;
opps1:
	kmem_freepages(cachep, objp);
failed:
	if (local_flags & __GFP_WAIT)
		local_irq_disable();
	return 0;
}
3061
3062#if DEBUG
3063
3064
3065
3066
3067
3068
3069static void kfree_debugcheck(const void *objp)
3070{
3071 if (!virt_addr_valid(objp)) {
3072 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
3073 (unsigned long)objp);
3074 BUG();
3075 }
3076}
3077
3078static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
3079{
3080 unsigned long long redzone1, redzone2;
3081
3082 redzone1 = *dbg_redzone1(cache, obj);
3083 redzone2 = *dbg_redzone2(cache, obj);
3084
3085
3086
3087
3088 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
3089 return;
3090
3091 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
3092 slab_error(cache, "double free detected");
3093 else
3094 slab_error(cache, "memory outside object was overwritten");
3095
3096 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
3097 obj, redzone1, redzone2);
3098}
3099
/*
 * DEBUG-only checks performed when an object is freed.
 *
 * @objp is the pointer the user holds; it is moved back by
 * obj_offset(@cachep) before the checks run, and that adjusted pointer is
 * what gets returned.
 *
 * BUG()s if the object does not belong to @cachep, if its index is out of
 * range, or if the pointer does not sit exactly on an object boundary.
 * Depending on the cache flags it also verifies and resets the red zones,
 * records @caller in the user word and poisons (or unmaps) the object.
 */
static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
				   void *caller)
{
	struct page *page;
	unsigned int objnr;
	struct slab *slabp;

	BUG_ON(virt_to_cache(objp) != cachep);

	/* Step back from the user pointer to the start of the object. */
	objp -= obj_offset(cachep);
	kfree_debugcheck(objp);
	page = virt_to_head_page(objp);

	slabp = page->slab_page;

	if (cachep->flags & SLAB_RED_ZONE) {
		verify_redzone_free(cachep, objp);
		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	objnr = obj_to_index(cachep, slabp, objp);

	/* The pointer must name a real object and sit exactly on its start. */
	BUG_ON(objnr >= cachep->num);
	BUG_ON(objp != index_to_obj(cachep, slabp, objnr));

#ifdef CONFIG_DEBUG_SLAB_LEAK
	slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
#endif
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		/*
		 * Page-multiple objects with off-slab management are unmapped
		 * instead of being poisoned.
		 */
		if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
			store_stackinfo(cachep, objp, (unsigned long)caller);
			kernel_map_pages(virt_to_page(objp),
					 cachep->size / PAGE_SIZE, 0);
		} else {
			poison_obj(cachep, objp, POISON_FREE);
		}
#else
		poison_obj(cachep, objp, POISON_FREE);
#endif
	}
	return objp;
}
3146
/*
 * DEBUG-only consistency check of a slab's free list.
 *
 * Walks the bufctl chain from slabp->free to BUFCTL_END and BUG()s, after
 * dumping the slab descriptor and bufctl array, if an index is out of range,
 * the chain is longer than the number of objects per slab, or its length
 * does not equal (cachep->num - slabp->inuse).
 */
static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
{
	kmem_bufctl_t i;
	int entries = 0;

	/* Count the free-list entries, bailing out on any invalid link. */
	for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
		entries++;
		if (entries > cachep->num || i >= cachep->num)
			goto bad;
	}
	if (entries != cachep->num - slabp->inuse) {
bad:
		/* Also reached by the goto above, from inside the loop. */
		printk(KERN_ERR "slab: Internal list corruption detected in "
			"cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
			cachep->name, cachep->num, slabp, slabp->inuse,
			print_tainted());
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
			sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
			1);
		BUG();
	}
}
3170#else
/*
 * Non-DEBUG builds: the free-path checks compile to nothing and
 * cache_free_debugcheck() evaluates to its object pointer unchanged.
 */
#define kfree_debugcheck(x) do { } while(0)
#define cache_free_debugcheck(x,objp,z) (objp)
#define check_slabp(x,y) do { } while(0)
3174#endif
3175
/*
 * Refill the current CPU's array cache for @cachep and return one object.
 *
 * Objects are taken, in order of preference, from the node's shared array,
 * from partially used slabs and from completely free slabs. If none of these
 * yields anything, the cache is grown by one slab and the refill is retried.
 *
 * @force_refill: skip the refill attempt and go straight to growing the
 *                cache; the value is also passed on to ac_get_obj().
 *
 * Must be called with interrupts disabled. Returns NULL when the cache could
 * not be grown and no object is available (or @force_refill was set).
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
							bool force_refill)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;
	int node;

	check_irq_off();
	node = numa_mem_id();
	if (unlikely(force_refill))
		goto force_grow;
retry:
	ac = cpu_cache_get(cachep);
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * The array has not been marked as touched, so cap the
		 * refill at BATCHREFILL_LIMIT objects instead of taking a
		 * full batch.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	l3 = cachep->nodelists[node];

	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/* First choice: pull objects over from the node's shared array. */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
		l3->shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;
		/* Prefer a partial slab; fall back to a completely free one. */
		entry = l3->slabs_partial.next;
		if (entry == &l3->slabs_partial) {
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * A slab taken from the partial or free list must have at
		 * least one free object left.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
									node));
		}
		check_slabp(cachep, slabp);

		/* Re-file the slab according to whether it is now full. */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &l3->slabs_full);
		else
			list_add(&slabp->list, &l3->slabs_partial);
	}

must_grow:
	/* Objects moved from slabs into the array no longer count as free. */
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
force_grow:
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/*
		 * cache_grow() may enable interrupts, so re-read the per-cpu
		 * array and the node id afterwards.
		 */
		ac = cpu_cache_get(cachep);
		node = numa_mem_id();

		/* Growing failed and there is nothing to hand out: give up. */
		if (!x && (ac->avail == 0 || force_refill))
			return NULL;

		/* Array still empty: run the refill again on the grown cache. */
		if (!ac->avail)
			goto retry;
	}
	ac->touched = 1;

	return ac_get_obj(cachep, ac, flags, force_refill);
}
3276
/*
 * Checks run before an allocation: annotate that the caller may sleep when
 * @flags contains __GFP_WAIT and, in DEBUG builds only, validate @flags
 * against @cachep with kmem_flagcheck().
 */
static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
						gfp_t flags)
{
	might_sleep_if(flags & __GFP_WAIT);
#if DEBUG
	kmem_flagcheck(cachep, flags);
#endif
}
3285
#if DEBUG
/*
 * DEBUG-only checks performed on a freshly allocated object.
 *
 * @objp points at the start of the object (before obj_offset()); the value
 * returned is the user-visible pointer, i.e. @objp advanced by
 * obj_offset(@cachep). A NULL @objp is passed straight through.
 *
 * Depending on the cache flags this verifies the free-time poison, re-poisons
 * the object as in use, records @caller, checks and arms the red zones, and
 * runs the constructor for poisoned caches. A misaligned result is logged
 * but still returned.
 */
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
				gfp_t flags, void *objp, void *caller)
{
	if (!objp)
		return objp;
	if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
		/*
		 * Page-multiple objects with off-slab management were
		 * unmapped on free; map them back instead of checking poison.
		 */
		if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
			kernel_map_pages(virt_to_page(objp),
					 cachep->size / PAGE_SIZE, 1);
		else
			check_poison_obj(cachep, objp);
#else
		check_poison_obj(cachep, objp);
#endif
		poison_obj(cachep, objp, POISON_INUSE);
	}
	if (cachep->flags & SLAB_STORE_USER)
		*dbg_userword(cachep, objp) = caller;

	if (cachep->flags & SLAB_RED_ZONE) {
		/* A free object must carry two inactive red zones. */
		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
			slab_error(cachep, "double free, or memory outside"
						" object was overwritten");
			printk(KERN_ERR
				"%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
				objp, *dbg_redzone1(cachep, objp),
				*dbg_redzone2(cachep, objp));
		}
		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
	}
#ifdef CONFIG_DEBUG_SLAB_LEAK
	{
		struct slab *slabp;
		unsigned objnr;

		/* Mark the object's bufctl slot as active for leak tracking. */
		slabp = virt_to_head_page(objp)->slab_page;
		objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
		slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
	}
#endif
	/* From here on objp is the pointer handed to the user. */
	objp += obj_offset(cachep);
	/* Poisoning overwrote the object, so the constructor is run again. */
	if (cachep->ctor && cachep->flags & SLAB_POISON)
		cachep->ctor(objp);
	if (ARCH_SLAB_MINALIGN &&
	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
		       objp, (int)ARCH_SLAB_MINALIGN);
	}
	return objp;
}
#else
/* Non-DEBUG builds: the object pointer is returned untouched. */
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif
3343
3344static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3345{
3346 if (cachep == &cache_cache)
3347 return false;
3348
3349 return should_failslab(cachep->object_size, flags, cachep->flags);
3350}
3351
3352static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3353{
3354 void *objp;
3355 struct array_cache *ac;
3356 bool force_refill = false;
3357
3358 check_irq_off();
3359
3360 ac = cpu_cache_get(cachep);
3361 if (likely(ac->avail)) {
3362 ac->touched = 1;
3363 objp = ac_get_obj(cachep, ac, flags, false);
3364
3365
3366
3367
3368
3369 if (objp) {
3370 STATS_INC_ALLOCHIT(cachep);
3371 goto out;
3372 }
3373 force_refill = true;
3374 }
3375
3376 STATS_INC_ALLOCMISS(cachep);
3377 objp = cache_alloc_refill(cachep, flags, force_refill);
3378
3379
3380
3381
3382 ac = cpu_cache_get(cachep);
3383
3384out:
3385
3386
3387
3388
3389
3390 if (objp)
3391 kmemleak_erase(&ac->entry[ac->avail]);
3392 return objp;
3393}
3394
3395#ifdef CONFIG_NUMA
3396
3397
3398
3399
3400
3401
3402static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3403{
3404 int nid_alloc, nid_here;
3405
3406 if (in_interrupt() || (flags & __GFP_THISNODE))
3407 return NULL;
3408 nid_alloc = nid_here = numa_mem_id();
3409 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3410 nid_alloc = cpuset_slab_spread_node();
3411 else if (current->mempolicy)
3412 nid_alloc = slab_node();
3413 if (nid_alloc != nid_here)
3414 return ____cache_alloc_node(cachep, flags, nid_alloc);
3415 return NULL;
3416}
3417
3418
3419
3420
3421
3422
3423
3424
3425
/*
 * Allocate an object from another node when the preferred node cannot
 * satisfy the request.
 *
 * Returns NULL at once if @flags contains __GFP_THISNODE. Otherwise the
 * zonelist selected through slab_node() is walked and an object is taken
 * from the first cpuset-allowed node whose lists already report free
 * objects. If no such node exists, fresh pages are requested from the page
 * allocator and the cache is grown on whichever node those pages belong to.
 *
 * The whole attempt is repeated when put_mems_allowed() returns false and
 * no object was obtained.
 */
static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
	struct zonelist *zonelist;
	gfp_t local_flags;
	struct zoneref *z;
	struct zone *zone;
	enum zone_type high_zoneidx = gfp_zone(flags);
	void *obj = NULL;
	int nid;
	unsigned int cpuset_mems_cookie;

	if (flags & __GFP_THISNODE)
		return NULL;

	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

retry_cpuset:
	cpuset_mems_cookie = get_mems_allowed();
	zonelist = node_zonelist(slab_node(), flags);

retry:
	/*
	 * Look through the allowed nodes for one that already has free
	 * objects, so that no new pages are needed.
	 */
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
		nid = zone_to_nid(zone);

		if (cpuset_zone_allowed_hardwall(zone, flags) &&
			cache->nodelists[nid] &&
			cache->nodelists[nid]->free_objects) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (obj)
					break;
		}
	}

	if (!obj) {
		/*
		 * No node had a free object. Get pages from the page
		 * allocator (starting at the local memory node) and add them
		 * to the cache on the node they actually came from.
		 * Interrupts are enabled around the page allocation when the
		 * flags allow waiting.
		 */
		if (local_flags & __GFP_WAIT)
			local_irq_enable();
		kmem_flagcheck(cache, flags);
		obj = kmem_getpages(cache, local_flags, numa_mem_id());
		if (local_flags & __GFP_WAIT)
			local_irq_disable();
		if (obj) {
			/* Insert the pages into the node they were allocated on. */
			nid = page_to_nid(virt_to_page(obj));
			if (cache_grow(cache, flags, nid, obj)) {
				obj = ____cache_alloc_node(cache,
					flags | GFP_THISNODE, nid);
				if (!obj)
					/*
					 * The new slab yielded no object for
					 * us (presumably consumed by someone
					 * else - TODO confirm); search the
					 * nodes again.
					 */
					goto retry;
			} else {
				/* cache_grow() released the pages on failure. */
				obj = NULL;
			}
		}
	}

	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
		goto retry_cpuset;
	return obj;
}
3503
3504
3505
3506
/*
 * Allocate one object directly from the slab lists of node @nodeid,
 * bypassing the per-cpu array.
 *
 * A partial slab is preferred over a free one. If the node has neither, the
 * cache is grown on that node and the attempt is repeated; if growing fails
 * the request is handed to fallback_alloc().
 *
 * BUG()s if the node has no list structure. Must be called with interrupts
 * disabled.
 */
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
				int nodeid)
{
	struct list_head *entry;
	struct slab *slabp;
	struct kmem_list3 *l3;
	void *obj;
	int x;

	l3 = cachep->nodelists[nodeid];
	BUG_ON(!l3);

retry:
	check_irq_off();
	spin_lock(&l3->list_lock);
	entry = l3->slabs_partial.next;
	if (entry == &l3->slabs_partial) {
		/* No partial slab: note the use of the free list and try it. */
		l3->free_touched = 1;
		entry = l3->slabs_free.next;
		if (entry == &l3->slabs_free)
			goto must_grow;
	}

	slabp = list_entry(entry, struct slab, list);
	check_spinlock_acquired_node(cachep, nodeid);
	check_slabp(cachep, slabp);

	STATS_INC_NODEALLOCS(cachep);
	STATS_INC_ACTIVE(cachep);
	STATS_SET_HIGH(cachep);

	BUG_ON(slabp->inuse == cachep->num);

	obj = slab_get_obj(cachep, slabp, nodeid);
	check_slabp(cachep, slabp);
	l3->free_objects--;
	/* Re-file the slab according to whether it is now full. */
	list_del(&slabp->list);

	if (slabp->free == BUFCTL_END)
		list_add(&slabp->list, &l3->slabs_full);
	else
		list_add(&slabp->list, &l3->slabs_partial);

	spin_unlock(&l3->list_lock);
	goto done;

must_grow:
	spin_unlock(&l3->list_lock);
	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
	if (x)
		goto retry;

	/* Could not grow on this node: let other nodes have a go. */
	return fallback_alloc(cachep, flags);

done:
	return obj;
}
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578static __always_inline void *
3579__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3580 void *caller)
3581{
3582 unsigned long save_flags;
3583 void *ptr;
3584 int slab_node = numa_mem_id();
3585
3586 flags &= gfp_allowed_mask;
3587
3588 lockdep_trace_alloc(flags);
3589
3590 if (slab_should_failslab(cachep, flags))
3591 return NULL;
3592
3593 cache_alloc_debugcheck_before(cachep, flags);
3594 local_irq_save(save_flags);
3595
3596 if (nodeid == NUMA_NO_NODE)
3597 nodeid = slab_node;
3598
3599 if (unlikely(!cachep->nodelists[nodeid])) {
3600
3601 ptr = fallback_alloc(cachep, flags);
3602 goto out;
3603 }
3604
3605 if (nodeid == slab_node) {
3606
3607
3608
3609
3610
3611
3612 ptr = ____cache_alloc(cachep, flags);
3613 if (ptr)
3614 goto out;
3615 }
3616
3617 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3618 out:
3619 local_irq_restore(save_flags);
3620 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3621 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
3622 flags);
3623
3624 if (likely(ptr))
3625 kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
3626
3627 if (unlikely((flags & __GFP_ZERO) && ptr))
3628 memset(ptr, 0, cachep->object_size);
3629
3630 return ptr;
3631}
3632
3633static __always_inline void *
3634__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3635{
3636 void *objp;
3637
3638 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3639 objp = alternate_node_alloc(cache, flags);
3640 if (objp)
3641 goto out;
3642 }
3643 objp = ____cache_alloc(cache, flags);
3644
3645
3646
3647
3648
3649 if (!objp)
3650 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3651
3652 out:
3653 return objp;
3654}
3655#else
3656
/*
 * Non-NUMA allocation core: there is only one node, so the per-cpu array
 * path is the whole story.
 */
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	return ____cache_alloc(cachep, flags);
}
3662
3663#endif
3664
3665static __always_inline void *
3666__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3667{
3668 unsigned long save_flags;
3669 void *objp;
3670
3671 flags &= gfp_allowed_mask;
3672
3673 lockdep_trace_alloc(flags);
3674
3675 if (slab_should_failslab(cachep, flags))
3676 return NULL;
3677
3678 cache_alloc_debugcheck_before(cachep, flags);
3679 local_irq_save(save_flags);
3680 objp = __do_cache_alloc(cachep, flags);
3681 local_irq_restore(save_flags);
3682 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3683 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
3684 flags);
3685 prefetchw(objp);
3686
3687 if (likely(objp))
3688 kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
3689
3690 if (unlikely((flags & __GFP_ZERO) && objp))
3691 memset(objp, 0, cachep->object_size);
3692
3693 return objp;
3694}
3695
3696
3697
3698
/*
 * Return @nr_objects objects from the array @objpp to their slabs on @node.
 *
 * Callers in this file hold the node's list_lock around the call. Each
 * object is put back on its slab's free list; the slab is then re-filed on
 * the partial or free list, or destroyed outright when it became empty and
 * the node already holds more free objects than its free_limit.
 */
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
		       int node)
{
	int i;
	struct kmem_list3 *l3;

	for (i = 0; i < nr_objects; i++) {
		void *objp;
		struct slab *slabp;

		/* Normalise the stored pointer before using it as an address. */
		clear_obj_pfmemalloc(&objpp[i]);
		objp = objpp[i];

		slabp = virt_to_slab(objp);
		l3 = cachep->nodelists[node];
		/* Unlink now; the slab is re-filed (or destroyed) below. */
		list_del(&slabp->list);
		check_spinlock_acquired_node(cachep, node);
		check_slabp(cachep, slabp);
		slab_put_obj(cachep, slabp, objp, node);
		STATS_DEC_ACTIVE(cachep);
		l3->free_objects++;
		check_slabp(cachep, slabp);

		/* Put the slab on the list matching its new fill level. */
		if (slabp->inuse == 0) {
			if (l3->free_objects > l3->free_limit) {
				l3->free_objects -= cachep->num;
				/*
				 * Too many free objects on this node: give
				 * the now-empty slab back instead of keeping
				 * it on the free list. The slab is already
				 * unlinked at this point.
				 */
				slab_destroy(cachep, slabp);
			} else {
				list_add(&slabp->list, &l3->slabs_free);
			}
		} else {
			/*
			 * Still has objects in use: append it to the tail of
			 * the partial list.
			 */
			list_add_tail(&slabp->list, &l3->slabs_partial);
		}
	}
}
3745
/*
 * Make room in the per-cpu array @ac by moving a batch of its oldest
 * entries elsewhere.
 *
 * Up to ac->batchcount objects are copied into the node's shared array when
 * that has room (fewer if the room is smaller); otherwise the full batch is
 * returned to the slabs via free_block(). The remaining entries are then
 * shifted to the front of @ac. Must be called with interrupts disabled.
 */
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
	int batchcount;
	struct kmem_list3 *l3;
	int node = numa_mem_id();

	batchcount = ac->batchcount;
#if DEBUG
	BUG_ON(!batchcount || batchcount > ac->avail);
#endif
	check_irq_off();
	l3 = cachep->nodelists[node];
	spin_lock(&l3->list_lock);
	if (l3->shared) {
		struct array_cache *shared_array = l3->shared;
		int max = shared_array->limit - shared_array->avail;
		if (max) {
			/* Move no more than the shared array can take. */
			if (batchcount > max)
				batchcount = max;
			memcpy(&(shared_array->entry[shared_array->avail]),
			       ac->entry, sizeof(void *) * batchcount);
			shared_array->avail += batchcount;
			goto free_done;
		}
	}

	free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
	{
		int i = 0;
		struct list_head *p;

		/* Count the completely free slabs for the max_freeable stat. */
		p = l3->slabs_free.next;
		while (p != &(l3->slabs_free)) {
			struct slab *slabp;

			slabp = list_entry(p, struct slab, list);
			BUG_ON(slabp->inuse);

			i++;
			p = p->next;
		}
		STATS_SET_FREEABLE(cachep, i);
	}
#endif
	spin_unlock(&l3->list_lock);
	/* Drop the flushed entries and close the gap at the front. */
	ac->avail -= batchcount;
	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
3796
3797
3798
3799
3800
3801static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3802 void *caller)
3803{
3804 struct array_cache *ac = cpu_cache_get(cachep);
3805
3806 check_irq_off();
3807 kmemleak_free_recursive(objp, cachep->flags);
3808 objp = cache_free_debugcheck(cachep, objp, caller);
3809
3810 kmemcheck_slab_free(cachep, objp, cachep->object_size);
3811
3812
3813
3814
3815
3816
3817
3818
3819 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3820 return;
3821
3822 if (likely(ac->avail < ac->limit)) {
3823 STATS_INC_FREEHIT(cachep);
3824 } else {
3825 STATS_INC_FREEMISS(cachep);
3826 cache_flusharray(cachep, ac);
3827 }
3828
3829 ac_put_obj(cachep, ac, objp);
3830}
3831
3832
3833
3834
3835
3836
3837
3838
3839
/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: Allocation flags, passed through to __cache_alloc().
 *
 * Allocates one object from @cachep, recording this function's return
 * address as the call site for the debug checks, and emits the
 * kmem_cache_alloc trace event. Returns the object, or NULL on failure.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

	trace_kmem_cache_alloc(_RET_IP_, ret,
			       cachep->object_size, cachep->size, flags);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
3850
3851#ifdef CONFIG_TRACING
/*
 * Variant of kmem_cache_alloc() that reports the allocation through the
 * kmalloc trace event: @size is logged as the requested size and
 * slab_buffer_size(@cachep) as the allocated size.
 */
void *
kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
{
	void *ret;

	ret = __cache_alloc(cachep, flags, __builtin_return_address(0));

	trace_kmalloc(_RET_IP_, ret,
		      size, slab_buffer_size(cachep), flags);
	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_trace);
3864#endif
3865
3866#ifdef CONFIG_NUMA
/**
 * kmem_cache_alloc_node - Allocate an object on the specified node
 * @cachep: The cache to allocate from.
 * @flags: Allocation flags, passed through to __cache_alloc_node().
 * @nodeid: Node to allocate on; NUMA_NO_NODE selects the local memory node.
 *
 * Emits the kmem_cache_alloc_node trace event. Returns the object, or NULL
 * on failure.
 */
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
	void *ret = __cache_alloc_node(cachep, flags, nodeid,
				       __builtin_return_address(0));

	trace_kmem_cache_alloc_node(_RET_IP_, ret,
				    cachep->object_size, cachep->size,
				    flags, nodeid);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
3879
3880#ifdef CONFIG_TRACING
/*
 * Variant of kmem_cache_alloc_node() that reports the allocation through
 * the kmalloc_node trace event: @size is logged as the requested size and
 * slab_buffer_size(@cachep) as the allocated size.
 */
void *kmem_cache_alloc_node_trace(size_t size,
				  struct kmem_cache *cachep,
				  gfp_t flags,
				  int nodeid)
{
	void *ret;

	ret = __cache_alloc_node(cachep, flags, nodeid,
				  __builtin_return_address(0));
	trace_kmalloc_node(_RET_IP_, ret,
			   size, slab_buffer_size(cachep),
			   flags, nodeid);
	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3896#endif
3897
3898static __always_inline void *
3899__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3900{
3901 struct kmem_cache *cachep;
3902
3903 cachep = kmem_find_general_cachep(size, flags);
3904 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3905 return cachep;
3906 return kmem_cache_alloc_node_trace(size, cachep, flags, node);
3907}
3908
/*
 * Exported node-aware kmalloc entry points.
 *
 * With slab debugging or tracing enabled, the caller's address is passed
 * down to __do_kmalloc_node(): __kmalloc_node() uses its own return address
 * and __kmalloc_node_track_caller() uses the address supplied by its caller.
 * Without either option no call-site address is passed (NULL) and the
 * track_caller variant is not built here.
 */
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	return __do_kmalloc_node(size, flags, node,
			__builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc_node);

void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
		int node, unsigned long caller)
{
	return __do_kmalloc_node(size, flags, node, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#else
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
	return __do_kmalloc_node(size, flags, node, NULL);
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
3930#endif
3931
3932
3933
3934
3935
3936
3937
3938static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3939 void *caller)
3940{
3941 struct kmem_cache *cachep;
3942 void *ret;
3943
3944
3945
3946
3947
3948
3949 cachep = __find_general_cachep(size, flags);
3950 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3951 return cachep;
3952 ret = __cache_alloc(cachep, flags, caller);
3953
3954 trace_kmalloc((unsigned long) caller, ret,
3955 size, cachep->size, flags);
3956
3957 return ret;
3958}
3959
3960
/*
 * Exported kmalloc entry points.
 *
 * With slab debugging or tracing enabled, the caller's address is passed
 * down to __do_kmalloc(): __kmalloc() uses its own return address and
 * __kmalloc_track_caller() uses the address supplied by its caller. Without
 * either option no call-site address is passed (NULL) and the track_caller
 * variant is not built here.
 */
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc(size_t size, gfp_t flags)
{
	return __do_kmalloc(size, flags, __builtin_return_address(0));
}
EXPORT_SYMBOL(__kmalloc);

void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
	return __do_kmalloc(size, flags, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);

#else
void *__kmalloc(size_t size, gfp_t flags)
{
	return __do_kmalloc(size, flags, NULL);
}
EXPORT_SYMBOL(__kmalloc);
#endif
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3991{
3992 unsigned long flags;
3993
3994 local_irq_save(flags);
3995 debug_check_no_locks_freed(objp, cachep->object_size);
3996 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3997 debug_check_no_obj_freed(objp, cachep->object_size);
3998 __cache_free(cachep, objp, __builtin_return_address(0));
3999 local_irq_restore(flags);
4000
4001 trace_kmem_cache_free(_RET_IP_, objp);
4002}
4003EXPORT_SYMBOL(kmem_cache_free);
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014void kfree(const void *objp)
4015{
4016 struct kmem_cache *c;
4017 unsigned long flags;
4018
4019 trace_kfree(_RET_IP_, objp);
4020
4021 if (unlikely(ZERO_OR_NULL_PTR(objp)))
4022 return;
4023 local_irq_save(flags);
4024 kfree_debugcheck(objp);
4025 c = virt_to_cache(objp);
4026 debug_check_no_locks_freed(objp, c->object_size);
4027
4028 debug_check_no_obj_freed(objp, c->object_size);
4029 __cache_free(c, (void *)objp, __builtin_return_address(0));
4030 local_irq_restore(flags);
4031}
4032EXPORT_SYMBOL(kfree);
4033
/*
 * Return the object_size field of @cachep.
 */
unsigned int kmem_cache_size(struct kmem_cache *cachep)
{
	return cachep->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);
4039
4040
4041
4042
/*
 * Set up or refresh the per-node list structures of @cachep for every online
 * node, sized from the cache's current limit/batchcount/shared tunables.
 *
 * For a node that already has a kmem_list3, the old shared array is flushed
 * and replaced, an alien cache is installed only if none exists yet, and
 * free_limit is recomputed. For a node without one, a new kmem_list3 is
 * allocated and initialised.
 *
 * Returns 0 on success or -ENOMEM. On failure the structures set up so far
 * are rolled back only when cachep->list.next is NULL (presumably meaning
 * the cache is not yet on the global cache list - TODO confirm).
 */
static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
{
	int node;
	struct kmem_list3 *l3;
	struct array_cache *new_shared;
	struct array_cache **new_alien = NULL;

	for_each_online_node(node) {

                if (use_alien_caches) {
                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                        if (!new_alien)
                                goto fail;
                }

		new_shared = NULL;
		if (cachep->shared) {
			new_shared = alloc_arraycache(node,
				cachep->shared*cachep->batchcount,
					0xbaadf00d, gfp);
			if (!new_shared) {
				free_alien_cache(new_alien);
				goto fail;
			}
		}

		l3 = cachep->nodelists[node];
		if (l3) {
			struct array_cache *shared = l3->shared;

			spin_lock_irq(&l3->list_lock);

			/* Empty the old shared array back into the slabs. */
			if (shared)
				free_block(cachep, shared->entry,
						shared->avail, node);

			l3->shared = new_shared;
			/* Keep an existing alien cache; only fill an empty slot. */
			if (!l3->alien) {
				l3->alien = new_alien;
				new_alien = NULL;
			}
			l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
			spin_unlock_irq(&l3->list_lock);
			/* Free what was replaced or turned out to be unneeded. */
			kfree(shared);
			free_alien_cache(new_alien);
			continue;
		}
		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
		if (!l3) {
			free_alien_cache(new_alien);
			kfree(new_shared);
			goto fail;
		}

		kmem_list3_init(l3);
		l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
				((unsigned long)cachep) % REAPTIMEOUT_LIST3;
		l3->shared = new_shared;
		l3->alien = new_alien;
		l3->free_limit = (1 + nr_cpus_node(node)) *
					cachep->batchcount + cachep->num;
		cachep->nodelists[node] = l3;
	}
	return 0;

fail:
	if (!cachep->list.next) {
		/* Undo the nodes below the one that failed. */
		node--;
		while (node >= 0) {
			if (cachep->nodelists[node]) {
				l3 = cachep->nodelists[node];

				kfree(l3->shared);
				free_alien_cache(l3->alien);
				kfree(l3);
				cachep->nodelists[node] = NULL;
			}
			node--;
		}
	}
	return -ENOMEM;
}
4127
/*
 * Argument block handed to do_ccupdate_local() through on_each_cpu().
 *
 * @cachep: cache whose per-cpu arrays are being exchanged.
 * @new:    indexed by CPU id. On entry a slot holds the replacement
 *          array_cache for that CPU; do_ccupdate_local() stores the array it
 *          replaced back into the same slot.
 *
 * The structure is allocated with nr_cpu_ids trailing slots, so @new is
 * declared as a C99 flexible array member rather than a GNU zero-length
 * array. sizeof() and the member offset are unchanged.
 */
struct ccupdate_struct {
	struct kmem_cache *cachep;
	struct array_cache *new[];
};
4132
4133static void do_ccupdate_local(void *info)
4134{
4135 struct ccupdate_struct *new = info;
4136 struct array_cache *old;
4137
4138 check_irq_off();
4139 old = cpu_cache_get(new->cachep);
4140
4141 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
4142 new->new[smp_processor_id()] = old;
4143}
4144
4145
/*
 * Replace the per-cpu arrays of @cachep with new ones sized for @limit and
 * @batchcount, record the new tunables (including @shared) and rebuild the
 * per-node structures to match.
 *
 * Returns -ENOMEM if any allocation fails before the switch-over (nothing is
 * changed in that case), otherwise the result of alloc_kmemlist().
 */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	int i;

	/* Zeroed, so the slots of CPUs that are not online stay NULL. */
	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
		      gfp);
	if (!new)
		return -ENOMEM;

	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
						batchcount, gfp);
		if (!new->new[i]) {
			/* Free every lower-numbered slot (NULL slots are harmless). */
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	/* Swap the arrays on every CPU; new->new[] then holds the old ones. */
	on_each_cpu(do_ccupdate_local, (void *)new, 1);

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	/* Drain each old array back into the slabs, then free it. */
	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
		kfree(ccold);
	}
	kfree(new);
	return alloc_kmemlist(cachep, gfp);
}
4188
4189
4190static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4191{
4192 int err;
4193 int limit, shared;
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204 if (cachep->size > 131072)
4205 limit = 1;
4206 else if (cachep->size > PAGE_SIZE)
4207 limit = 8;
4208 else if (cachep->size > 1024)
4209 limit = 24;
4210 else if (cachep->size > 256)
4211 limit = 54;
4212 else
4213 limit = 120;
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224 shared = 0;
4225 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
4226 shared = 8;
4227
4228#if DEBUG
4229
4230
4231
4232
4233 if (limit > 32)
4234 limit = 32;
4235#endif
4236 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
4237 if (err)
4238 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4239 cachep->name, -err);
4240 return err;
4241}
4242
4243
4244
4245
4246
4247
4248static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4249 struct array_cache *ac, int force, int node)
4250{
4251 int tofree;
4252
4253 if (!ac || !ac->avail)
4254 return;
4255 if (ac->touched && !force) {
4256 ac->touched = 0;
4257 } else {
4258 spin_lock_irq(&l3->list_lock);
4259 if (ac->avail) {
4260 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4261 if (tofree > ac->avail)
4262 tofree = (ac->avail + 1) / 2;
4263 free_block(cachep, ac->entry, tofree, node);
4264 ac->avail -= tofree;
4265 memmove(ac->entry, &(ac->entry[tofree]),
4266 sizeof(void *) * ac->avail);
4267 }
4268 spin_unlock_irq(&l3->list_lock);
4269 }
4270}
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
/**
 * cache_reap - Periodic trimming of the caches on the local memory node
 * @w: the delayed work item this function runs from
 *
 * For every cache on slab_caches this drains alien caches and part of the
 * current CPU's array cache. Once a node's next_reap time has passed it
 * also drains part of the shared array and, unless the free list was used
 * since the last pass, releases a share of the free slabs.
 *
 * If slab_mutex cannot be taken without blocking, the pass is skipped. In
 * all cases the work item re-arms itself before returning.
 */
static void cache_reap(struct work_struct *w)
{
	struct kmem_cache *searchp;
	struct kmem_list3 *l3;
	int node = numa_mem_id();
	struct delayed_work *work = to_delayed_work(w);

	if (!mutex_trylock(&slab_mutex))
		/* Lock is busy: skip this pass and just re-arm the timer. */
		goto out;

	list_for_each_entry(searchp, &slab_caches, list) {
		check_irq_on();

		/*
		 * Only the list structure of this CPU's memory node is
		 * looked at in this pass.
		 */
		l3 = searchp->nodelists[node];

		reap_alien(searchp, l3);

		drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);

		/*
		 * The per-node work below is rate limited: skip it until
		 * next_reap has been reached.
		 */
		if (time_after(l3->next_reap, jiffies))
			goto next;

		l3->next_reap = jiffies + REAPTIMEOUT_LIST3;

		drain_array(searchp, l3, l3->shared, 0, node);

		if (l3->free_touched)
			l3->free_touched = 0;
		else {
			int freed;

			/* Free ceil(free_limit / (5 * objects per slab)) slabs. */
			freed = drain_freelist(searchp, l3, (l3->free_limit +
				5 * searchp->num - 1) / (5 * searchp->num));
			STATS_ADD_REAPED(searchp, freed);
		}
next:
		cond_resched();
	}
	check_irq_on();
	mutex_unlock(&slab_mutex);
	next_reap_node();
out:
	/* Schedule the next pass. */
	schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
}
4339
4340#ifdef CONFIG_SLABINFO
4341
/*
 * Emit the two header lines of the slabinfo output into @m: the format
 * version and the column legend. Builds with STATS enabled tag the version
 * line with "(statistics)" and append the globalstat/cpustat column names.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * The version string and column layout are what readers of this
	 * output see; keep both in step with what s_show() prints.
	 */
#if STATS
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#if STATS
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
4364
4365static void *s_start(struct seq_file *m, loff_t *pos)
4366{
4367 loff_t n = *pos;
4368
4369 mutex_lock(&slab_mutex);
4370 if (!n)
4371 print_slabinfo_header(m);
4372
4373 return seq_list_start(&slab_caches, *pos);
4374}
4375
/* seq_file next callback: advance to the following entry of slab_caches. */
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}
4380
/* seq_file stop callback: drop the slab_mutex taken in s_start(). */
static void s_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}
4385
4386static int s_show(struct seq_file *m, void *p)
4387{
4388 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4389 struct slab *slabp;
4390 unsigned long active_objs;
4391 unsigned long num_objs;
4392 unsigned long active_slabs = 0;
4393 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4394 const char *name;
4395 char *error = NULL;
4396 int node;
4397 struct kmem_list3 *l3;
4398
4399 active_objs = 0;
4400 num_slabs = 0;
4401 for_each_online_node(node) {
4402 l3 = cachep->nodelists[node];
4403 if (!l3)
4404 continue;
4405
4406 check_irq_on();
4407 spin_lock_irq(&l3->list_lock);
4408
4409 list_for_each_entry(slabp, &l3->slabs_full, list) {
4410 if (slabp->inuse != cachep->num && !error)
4411 error = "slabs_full accounting error";
4412 active_objs += cachep->num;
4413 active_slabs++;
4414 }
4415 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4416 if (slabp->inuse == cachep->num && !error)
4417 error = "slabs_partial inuse accounting error";
4418 if (!slabp->inuse && !error)
4419 error = "slabs_partial/inuse accounting error";
4420 active_objs += slabp->inuse;
4421 active_slabs++;
4422 }
4423 list_for_each_entry(slabp, &l3->slabs_free, list) {
4424 if (slabp->inuse && !error)
4425 error = "slabs_free/inuse accounting error";
4426 num_slabs++;
4427 }
4428 free_objects += l3->free_objects;
4429 if (l3->shared)
4430 shared_avail += l3->shared->avail;
4431
4432 spin_unlock_irq(&l3->list_lock);
4433 }
4434 num_slabs += active_slabs;
4435 num_objs = num_slabs * cachep->num;
4436 if (num_objs - active_objs != free_objects && !error)
4437 error = "free_objects accounting error";
4438
4439 name = cachep->name;
4440 if (error)
4441 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4442
4443 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4444 name, active_objs, num_objs, cachep->size,
4445 cachep->num, (1 << cachep->gfporder));
4446 seq_printf(m, " : tunables %4u %4u %4u",
4447 cachep->limit, cachep->batchcount, cachep->shared);
4448 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4449 active_slabs, num_slabs, shared_avail);
4450#if STATS
4451 {
4452 unsigned long high = cachep->high_mark;
4453 unsigned long allocs = cachep->num_allocations;
4454 unsigned long grown = cachep->grown;
4455 unsigned long reaped = cachep->reaped;
4456 unsigned long errors = cachep->errors;
4457 unsigned long max_freeable = cachep->max_freeable;
4458 unsigned long node_allocs = cachep->node_allocs;
4459 unsigned long node_frees = cachep->node_frees;
4460 unsigned long overflows = cachep->node_overflow;
4461
4462 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4463 "%4lu %4lu %4lu %4lu %4lu",
4464 allocs, high, grown,
4465 reaped, errors, max_freeable, node_allocs,
4466 node_frees, overflows);
4467 }
4468
4469 {
4470 unsigned long allochit = atomic_read(&cachep->allochit);
4471 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4472 unsigned long freehit = atomic_read(&cachep->freehit);
4473 unsigned long freemiss = atomic_read(&cachep->freemiss);
4474
4475 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4476 allochit, allocmiss, freehit, freemiss);
4477 }
4478#endif
4479 seq_putc(m, '\n');
4480 return 0;
4481}
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497static const struct seq_operations slabinfo_op = {
4498 .start = s_start,
4499 .next = s_next,
4500 .stop = s_stop,
4501 .show = s_show,
4502};
4503
4504#define MAX_SLABINFO_WRITE 128
4505
4506
4507
4508
4509
4510
4511
4512static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4513 size_t count, loff_t *ppos)
4514{
4515 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4516 int limit, batchcount, shared, res;
4517 struct kmem_cache *cachep;
4518
4519 if (count > MAX_SLABINFO_WRITE)
4520 return -EINVAL;
4521 if (copy_from_user(&kbuf, buffer, count))
4522 return -EFAULT;
4523 kbuf[MAX_SLABINFO_WRITE] = '\0';
4524
4525 tmp = strchr(kbuf, ' ');
4526 if (!tmp)
4527 return -EINVAL;
4528 *tmp = '\0';
4529 tmp++;
4530 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4531 return -EINVAL;
4532
4533
4534 mutex_lock(&slab_mutex);
4535 res = -EINVAL;
4536 list_for_each_entry(cachep, &slab_caches, list) {
4537 if (!strcmp(cachep->name, kbuf)) {
4538 if (limit < 1 || batchcount < 1 ||
4539 batchcount > limit || shared < 0) {
4540 res = 0;
4541 } else {
4542 res = do_tune_cpucache(cachep, limit,
4543 batchcount, shared,
4544 GFP_KERNEL);
4545 }
4546 break;
4547 }
4548 }
4549 mutex_unlock(&slab_mutex);
4550 if (res >= 0)
4551 res = count;
4552 return res;
4553}
4554
4555static int slabinfo_open(struct inode *inode, struct file *file)
4556{
4557 return seq_open(file, &slabinfo_op);
4558}
4559
4560static const struct file_operations proc_slabinfo_operations = {
4561 .open = slabinfo_open,
4562 .read = seq_read,
4563 .write = slabinfo_write,
4564 .llseek = seq_lseek,
4565 .release = seq_release,
4566};
4567
4568#ifdef CONFIG_DEBUG_SLAB_LEAK
4569
4570static void *leaks_start(struct seq_file *m, loff_t *pos)
4571{
4572 mutex_lock(&slab_mutex);
4573 return seq_list_start(&slab_caches, *pos);
4574}
4575
/*
 * add_caller - account one occurrence of value @v in the sorted table @n.
 * @n: table; n[0] is the capacity, n[1] the number of entries in use, and
 *     entries follow from n[2] onwards as (value, count) pairs kept in
 *     ascending order of value
 * @v: value to record; 0 is ignored
 *
 * A binary search locates @v. If it is present its count is incremented,
 * otherwise a new entry with count 1 is inserted at the sorted position.
 *
 * Returns 1 when @v was recorded (or was 0). Returns 0 when incrementing
 * n[1] makes it equal to n[0]; nothing is inserted in that case and the
 * table is left with n[1] == n[0], which callers treat as "table full".
 */
static inline int add_caller(unsigned long *n, unsigned long v)
{
	unsigned long *base, *slot;
	size_t tail_bytes;
	int remaining;

	if (!v)
		return 1;

	/* Binary search over the (value, count) pairs. */
	base = n + 2;
	for (remaining = n[1]; remaining; ) {
		int half = remaining / 2;

		slot = base + 2 * half;
		if (*slot == v) {
			slot[1]++;
			return 1;
		}
		if (*slot > v) {
			remaining = half;
			continue;
		}
		base = slot + 2;
		remaining -= half + 1;
	}

	/* Not found: base now points at the insertion position. */
	n[1]++;
	if (n[1] == n[0])
		return 0;

	/* Shift everything from the insertion point up by one pair. */
	tail_bytes = n[1] * 2 * sizeof(unsigned long) -
		     ((char *)base - (char *)n);
	memmove(base + 2, base, tail_bytes);
	base[0] = v;
	base[1] = 1;
	return 1;
}
4605
4606static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4607{
4608 void *p;
4609 int i;
4610 if (n[0] == n[1])
4611 return;
4612 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
4613 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4614 continue;
4615 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4616 return;
4617 }
4618}
4619
/*
 * show_symbol - print @address to @m, symbolically when possible.
 *
 * With CONFIG_KALLSYMS, a successful lookup_symbol_attrs() yields
 * "name+offset/size", followed by " [module]" when a module name was
 * returned. In every other case the raw address is printed with %p.
 */
static void show_symbol(struct seq_file *m, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char name[KSYM_NAME_LEN], modname[MODULE_NAME_LEN];
	unsigned long size, offset;

	if (!lookup_symbol_attrs(address, &size, &offset, modname, name)) {
		seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
		if (modname[0] != '\0')
			seq_printf(m, " [%s]", modname);
		return;
	}
#endif
	seq_printf(m, "%p", (void *)address);
}
4635
4636static int leaks_show(struct seq_file *m, void *p)
4637{
4638 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4639 struct slab *slabp;
4640 struct kmem_list3 *l3;
4641 const char *name;
4642 unsigned long *n = m->private;
4643 int node;
4644 int i;
4645
4646 if (!(cachep->flags & SLAB_STORE_USER))
4647 return 0;
4648 if (!(cachep->flags & SLAB_RED_ZONE))
4649 return 0;
4650
4651
4652
4653 n[1] = 0;
4654
4655 for_each_online_node(node) {
4656 l3 = cachep->nodelists[node];
4657 if (!l3)
4658 continue;
4659
4660 check_irq_on();
4661 spin_lock_irq(&l3->list_lock);
4662
4663 list_for_each_entry(slabp, &l3->slabs_full, list)
4664 handle_slab(n, cachep, slabp);
4665 list_for_each_entry(slabp, &l3->slabs_partial, list)
4666 handle_slab(n, cachep, slabp);
4667 spin_unlock_irq(&l3->list_lock);
4668 }
4669 name = cachep->name;
4670 if (n[0] == n[1]) {
4671
4672 mutex_unlock(&slab_mutex);
4673 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4674 if (!m->private) {
4675
4676 m->private = n;
4677 mutex_lock(&slab_mutex);
4678 return -ENOMEM;
4679 }
4680 *(unsigned long *)m->private = n[0] * 2;
4681 kfree(n);
4682 mutex_lock(&slab_mutex);
4683
4684 m->count = m->size;
4685 return 0;
4686 }
4687 for (i = 0; i < n[1]; i++) {
4688 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4689 show_symbol(m, n[2*i+2]);
4690 seq_putc(m, '\n');
4691 }
4692
4693 return 0;
4694}
4695
4696static const struct seq_operations slabstats_op = {
4697 .start = leaks_start,
4698 .next = s_next,
4699 .stop = s_stop,
4700 .show = leaks_show,
4701};
4702
4703static int slabstats_open(struct inode *inode, struct file *file)
4704{
4705 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4706 int ret = -ENOMEM;
4707 if (n) {
4708 ret = seq_open(file, &slabstats_op);
4709 if (!ret) {
4710 struct seq_file *m = file->private_data;
4711 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4712 m->private = n;
4713 n = NULL;
4714 }
4715 kfree(n);
4716 }
4717 return ret;
4718}
4719
4720static const struct file_operations proc_slabstats_operations = {
4721 .open = slabstats_open,
4722 .read = seq_read,
4723 .llseek = seq_lseek,
4724 .release = seq_release_private,
4725};
4726#endif
4727
4728static int __init slab_proc_init(void)
4729{
4730 proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
4731#ifdef CONFIG_DEBUG_SLAB_LEAK
4732 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4733#endif
4734 return 0;
4735}
4736module_init(slab_proc_init);
4737#endif
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751size_t ksize(const void *objp)
4752{
4753 BUG_ON(!objp);
4754 if (unlikely(objp == ZERO_SIZE_PTR))
4755 return 0;
4756
4757 return virt_to_cache(objp)->object_size;
4758}
4759EXPORT_SYMBOL(ksize);
4760