1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/kmemcheck.h>
22#include <linux/cpu.h>
23#include <linux/cpuset.h>
24#include <linux/mempolicy.h>
25#include <linux/ctype.h>
26#include <linux/debugobjects.h>
27#include <linux/kallsyms.h>
28#include <linux/memory.h>
29#include <linux/math64.h>
30#include <linux/fault-inject.h>
31#include <linux/stacktrace.h>
32
33#include <trace/events/kmem.h>
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * Union of the cache flags that kmem_cache_debug() treats as "this cache
 * has debugging enabled".
 */
#define SLAB_DEBUG_FLAGS \
	(SLAB_DEBUG_FREE | SLAB_TRACE | SLAB_STORE_USER | \
	 SLAB_POISON | SLAB_RED_ZONE)
113
/*
 * Report whether cache @s has any of the SLAB_DEBUG_FLAGS bits set.
 *
 * Without CONFIG_SLUB_DEBUG the answer is a constant 0.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#endif
}
122
123
124
125
126
127
128
129
130
131
/* Compile-time debug switches; both are left disabled here. */
#undef SLUB_RESILIENCY_TEST

/* When defined, every failed cmpxchg_double_slab() attempt is logged. */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Partial-list tunables. Their consumers are outside this part of the
 * file; presumably lower/upper bounds on the number of partial slabs kept
 * per node -- confirm at the use sites.
 */
#define MIN_PARTIAL 5
#define MAX_PARTIAL 10

/*
 * Debug flags used when debugging is switched on without an explicit option
 * list (CONFIG_SLUB_DEBUG_ON, or "slub_debug" with no usable options).
 */
#define DEBUG_DEFAULT_FLAGS \
	(SLAB_STORE_USER | SLAB_POISON | SLAB_RED_ZONE | SLAB_DEBUG_FREE)

/*
 * Subset of debug flags; users are not visible in this chunk. The name
 * suggests the flags that add per-object metadata -- TODO confirm.
 */
#define DEBUG_METADATA_FLAGS \
	(SLAB_STORE_USER | SLAB_POISON | SLAB_RED_ZONE)

/*
 * Flag sets consulted by the cache-merging logic (not visible in this
 * chunk). NOTE(review): by name, caches carrying a NEVER_MERGE flag are not
 * merged, and MERGE_SAME flags must match between merge candidates --
 * verify against the merge code.
 */
#define SLUB_NEVER_MERGE \
	(SLAB_FAILSLAB | SLAB_NOLEAKTRACE | SLAB_DESTROY_BY_RCU | \
	 SLAB_TRACE | SLAB_STORE_USER | SLAB_POISON | SLAB_RED_ZONE)

#define SLUB_MERGE_SAME \
	(SLAB_NOTRACK | SLAB_CACHE_DMA | SLAB_RECLAIM_ACCOUNT | \
	 SLAB_DEBUG_FREE)
169
/*
 * Encoding of struct kmem_cache_order_objects: the page order lives above
 * OO_SHIFT, the object count in the low OO_SHIFT bits (see oo_make(),
 * oo_order() and oo_objects()).
 */
#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)

/* Clamp applied by on_freelist() to the expected objects per slab. */
#define MAX_OBJS_PER_PAGE	32767	/* 2^15 - 1 */

/* Flags kept in kmem_cache->flags for internal use by this file. */
#define __OBJECT_POISON		(1UL << 31)	/* init_object() poisons the payload */
#define __CMPXCHG_DOUBLE	(1UL << 30)	/* use cmpxchg_double on freelist/counters */
177
178static int kmem_size = sizeof(struct kmem_cache);
179
180#ifdef CONFIG_SMP
181static struct notifier_block slab_notifier;
182#endif
183
184static enum {
185 DOWN,
186 PARTIAL,
187 UP,
188 SYSFS
189} slab_state = DOWN;
190
191
192static DECLARE_RWSEM(slub_lock);
193static LIST_HEAD(slab_caches);
194
195
196
197
/* Number of stack-trace entries kept in each struct track. */
#define TRACK_ADDRS_COUNT 16

/*
 * One alloc or free record. Two of these (indexed by enum track_item) are
 * located behind each object by get_track() and filled in by set_track().
 */
struct track {
	unsigned long addr;	/* address handed to set_track() */
#ifdef CONFIG_STACKTRACE
	/* Saved stack trace; unused tail entries are zeroed. */
	unsigned long addrs[TRACK_ADDRS_COUNT];
#endif
	int cpu;		/* smp_processor_id() when recorded */
	int pid;		/* current->pid when recorded */
	unsigned long when;	/* jiffies when recorded */
};

/* Index of a record within an object's pair of struct track slots. */
enum track_item {
	TRACK_ALLOC,
	TRACK_FREE
};
210
211#ifdef CONFIG_SYSFS
212static int sysfs_slab_add(struct kmem_cache *);
213static int sysfs_slab_alias(struct kmem_cache *, const char *);
214static void sysfs_slab_remove(struct kmem_cache *);
215
216#else
217static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
218static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
219 { return 0; }
220static inline void sysfs_slab_remove(struct kmem_cache *s)
221{
222 kfree(s->name);
223 kfree(s);
224}
225
226#endif
227
228static inline void stat(const struct kmem_cache *s, enum stat_item si)
229{
230#ifdef CONFIG_SLUB_STATS
231 __this_cpu_inc(s->cpu_slab->stat[si]);
232#endif
233}
234
235
236
237
238
239int slab_is_available(void)
240{
241 return slab_state >= UP;
242}
243
244static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
245{
246 return s->node[node];
247}
248
249
250static inline int check_valid_pointer(struct kmem_cache *s,
251 struct page *page, const void *object)
252{
253 void *base;
254
255 if (!object)
256 return 1;
257
258 base = page_address(page);
259 if (object < base || object >= base + page->objects * s->size ||
260 (object - base) % s->size) {
261 return 0;
262 }
263
264 return 1;
265}
266
267static inline void *get_freepointer(struct kmem_cache *s, void *object)
268{
269 return *(void **)(object + s->offset);
270}
271
/*
 * Variant of get_freepointer() that, under CONFIG_DEBUG_PAGEALLOC, fetches
 * the free pointer through probe_kernel_read() instead of a plain load.
 *
 * NOTE(review): the probe_kernel_read() result is not checked, so the
 * returned value is whatever is left in the local if the probe fails --
 * callers presumably tolerate a bogus value; confirm.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	void *fp;

#ifndef CONFIG_DEBUG_PAGEALLOC
	fp = get_freepointer(s, object);
#else
	probe_kernel_read(&fp, (void **)(object + s->offset), sizeof(fp));
#endif
	return fp;
}
283
284static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
285{
286 *(void **)(object + s->offset) = fp;
287}
288
289
/*
 * Iterate @__p over the @__objects object slots of a slab starting at
 * @__addr, stepping by the cache's per-object size. @__addr and @__s are
 * evaluated on every iteration.
 */
#define for_each_object(__p, __s, __addr, __objects)			\
	for (__p = (__addr);						\
	     __p < (__addr) + (__objects) * (__s)->size;		\
	     __p += (__s)->size)
293
294
295static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
296{
297 return (p - addr) / s->size;
298}
299
300static inline size_t slab_ksize(const struct kmem_cache *s)
301{
302#ifdef CONFIG_SLUB_DEBUG
303
304
305
306
307 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
308 return s->objsize;
309
310#endif
311
312
313
314
315
316 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
317 return s->inuse;
318
319
320
321 return s->size;
322}
323
324static inline int order_objects(int order, unsigned long size, int reserved)
325{
326 return ((PAGE_SIZE << order) - reserved) / size;
327}
328
329static inline struct kmem_cache_order_objects oo_make(int order,
330 unsigned long size, int reserved)
331{
332 struct kmem_cache_order_objects x = {
333 (order << OO_SHIFT) + order_objects(order, size, reserved)
334 };
335
336 return x;
337}
338
339static inline int oo_order(struct kmem_cache_order_objects x)
340{
341 return x.x >> OO_SHIFT;
342}
343
344static inline int oo_objects(struct kmem_cache_order_objects x)
345{
346 return x.x & OO_MASK;
347}
348
349
350
351
352static __always_inline void slab_lock(struct page *page)
353{
354 bit_spin_lock(PG_locked, &page->flags);
355}
356
357static __always_inline void slab_unlock(struct page *page)
358{
359 __bit_spin_unlock(PG_locked, &page->flags);
360}
361
362
363static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
364 void *freelist_old, unsigned long counters_old,
365 void *freelist_new, unsigned long counters_new,
366 const char *n)
367{
368 VM_BUG_ON(!irqs_disabled());
369#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
370 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
371 if (s->flags & __CMPXCHG_DOUBLE) {
372 if (cmpxchg_double(&page->freelist, &page->counters,
373 freelist_old, counters_old,
374 freelist_new, counters_new))
375 return 1;
376 } else
377#endif
378 {
379 slab_lock(page);
380 if (page->freelist == freelist_old && page->counters == counters_old) {
381 page->freelist = freelist_new;
382 page->counters = counters_new;
383 slab_unlock(page);
384 return 1;
385 }
386 slab_unlock(page);
387 }
388
389 cpu_relax();
390 stat(s, CMPXCHG_DOUBLE_FAIL);
391
392#ifdef SLUB_DEBUG_CMPXCHG
393 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
394#endif
395
396 return 0;
397}
398
399static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
400 void *freelist_old, unsigned long counters_old,
401 void *freelist_new, unsigned long counters_new,
402 const char *n)
403{
404#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
405 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
406 if (s->flags & __CMPXCHG_DOUBLE) {
407 if (cmpxchg_double(&page->freelist, &page->counters,
408 freelist_old, counters_old,
409 freelist_new, counters_new))
410 return 1;
411 } else
412#endif
413 {
414 unsigned long flags;
415
416 local_irq_save(flags);
417 slab_lock(page);
418 if (page->freelist == freelist_old && page->counters == counters_old) {
419 page->freelist = freelist_new;
420 page->counters = counters_new;
421 slab_unlock(page);
422 local_irq_restore(flags);
423 return 1;
424 }
425 slab_unlock(page);
426 local_irq_restore(flags);
427 }
428
429 cpu_relax();
430 stat(s, CMPXCHG_DOUBLE_FAIL);
431
432#ifdef SLUB_DEBUG_CMPXCHG
433 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
434#endif
435
436 return 0;
437}
438
439#ifdef CONFIG_SLUB_DEBUG
440
441
442
443
444
445
446static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
447{
448 void *p;
449 void *addr = page_address(page);
450
451 for (p = page->freelist; p; p = get_freepointer(s, p))
452 set_bit(slab_index(p, s, addr), map);
453}
454
455
456
457
/*
 * Debug flags OR-ed into new caches by kmem_cache_flags(). Starts out as
 * DEBUG_DEFAULT_FLAGS under CONFIG_SLUB_DEBUG_ON, otherwise as 0; the
 * "slub_debug" boot option may change it.
 */
#ifndef CONFIG_SLUB_DEBUG_ON
static int slub_debug;
#else
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#endif

/* Name prefix from "slub_debug=...,<name>" restricting which caches get slub_debug. */
static char *slub_debug_slabs;

/* Set by the "slub_debug=O" boot option; consumers are outside this chunk. */
static int disable_higher_order_debug;
466
467
468
469
470static void print_section(char *text, u8 *addr, unsigned int length)
471{
472 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
473 length, 1);
474}
475
476static struct track *get_track(struct kmem_cache *s, void *object,
477 enum track_item alloc)
478{
479 struct track *p;
480
481 if (s->offset)
482 p = object + s->offset + sizeof(void *);
483 else
484 p = object + s->inuse;
485
486 return p + alloc;
487}
488
489static void set_track(struct kmem_cache *s, void *object,
490 enum track_item alloc, unsigned long addr)
491{
492 struct track *p = get_track(s, object, alloc);
493
494 if (addr) {
495#ifdef CONFIG_STACKTRACE
496 struct stack_trace trace;
497 int i;
498
499 trace.nr_entries = 0;
500 trace.max_entries = TRACK_ADDRS_COUNT;
501 trace.entries = p->addrs;
502 trace.skip = 3;
503 save_stack_trace(&trace);
504
505
506 if (trace.nr_entries != 0 &&
507 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
508 trace.nr_entries--;
509
510 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
511 p->addrs[i] = 0;
512#endif
513 p->addr = addr;
514 p->cpu = smp_processor_id();
515 p->pid = current->pid;
516 p->when = jiffies;
517 } else
518 memset(p, 0, sizeof(struct track));
519}
520
521static void init_tracking(struct kmem_cache *s, void *object)
522{
523 if (!(s->flags & SLAB_STORE_USER))
524 return;
525
526 set_track(s, object, TRACK_FREE, 0UL);
527 set_track(s, object, TRACK_ALLOC, 0UL);
528}
529
530static void print_track(const char *s, struct track *t)
531{
532 if (!t->addr)
533 return;
534
535 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
536 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
537#ifdef CONFIG_STACKTRACE
538 {
539 int i;
540 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
541 if (t->addrs[i])
542 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
543 else
544 break;
545 }
546#endif
547}
548
549static void print_tracking(struct kmem_cache *s, void *object)
550{
551 if (!(s->flags & SLAB_STORE_USER))
552 return;
553
554 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
555 print_track("Freed", get_track(s, object, TRACK_FREE));
556}
557
558static void print_page_info(struct page *page)
559{
560 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
561 page, page->objects, page->inuse, page->freelist, page->flags);
562
563}
564
565static void slab_bug(struct kmem_cache *s, char *fmt, ...)
566{
567 va_list args;
568 char buf[100];
569
570 va_start(args, fmt);
571 vsnprintf(buf, sizeof(buf), fmt, args);
572 va_end(args);
573 printk(KERN_ERR "========================================"
574 "=====================================\n");
575 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
576 printk(KERN_ERR "----------------------------------------"
577 "-------------------------------------\n\n");
578}
579
580static void slab_fix(struct kmem_cache *s, char *fmt, ...)
581{
582 va_list args;
583 char buf[100];
584
585 va_start(args, fmt);
586 vsnprintf(buf, sizeof(buf), fmt, args);
587 va_end(args);
588 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
589}
590
591static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
592{
593 unsigned int off;
594 u8 *addr = page_address(page);
595
596 print_tracking(s, p);
597
598 print_page_info(page);
599
600 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
601 p, p - addr, get_freepointer(s, p));
602
603 if (p > addr + 16)
604 print_section("Bytes b4 ", p - 16, 16);
605
606 print_section("Object ", p, min_t(unsigned long, s->objsize,
607 PAGE_SIZE));
608 if (s->flags & SLAB_RED_ZONE)
609 print_section("Redzone ", p + s->objsize,
610 s->inuse - s->objsize);
611
612 if (s->offset)
613 off = s->offset + sizeof(void *);
614 else
615 off = s->inuse;
616
617 if (s->flags & SLAB_STORE_USER)
618 off += 2 * sizeof(struct track);
619
620 if (off != s->size)
621
622 print_section("Padding ", p + off, s->size - off);
623
624 dump_stack();
625}
626
627static void object_err(struct kmem_cache *s, struct page *page,
628 u8 *object, char *reason)
629{
630 slab_bug(s, "%s", reason);
631 print_trailer(s, page, object);
632}
633
/*
 * Report a problem with a whole slab: BUG banner with the printf-style
 * message (formatted into a 100-byte buffer), the page summary and a stack
 * dump.
 */
static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	char msg[100];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	slab_bug(s, "%s", msg);
	print_page_info(page);
	dump_stack();
}
646
647static void init_object(struct kmem_cache *s, void *object, u8 val)
648{
649 u8 *p = object;
650
651 if (s->flags & __OBJECT_POISON) {
652 memset(p, POISON_FREE, s->objsize - 1);
653 p[s->objsize - 1] = POISON_END;
654 }
655
656 if (s->flags & SLAB_RED_ZONE)
657 memset(p + s->objsize, val, s->inuse - s->objsize);
658}
659
660static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
661 void *from, void *to)
662{
663 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
664 memset(from, data, to - from);
665}
666
667static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
668 u8 *object, char *what,
669 u8 *start, unsigned int value, unsigned int bytes)
670{
671 u8 *fault;
672 u8 *end;
673
674 fault = memchr_inv(start, value, bytes);
675 if (!fault)
676 return 1;
677
678 end = start + bytes;
679 while (end > fault && end[-1] == value)
680 end--;
681
682 slab_bug(s, "%s overwritten", what);
683 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
684 fault, end - 1, fault[0], value);
685 print_trailer(s, page, object);
686
687 restore_bytes(s, what, value, fault, end);
688 return 0;
689}
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
730{
731 unsigned long off = s->inuse;
732
733 if (s->offset)
734
735 off += sizeof(void *);
736
737 if (s->flags & SLAB_STORE_USER)
738
739 off += 2 * sizeof(struct track);
740
741 if (s->size == off)
742 return 1;
743
744 return check_bytes_and_report(s, page, p, "Object padding",
745 p + off, POISON_INUSE, s->size - off);
746}
747
748
749static int slab_pad_check(struct kmem_cache *s, struct page *page)
750{
751 u8 *start;
752 u8 *fault;
753 u8 *end;
754 int length;
755 int remainder;
756
757 if (!(s->flags & SLAB_POISON))
758 return 1;
759
760 start = page_address(page);
761 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
762 end = start + length;
763 remainder = length % s->size;
764 if (!remainder)
765 return 1;
766
767 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
768 if (!fault)
769 return 1;
770 while (end > fault && end[-1] == POISON_INUSE)
771 end--;
772
773 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
774 print_section("Padding ", end - remainder, remainder);
775
776 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
777 return 0;
778}
779
780static int check_object(struct kmem_cache *s, struct page *page,
781 void *object, u8 val)
782{
783 u8 *p = object;
784 u8 *endobject = object + s->objsize;
785
786 if (s->flags & SLAB_RED_ZONE) {
787 if (!check_bytes_and_report(s, page, object, "Redzone",
788 endobject, val, s->inuse - s->objsize))
789 return 0;
790 } else {
791 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
792 check_bytes_and_report(s, page, p, "Alignment padding",
793 endobject, POISON_INUSE, s->inuse - s->objsize);
794 }
795 }
796
797 if (s->flags & SLAB_POISON) {
798 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
799 (!check_bytes_and_report(s, page, p, "Poison", p,
800 POISON_FREE, s->objsize - 1) ||
801 !check_bytes_and_report(s, page, p, "Poison",
802 p + s->objsize - 1, POISON_END, 1)))
803 return 0;
804
805
806
807 check_pad_bytes(s, page, p);
808 }
809
810 if (!s->offset && val == SLUB_RED_ACTIVE)
811
812
813
814
815 return 1;
816
817
818 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
819 object_err(s, page, p, "Freepointer corrupt");
820
821
822
823
824
825 set_freepointer(s, p, NULL);
826 return 0;
827 }
828 return 1;
829}
830
831static int check_slab(struct kmem_cache *s, struct page *page)
832{
833 int maxobj;
834
835 VM_BUG_ON(!irqs_disabled());
836
837 if (!PageSlab(page)) {
838 slab_err(s, page, "Not a valid slab page");
839 return 0;
840 }
841
842 maxobj = order_objects(compound_order(page), s->size, s->reserved);
843 if (page->objects > maxobj) {
844 slab_err(s, page, "objects %u > max %u",
845 s->name, page->objects, maxobj);
846 return 0;
847 }
848 if (page->inuse > page->objects) {
849 slab_err(s, page, "inuse %u > max %u",
850 s->name, page->inuse, page->objects);
851 return 0;
852 }
853
854 slab_pad_check(s, page);
855 return 1;
856}
857
858
859
860
861
862static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
863{
864 int nr = 0;
865 void *fp;
866 void *object = NULL;
867 unsigned long max_objects;
868
869 fp = page->freelist;
870 while (fp && nr <= page->objects) {
871 if (fp == search)
872 return 1;
873 if (!check_valid_pointer(s, page, fp)) {
874 if (object) {
875 object_err(s, page, object,
876 "Freechain corrupt");
877 set_freepointer(s, object, NULL);
878 break;
879 } else {
880 slab_err(s, page, "Freepointer corrupt");
881 page->freelist = NULL;
882 page->inuse = page->objects;
883 slab_fix(s, "Freelist cleared");
884 return 0;
885 }
886 break;
887 }
888 object = fp;
889 fp = get_freepointer(s, object);
890 nr++;
891 }
892
893 max_objects = order_objects(compound_order(page), s->size, s->reserved);
894 if (max_objects > MAX_OBJS_PER_PAGE)
895 max_objects = MAX_OBJS_PER_PAGE;
896
897 if (page->objects != max_objects) {
898 slab_err(s, page, "Wrong number of objects. Found %d but "
899 "should be %d", page->objects, max_objects);
900 page->objects = max_objects;
901 slab_fix(s, "Number of objects adjusted.");
902 }
903 if (page->inuse != page->objects - nr) {
904 slab_err(s, page, "Wrong object count. Counter is %d but "
905 "counted were %d", page->inuse, page->objects - nr);
906 page->inuse = page->objects - nr;
907 slab_fix(s, "Object count adjusted.");
908 }
909 return search == NULL;
910}
911
912static void trace(struct kmem_cache *s, struct page *page, void *object,
913 int alloc)
914{
915 if (s->flags & SLAB_TRACE) {
916 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
917 s->name,
918 alloc ? "alloc" : "free",
919 object, page->inuse,
920 page->freelist);
921
922 if (!alloc)
923 print_section("Object ", (void *)object, s->objsize);
924
925 dump_stack();
926 }
927}
928
929
930
931
932
933static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
934{
935 flags &= gfp_allowed_mask;
936 lockdep_trace_alloc(flags);
937 might_sleep_if(flags & __GFP_WAIT);
938
939 return should_failslab(s->objsize, flags, s->flags);
940}
941
942static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
943{
944 flags &= gfp_allowed_mask;
945 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
946 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
947}
948
949static inline void slab_free_hook(struct kmem_cache *s, void *x)
950{
951 kmemleak_free_recursive(x, s->flags);
952
953
954
955
956
957
958#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
959 {
960 unsigned long flags;
961
962 local_irq_save(flags);
963 kmemcheck_slab_free(s, x, s->objsize);
964 debug_check_no_locks_freed(x, s->objsize);
965 local_irq_restore(flags);
966 }
967#endif
968 if (!(s->flags & SLAB_DEBUG_OBJECTS))
969 debug_check_no_obj_freed(x, s->objsize);
970}
971
972
973
974
975
976
977static void add_full(struct kmem_cache *s,
978 struct kmem_cache_node *n, struct page *page)
979{
980 if (!(s->flags & SLAB_STORE_USER))
981 return;
982
983 list_add(&page->lru, &n->full);
984}
985
986
987
988
989static void remove_full(struct kmem_cache *s, struct page *page)
990{
991 if (!(s->flags & SLAB_STORE_USER))
992 return;
993
994 list_del(&page->lru);
995}
996
997
998static inline unsigned long slabs_node(struct kmem_cache *s, int node)
999{
1000 struct kmem_cache_node *n = get_node(s, node);
1001
1002 return atomic_long_read(&n->nr_slabs);
1003}
1004
1005static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1006{
1007 return atomic_long_read(&n->nr_slabs);
1008}
1009
1010static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1011{
1012 struct kmem_cache_node *n = get_node(s, node);
1013
1014
1015
1016
1017
1018
1019
1020 if (n) {
1021 atomic_long_inc(&n->nr_slabs);
1022 atomic_long_add(objects, &n->total_objects);
1023 }
1024}
1025static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1026{
1027 struct kmem_cache_node *n = get_node(s, node);
1028
1029 atomic_long_dec(&n->nr_slabs);
1030 atomic_long_sub(objects, &n->total_objects);
1031}
1032
1033
1034static void setup_object_debug(struct kmem_cache *s, struct page *page,
1035 void *object)
1036{
1037 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1038 return;
1039
1040 init_object(s, object, SLUB_RED_INACTIVE);
1041 init_tracking(s, object);
1042}
1043
1044static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1045 void *object, unsigned long addr)
1046{
1047 if (!check_slab(s, page))
1048 goto bad;
1049
1050 if (!check_valid_pointer(s, page, object)) {
1051 object_err(s, page, object, "Freelist Pointer check fails");
1052 goto bad;
1053 }
1054
1055 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1056 goto bad;
1057
1058
1059 if (s->flags & SLAB_STORE_USER)
1060 set_track(s, object, TRACK_ALLOC, addr);
1061 trace(s, page, object, 1);
1062 init_object(s, object, SLUB_RED_ACTIVE);
1063 return 1;
1064
1065bad:
1066 if (PageSlab(page)) {
1067
1068
1069
1070
1071
1072 slab_fix(s, "Marking all objects used");
1073 page->inuse = page->objects;
1074 page->freelist = NULL;
1075 }
1076 return 0;
1077}
1078
1079static noinline int free_debug_processing(struct kmem_cache *s,
1080 struct page *page, void *object, unsigned long addr)
1081{
1082 unsigned long flags;
1083 int rc = 0;
1084
1085 local_irq_save(flags);
1086 slab_lock(page);
1087
1088 if (!check_slab(s, page))
1089 goto fail;
1090
1091 if (!check_valid_pointer(s, page, object)) {
1092 slab_err(s, page, "Invalid object pointer 0x%p", object);
1093 goto fail;
1094 }
1095
1096 if (on_freelist(s, page, object)) {
1097 object_err(s, page, object, "Object already free");
1098 goto fail;
1099 }
1100
1101 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1102 goto out;
1103
1104 if (unlikely(s != page->slab)) {
1105 if (!PageSlab(page)) {
1106 slab_err(s, page, "Attempt to free object(0x%p) "
1107 "outside of slab", object);
1108 } else if (!page->slab) {
1109 printk(KERN_ERR
1110 "SLUB <none>: no slab for object 0x%p.\n",
1111 object);
1112 dump_stack();
1113 } else
1114 object_err(s, page, object,
1115 "page slab pointer corrupt.");
1116 goto fail;
1117 }
1118
1119 if (s->flags & SLAB_STORE_USER)
1120 set_track(s, object, TRACK_FREE, addr);
1121 trace(s, page, object, 0);
1122 init_object(s, object, SLUB_RED_INACTIVE);
1123 rc = 1;
1124out:
1125 slab_unlock(page);
1126 local_irq_restore(flags);
1127 return rc;
1128
1129fail:
1130 slab_fix(s, "Object at 0x%p not freed", object);
1131 goto out;
1132}
1133
1134static int __init setup_slub_debug(char *str)
1135{
1136 slub_debug = DEBUG_DEFAULT_FLAGS;
1137 if (*str++ != '=' || !*str)
1138
1139
1140
1141 goto out;
1142
1143 if (*str == ',')
1144
1145
1146
1147
1148 goto check_slabs;
1149
1150 if (tolower(*str) == 'o') {
1151
1152
1153
1154
1155 disable_higher_order_debug = 1;
1156 goto out;
1157 }
1158
1159 slub_debug = 0;
1160 if (*str == '-')
1161
1162
1163
1164 goto out;
1165
1166
1167
1168
1169 for (; *str && *str != ','; str++) {
1170 switch (tolower(*str)) {
1171 case 'f':
1172 slub_debug |= SLAB_DEBUG_FREE;
1173 break;
1174 case 'z':
1175 slub_debug |= SLAB_RED_ZONE;
1176 break;
1177 case 'p':
1178 slub_debug |= SLAB_POISON;
1179 break;
1180 case 'u':
1181 slub_debug |= SLAB_STORE_USER;
1182 break;
1183 case 't':
1184 slub_debug |= SLAB_TRACE;
1185 break;
1186 case 'a':
1187 slub_debug |= SLAB_FAILSLAB;
1188 break;
1189 default:
1190 printk(KERN_ERR "slub_debug option '%c' "
1191 "unknown. skipped\n", *str);
1192 }
1193 }
1194
1195check_slabs:
1196 if (*str == ',')
1197 slub_debug_slabs = str + 1;
1198out:
1199 return 1;
1200}
1201
1202__setup("slub_debug", setup_slub_debug);
1203
1204static unsigned long kmem_cache_flags(unsigned long objsize,
1205 unsigned long flags, const char *name,
1206 void (*ctor)(void *))
1207{
1208
1209
1210
1211 if (slub_debug && (!slub_debug_slabs ||
1212 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1213 flags |= slub_debug;
1214
1215 return flags;
1216}
1217#else
1218static inline void setup_object_debug(struct kmem_cache *s,
1219 struct page *page, void *object) {}
1220
1221static inline int alloc_debug_processing(struct kmem_cache *s,
1222 struct page *page, void *object, unsigned long addr) { return 0; }
1223
1224static inline int free_debug_processing(struct kmem_cache *s,
1225 struct page *page, void *object, unsigned long addr) { return 0; }
1226
1227static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1228 { return 1; }
1229static inline int check_object(struct kmem_cache *s, struct page *page,
1230 void *object, u8 val) { return 1; }
1231static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1232 struct page *page) {}
1233static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1234static inline unsigned long kmem_cache_flags(unsigned long objsize,
1235 unsigned long flags, const char *name,
1236 void (*ctor)(void *))
1237{
1238 return flags;
1239}
1240#define slub_debug 0
1241
1242#define disable_higher_order_debug 0
1243
1244static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1245 { return 0; }
1246static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1247 { return 0; }
1248static inline void inc_slabs_node(struct kmem_cache *s, int node,
1249 int objects) {}
1250static inline void dec_slabs_node(struct kmem_cache *s, int node,
1251 int objects) {}
1252
1253static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1254 { return 0; }
1255
1256static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1257 void *object) {}
1258
1259static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1260
1261#endif
1262
1263
1264
1265
1266static inline struct page *alloc_slab_page(gfp_t flags, int node,
1267 struct kmem_cache_order_objects oo)
1268{
1269 int order = oo_order(oo);
1270
1271 flags |= __GFP_NOTRACK;
1272
1273 if (node == NUMA_NO_NODE)
1274 return alloc_pages(flags, order);
1275 else
1276 return alloc_pages_exact_node(node, flags, order);
1277}
1278
1279static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1280{
1281 struct page *page;
1282 struct kmem_cache_order_objects oo = s->oo;
1283 gfp_t alloc_gfp;
1284
1285 flags &= gfp_allowed_mask;
1286
1287 if (flags & __GFP_WAIT)
1288 local_irq_enable();
1289
1290 flags |= s->allocflags;
1291
1292
1293
1294
1295
1296 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1297
1298 page = alloc_slab_page(alloc_gfp, node, oo);
1299 if (unlikely(!page)) {
1300 oo = s->min;
1301
1302
1303
1304
1305 page = alloc_slab_page(flags, node, oo);
1306
1307 if (page)
1308 stat(s, ORDER_FALLBACK);
1309 }
1310
1311 if (flags & __GFP_WAIT)
1312 local_irq_disable();
1313
1314 if (!page)
1315 return NULL;
1316
1317 if (kmemcheck_enabled
1318 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1319 int pages = 1 << oo_order(oo);
1320
1321 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1322
1323
1324
1325
1326
1327 if (s->ctor)
1328 kmemcheck_mark_uninitialized_pages(page, pages);
1329 else
1330 kmemcheck_mark_unallocated_pages(page, pages);
1331 }
1332
1333 page->objects = oo_objects(oo);
1334 mod_zone_page_state(page_zone(page),
1335 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1336 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1337 1 << oo_order(oo));
1338
1339 return page;
1340}
1341
1342static void setup_object(struct kmem_cache *s, struct page *page,
1343 void *object)
1344{
1345 setup_object_debug(s, page, object);
1346 if (unlikely(s->ctor))
1347 s->ctor(object);
1348}
1349
1350static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1351{
1352 struct page *page;
1353 void *start;
1354 void *last;
1355 void *p;
1356
1357 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1358
1359 page = allocate_slab(s,
1360 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1361 if (!page)
1362 goto out;
1363
1364 inc_slabs_node(s, page_to_nid(page), page->objects);
1365 page->slab = s;
1366 page->flags |= 1 << PG_slab;
1367
1368 start = page_address(page);
1369
1370 if (unlikely(s->flags & SLAB_POISON))
1371 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1372
1373 last = start;
1374 for_each_object(p, s, start, page->objects) {
1375 setup_object(s, page, last);
1376 set_freepointer(s, last, p);
1377 last = p;
1378 }
1379 setup_object(s, page, last);
1380 set_freepointer(s, last, NULL);
1381
1382 page->freelist = start;
1383 page->inuse = page->objects;
1384 page->frozen = 1;
1385out:
1386 return page;
1387}
1388
1389static void __free_slab(struct kmem_cache *s, struct page *page)
1390{
1391 int order = compound_order(page);
1392 int pages = 1 << order;
1393
1394 if (kmem_cache_debug(s)) {
1395 void *p;
1396
1397 slab_pad_check(s, page);
1398 for_each_object(p, s, page_address(page),
1399 page->objects)
1400 check_object(s, page, p, SLUB_RED_INACTIVE);
1401 }
1402
1403 kmemcheck_free_shadow(page, compound_order(page));
1404
1405 mod_zone_page_state(page_zone(page),
1406 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1407 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1408 -pages);
1409
1410 __ClearPageSlab(page);
1411 reset_page_mapcount(page);
1412 if (current->reclaim_state)
1413 current->reclaim_state->reclaimed_slab += pages;
1414 __free_pages(page, order);
1415}
1416
/*
 * True when an rcu_head does not fit into page->lru, in which case
 * free_slab() places the rcu_head in reserved space at the end of the slab
 * instead.
 */
#define need_reserve_slab_rcu						\
	(sizeof(struct rcu_head) > sizeof(((struct page *)NULL)->lru))
1419
1420static void rcu_free_slab(struct rcu_head *h)
1421{
1422 struct page *page;
1423
1424 if (need_reserve_slab_rcu)
1425 page = virt_to_head_page(h);
1426 else
1427 page = container_of((struct list_head *)h, struct page, lru);
1428
1429 __free_slab(page->slab, page);
1430}
1431
1432static void free_slab(struct kmem_cache *s, struct page *page)
1433{
1434 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1435 struct rcu_head *head;
1436
1437 if (need_reserve_slab_rcu) {
1438 int order = compound_order(page);
1439 int offset = (PAGE_SIZE << order) - s->reserved;
1440
1441 VM_BUG_ON(s->reserved != sizeof(*head));
1442 head = page_address(page) + offset;
1443 } else {
1444
1445
1446
1447 head = (void *)&page->lru;
1448 }
1449
1450 call_rcu(head, rcu_free_slab);
1451 } else
1452 __free_slab(s, page);
1453}
1454
1455static void discard_slab(struct kmem_cache *s, struct page *page)
1456{
1457 dec_slabs_node(s, page_to_nid(page), page->objects);
1458 free_slab(s, page);
1459}
1460
1461
1462
1463
1464
1465
1466static inline void add_partial(struct kmem_cache_node *n,
1467 struct page *page, int tail)
1468{
1469 n->nr_partial++;
1470 if (tail == DEACTIVATE_TO_TAIL)
1471 list_add_tail(&page->lru, &n->partial);
1472 else
1473 list_add(&page->lru, &n->partial);
1474}
1475
1476
1477
1478
1479static inline void remove_partial(struct kmem_cache_node *n,
1480 struct page *page)
1481{
1482 list_del(&page->lru);
1483 n->nr_partial--;
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494static inline void *acquire_slab(struct kmem_cache *s,
1495 struct kmem_cache_node *n, struct page *page,
1496 int mode)
1497{
1498 void *freelist;
1499 unsigned long counters;
1500 struct page new;
1501
1502
1503
1504
1505
1506
1507 do {
1508 freelist = page->freelist;
1509 counters = page->counters;
1510 new.counters = counters;
1511 if (mode)
1512 new.inuse = page->objects;
1513
1514 VM_BUG_ON(new.frozen);
1515 new.frozen = 1;
1516
1517 } while (!__cmpxchg_double_slab(s, page,
1518 freelist, counters,
1519 NULL, new.counters,
1520 "lock and freeze"));
1521
1522 remove_partial(n, page);
1523 return freelist;
1524}
1525
1526static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1527
1528
1529
1530
1531static void *get_partial_node(struct kmem_cache *s,
1532 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1533{
1534 struct page *page, *page2;
1535 void *object = NULL;
1536
1537
1538
1539
1540
1541
1542
1543 if (!n || !n->nr_partial)
1544 return NULL;
1545
1546 spin_lock(&n->list_lock);
1547 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1548 void *t = acquire_slab(s, n, page, object == NULL);
1549 int available;
1550
1551 if (!t)
1552 break;
1553
1554 if (!object) {
1555 c->page = page;
1556 c->node = page_to_nid(page);
1557 stat(s, ALLOC_FROM_PARTIAL);
1558 object = t;
1559 available = page->objects - page->inuse;
1560 } else {
1561 page->freelist = t;
1562 available = put_cpu_partial(s, page, 0);
1563 }
1564 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1565 break;
1566
1567 }
1568 spin_unlock(&n->list_lock);
1569 return object;
1570}
1571
1572
1573
1574
1575static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
1576 struct kmem_cache_cpu *c)
1577{
1578#ifdef CONFIG_NUMA
1579 struct zonelist *zonelist;
1580 struct zoneref *z;
1581 struct zone *zone;
1582 enum zone_type high_zoneidx = gfp_zone(flags);
1583 void *object;
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603 if (!s->remote_node_defrag_ratio ||
1604 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1605 return NULL;
1606
1607 get_mems_allowed();
1608 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1609 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1610 struct kmem_cache_node *n;
1611
1612 n = get_node(s, zone_to_nid(zone));
1613
1614 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1615 n->nr_partial > s->min_partial) {
1616 object = get_partial_node(s, n, c);
1617 if (object) {
1618 put_mems_allowed();
1619 return object;
1620 }
1621 }
1622 }
1623 put_mems_allowed();
1624#endif
1625 return NULL;
1626}
1627
1628
1629
1630
1631static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1632 struct kmem_cache_cpu *c)
1633{
1634 void *object;
1635 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1636
1637 object = get_partial_node(s, get_node(s, searchnode), c);
1638 if (object || node != NUMA_NO_NODE)
1639 return object;
1640
1641 return get_any_partial(s, flags, c);
1642}
1643
/*
 * Transaction ids (tids).
 *
 * Each per cpu structure starts with tid == cpu number (init_tid()) and
 * every operation advances it by TID_STEP (next_tid()).  The remainder
 * modulo TID_STEP therefore identifies the cpu and the quotient counts
 * operations.
 */
#ifdef CONFIG_PREEMPT
/* Step by a power of two that is at least the number of configured cpus. */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/* No cpu information is encoded; the tid is a plain operation counter. */
#define TID_STEP 1
#endif

/* Return the tid that follows @tid. */
static inline unsigned long next_tid(unsigned long tid)
{
	unsigned long following = tid;

	following += TID_STEP;
	return following;
}

/* Extract the cpu part of @tid (always 0 when TID_STEP is 1). */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu_part = tid % TID_STEP;

	return (unsigned int)cpu_part;
}

/* Extract the operation counter part of @tid. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event_part = tid / TID_STEP;

	return event_part;
}

/* Initial tid for @cpu: the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	return (unsigned int)cpu;
}
1678
1679static inline void note_cmpxchg_failure(const char *n,
1680 const struct kmem_cache *s, unsigned long tid)
1681{
1682#ifdef SLUB_DEBUG_CMPXCHG
1683 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1684
1685 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1686
1687#ifdef CONFIG_PREEMPT
1688 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1689 printk("due to cpu change %d -> %d\n",
1690 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1691 else
1692#endif
1693 if (tid_to_event(tid) != tid_to_event(actual_tid))
1694 printk("due to cpu running other code. Event %ld->%ld\n",
1695 tid_to_event(tid), tid_to_event(actual_tid));
1696 else
1697 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1698 actual_tid, tid, next_tid(tid));
1699#endif
1700 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1701}
1702
1703void init_kmem_cache_cpus(struct kmem_cache *s)
1704{
1705 int cpu;
1706
1707 for_each_possible_cpu(cpu)
1708 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1709}
1710
1711
1712
1713
/*
 * Detach the current cpu slab from @c, hand its remaining per cpu objects
 * back to the page and unfreeze the page.
 *
 * Depending on the resulting state the page ends up on the node partial
 * list (M_PARTIAL), on the full list (M_FULL, debug caches only take the
 * lock for this), or is discarded (M_FREE).
 */
static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct page *page = c->page;
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;		/* non-zero once n->list_lock is held */
	enum slab_modes l = M_NONE, m = M_NONE;	/* l: list state applied so far, m: target state */
	void *freelist;
	void *nextfree;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;

	/*
	 * Objects were freed to the page while it was the cpu slab; in that
	 * case the page is queued at the tail of the partial list.
	 */
	if (page->freelist) {
		stat(s, DEACTIVATE_REMOTE_FREES);
		tail = DEACTIVATE_TO_TAIL;
	}

	/* Advance the tid and clear the per cpu state before touching the page. */
	c->tid = next_tid(c->tid);
	c->page = NULL;
	freelist = c->freelist;
	c->freelist = NULL;

	/*
	 * Stage one: push the per cpu objects back onto the page freelist
	 * one at a time while the page is still frozen.  The loop stops
	 * with the last object still held in 'freelist' (its free pointer
	 * is NULL), so that it can be returned together with the unfreeze
	 * in stage two.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));

		freelist = nextfree;
	}

	/*
	 * Stage two: unfreeze the page.  The list manipulation for the
	 * target state is done first (under n->list_lock where required) and
	 * the cmpxchg then publishes the new freelist/counters.  If the
	 * cmpxchg fails the whole decision is redone; 'l' remembers which
	 * list the page was already put on so that it can be moved.
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	VM_BUG_ON(!old.frozen);

	/* Compute the new freelist and counters for the unfrozen page. */
	new.counters = old.counters;
	if (freelist) {
		/* Return the last per cpu object as part of the unfreeze. */
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;

	new.frozen = 0;

	if (!new.inuse && n->nr_partial > s->min_partial)
		/* Empty slab and the node has enough partial slabs: free it. */
		m = M_FREE;
	else if (new.freelist) {
		m = M_PARTIAL;
		if (!lock) {
			lock = 1;
			/* Taken before the page is added to the partial list below. */
			spin_lock(&n->list_lock);
		}
	} else {
		m = M_FULL;
		if (kmem_cache_debug(s) && !lock) {
			lock = 1;
			/* Only debug caches take the lock for a full slab. */
			spin_lock(&n->list_lock);
		}
	}

	if (l != m) {

		/* Undo the list placement of a previous, failed attempt. */
		if (l == M_PARTIAL)

			remove_partial(n, page);

		else if (l == M_FULL)

			remove_full(s, page);

		if (m == M_PARTIAL) {

			add_partial(n, page, tail);
			/* 'tail' doubles as the statistics item to bump. */
			stat(s, tail);

		} else if (m == M_FULL) {

			stat(s, DEACTIVATE_FULL);
			add_full(s, n, page);

		}
	}

	l = m;
	if (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"))
		goto redo;

	if (lock)
		spin_unlock(&n->list_lock);

	if (m == M_FREE) {
		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1861
1862
/*
 * Unfreeze every page on the current cpu's partial list (c->partial).
 *
 * Each page is either moved to the partial list of its node or, if it is
 * empty and the emptiness test below permits it, collected on a private
 * list and discarded after the node lock has been dropped.
 *
 * Operates on this_cpu_ptr(s->cpu_slab), i.e. always on the cpu that
 * executes the function.
 */
static void unfreeze_partials(struct kmem_cache *s)
{
	struct kmem_cache_node *n = NULL;	/* node whose list_lock is currently held */
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
	struct page *page, *discard_page = NULL;

	while ((page = c->partial)) {
		enum slab_modes { M_PARTIAL, M_FREE };
		enum slab_modes l, m;	/* l: list state applied so far, m: target state */
		struct page new;
		struct page old;

		/* Pop the page off the per cpu partial list. */
		c->partial = page->next;
		l = M_FREE;

		do {

			old.freelist = page->freelist;
			old.counters = page->counters;
			VM_BUG_ON(!old.frozen);

			new.counters = old.counters;
			new.freelist = old.freelist;

			new.frozen = 0;

			/*
			 * An empty page is freed when no node lock is held yet
			 * or when the locked node has more than min_partial
			 * partial slabs.
			 */
			if (!new.inuse && (!n || n->nr_partial > s->min_partial))
				m = M_FREE;
			else {
				struct kmem_cache_node *n2 = get_node(s,
							page_to_nid(page));

				m = M_PARTIAL;
				/* Switch locks when the page belongs to another node. */
				if (n != n2) {
					if (n)
						spin_unlock(&n->list_lock);

					n = n2;
					spin_lock(&n->list_lock);
				}
			}

			/* Adjust list membership if the target state changed on retry. */
			if (l != m) {
				if (l == M_PARTIAL) {
					remove_partial(n, page);
					stat(s, FREE_REMOVE_PARTIAL);
				} else {
					add_partial(n, page,
						DEACTIVATE_TO_TAIL);
					stat(s, FREE_ADD_PARTIAL);
				}

				l = m;
			}

		} while (!cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		/* Defer the actual free until the node lock is released. */
		if (m == M_FREE) {
			page->next = discard_page;
			discard_page = page;
		}
	}

	if (n)
		spin_unlock(&n->list_lock);

	while (discard_page) {
		page = discard_page;
		discard_page = discard_page->next;

		stat(s, DEACTIVATE_EMPTY);
		discard_slab(s, page);
		stat(s, FREE_SLAB);
	}
}
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
/*
 * Push a frozen page onto the head of this cpu's partial list.
 *
 * The head page of the list carries the running totals for the whole
 * list in page->pages (number of pages) and page->pobjects (free objects
 * counted when each page was added).
 *
 * @s:     cache the page belongs to
 * @page:  frozen slab page to add
 * @drain: when non-zero and the list already holds more than
 *         s->cpu_partial objects, the existing list is first moved back
 *         to the node partial lists via unfreeze_partials()
 *
 * Returns the pobjects total recorded on the new list head.
 */
int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
{
	struct page *oldpage;
	int pages;
	int pobjects;

	do {
		pages = 0;
		pobjects = 0;
		oldpage = this_cpu_read(s->cpu_slab->partial);

		if (oldpage) {
			pobjects = oldpage->pobjects;
			pages = oldpage->pages;
			if (drain && pobjects > s->cpu_partial) {
				unsigned long flags;

				/*
				 * The list is over its limit: drain it with
				 * interrupts disabled and start counting anew.
				 */
				local_irq_save(flags);
				unfreeze_partials(s);
				local_irq_restore(flags);
				pobjects = 0;
				pages = 0;
			}
		}

		pages++;
		pobjects += page->objects - page->inuse;

		page->pages = pages;
		page->pobjects = pobjects;
		page->next = oldpage;

		/* Retry if the list head changed underneath us. */
	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	stat(s, CPU_PARTIAL_FREE);
	return pobjects;
}
1990
/*
 * Count a cpu slab flush and deactivate the cpu slab held in @c.
 * deactivate_slab() dereferences c->page, so callers check that it is
 * non-NULL first.
 */
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
	stat(s, CPUSLAB_FLUSH);
	deactivate_slab(s, c);
}
1996
1997
1998
1999
2000
2001
2002static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2003{
2004 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2005
2006 if (likely(c)) {
2007 if (c->page)
2008 flush_slab(s, c);
2009
2010 unfreeze_partials(s);
2011 }
2012}
2013
/*
 * Per cpu callback used by flush_all(): flush the executing cpu's slab
 * for the cache passed in @d.
 */
static void flush_cpu_slab(void *d)
{
	struct kmem_cache *cache = d;
	int this_cpu = smp_processor_id();

	__flush_cpu_slab(cache, this_cpu);
}
2020
/*
 * Run flush_cpu_slab() for cache @s on every cpu via on_each_cpu(); the
 * final argument (1) asks on_each_cpu() to wait for completion.
 */
static void flush_all(struct kmem_cache *s)
{
	on_each_cpu(flush_cpu_slab, s, 1);
}
2025
2026
2027
2028
2029
/*
 * Check whether the cpu slab satisfies the node constraint of a request.
 *
 * Returns 1 when @node is NUMA_NO_NODE or equals c->node, 0 otherwise.
 * Without CONFIG_NUMA every request matches.
 */
static inline int node_match(struct kmem_cache_cpu *c, int node)
{
#ifdef CONFIG_NUMA
	if (node == NUMA_NO_NODE)
		return 1;

	return c->node == node;
#else
	return 1;
#endif
}
2038
2039static int count_free(struct page *page)
2040{
2041 return page->objects - page->inuse;
2042}
2043
2044static unsigned long count_partial(struct kmem_cache_node *n,
2045 int (*get_count)(struct page *))
2046{
2047 unsigned long flags;
2048 unsigned long x = 0;
2049 struct page *page;
2050
2051 spin_lock_irqsave(&n->list_lock, flags);
2052 list_for_each_entry(page, &n->partial, lru)
2053 x += get_count(page);
2054 spin_unlock_irqrestore(&n->list_lock, flags);
2055 return x;
2056}
2057
/*
 * Total number of objects tracked for node @n.  The counter only exists
 * with CONFIG_SLUB_DEBUG; otherwise 0 is reported.
 */
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
{
#ifndef CONFIG_SLUB_DEBUG
	return 0;
#else
	return atomic_long_read(&n->total_objects);
#endif
}
2066
2067static noinline void
2068slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2069{
2070 int node;
2071
2072 printk(KERN_WARNING
2073 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2074 nid, gfpflags);
2075 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2076 "default order: %d, min order: %d\n", s->name, s->objsize,
2077 s->size, oo_order(s->oo), oo_order(s->min));
2078
2079 if (oo_order(s->min) > get_order(s->objsize))
2080 printk(KERN_WARNING " %s debugging increased min order, use "
2081 "slub_debug=O to disable.\n", s->name);
2082
2083 for_each_online_node(node) {
2084 struct kmem_cache_node *n = get_node(s, node);
2085 unsigned long nr_slabs;
2086 unsigned long nr_objs;
2087 unsigned long nr_free;
2088
2089 if (!n)
2090 continue;
2091
2092 nr_free = count_partial(n, count_free);
2093 nr_slabs = node_nr_slabs(n);
2094 nr_objs = node_nr_objs(n);
2095
2096 printk(KERN_WARNING
2097 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2098 node, nr_slabs, nr_objs, nr_free);
2099 }
2100}
2101
2102static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2103 int node, struct kmem_cache_cpu **pc)
2104{
2105 void *object;
2106 struct kmem_cache_cpu *c;
2107 struct page *page = new_slab(s, flags, node);
2108
2109 if (page) {
2110 c = __this_cpu_ptr(s->cpu_slab);
2111 if (c->page)
2112 flush_slab(s, c);
2113
2114
2115
2116
2117
2118 object = page->freelist;
2119 page->freelist = NULL;
2120
2121 stat(s, ALLOC_SLAB);
2122 c->node = page_to_nid(page);
2123 c->page = page;
2124 *pc = c;
2125 } else
2126 object = NULL;
2127
2128 return object;
2129}
2130
2131
2132
2133
2134
2135
2136
2137
2138
/*
 * Atomically take over the freelist of a frozen page.
 *
 * The page freelist is replaced by NULL and all objects are accounted as
 * in use.  If a freelist was obtained the page stays frozen; if the
 * freelist was already empty the page is unfrozen by the same update.
 *
 * Returns the detached freelist, or NULL if the page had none.
 */
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;
		new.counters = counters;
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		/* Stay frozen only if there is something to hand out. */
		new.frozen = freelist != NULL;

	} while (!cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
/*
 * Allocation slow path, entered from slab_alloc() when the per cpu
 * freelist is empty or the cpu slab does not match the requested node.
 *
 * Runs with interrupts disabled.  In order it tries:
 *   1. the per cpu freelist (it may have been refilled meanwhile),
 *   2. the freelist of the current cpu slab page (get_freelist()),
 *   3. the next page on the per cpu partial list,
 *   4. a partial slab from the node lists (get_partial()),
 *   5. a newly allocated slab (new_slab_objects()).
 *
 * Returns the allocated object, or NULL when no memory could be obtained.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * The caller sampled @c with preemption possible, so it may belong
	 * to another cpu by now; reload it now that interrupts are off.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	if (!c->page)
		goto new_slab;
redo:
	if (unlikely(!node_match(c, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, c);
		goto new_slab;
	}

	/* Recheck the per cpu freelist now that interrupts are disabled. */
	object = c->freelist;
	if (object)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	object = get_freelist(s, c->page);

	if (!object) {
		/* get_freelist() unfroze the page; just forget about it. */
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/* Hand out 'object' and keep the rest of the list for the fast path. */
	c->freelist = get_freepointer(s, object);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return object;

new_slab:

	/* Promote the next per cpu partial page to be the cpu slab. */
	if (c->partial) {
		c->page = c->partial;
		c->partial = c->page->next;
		c->node = page_to_nid(c->page);
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	/* Then try to get a partial slab from the node lists. */
	object = get_partial(s, gfpflags, node, c);

	if (unlikely(!object)) {

		object = new_slab_objects(s, gfpflags, node, &c);

		if (unlikely(!object)) {
			if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
				slab_out_of_memory(s, gfpflags, node);

			local_irq_restore(flags);
			return NULL;
		}
	}

	if (likely(!kmem_cache_debug(s)))
		goto load_freelist;

	/* Debug caches: run the checks; on failure try another slab. */
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto new_slab;

	/*
	 * Debug caches do not keep a cpu slab: store the remaining freelist
	 * and deactivate the slab straight away.
	 */
	c->freelist = get_freepointer(s, object);
	deactivate_slab(s, c);
	c->node = NUMA_NO_NODE;
	local_irq_restore(flags);
	return object;
}
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
/*
 * Allocate one object from cache @s.
 *
 * Fast path: take the first object off the per cpu freelist with a
 * this_cpu_cmpxchg_double() on (freelist, tid), without disabling
 * interrupts.  If the freelist is empty or the node does not match,
 * __slab_alloc() does the work.
 *
 * @gfpflags: allocation flags; __GFP_ZERO zeroes s->objsize bytes
 * @node:     requested node or NUMA_NO_NODE
 * @addr:     caller address, passed on to __slab_alloc()
 *
 * Returns the object, or NULL if the pre-alloc hook vetoed the request
 * or the slow path failed.
 */
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

redo:

	/*
	 * Sampled without disabling preemption or interrupts; the tid
	 * comparison in the cmpxchg below detects if anything changed.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * Read the tid before the freelist; barrier() keeps the compiler
	 * from reordering the two reads.
	 */
	tid = c->tid;
	barrier();

	object = c->freelist;
	if (unlikely(!object || !node_match(c, node)))

		object = __slab_alloc(s, gfpflags, node, addr, c);

	else {
		/*
		 * Replace (freelist, tid) with (next object, next tid) in one
		 * atomic step.  It fails, and the whole operation is retried,
		 * if either value no longer matches what was sampled above.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				get_freepointer_safe(s, object), next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->objsize);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}
2342
2343void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2344{
2345 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2346
2347 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2348
2349 return ret;
2350}
2351EXPORT_SYMBOL(kmem_cache_alloc);
2352
2353#ifdef CONFIG_TRACING
2354void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2355{
2356 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2357 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2358 return ret;
2359}
2360EXPORT_SYMBOL(kmem_cache_alloc_trace);
2361
2362void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2363{
2364 void *ret = kmalloc_order(size, flags, order);
2365 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2366 return ret;
2367}
2368EXPORT_SYMBOL(kmalloc_order_trace);
2369#endif
2370
2371#ifdef CONFIG_NUMA
2372void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2373{
2374 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2375
2376 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2377 s->objsize, s->size, gfpflags, node);
2378
2379 return ret;
2380}
2381EXPORT_SYMBOL(kmem_cache_alloc_node);
2382
2383#ifdef CONFIG_TRACING
2384void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2385 gfp_t gfpflags,
2386 int node, size_t size)
2387{
2388 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2389
2390 trace_kmalloc_node(_RET_IP_, ret,
2391 size, s->size, gfpflags, node);
2392 return ret;
2393}
2394EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2395#endif
2396#endif
2397
2398
2399
2400
2401
2402
2403
2404
2405
/*
 * Free slow path: return object @x to @page when the page is not the
 * current cpu slab.
 *
 * The object is pushed onto page->freelist with a cmpxchg.  Depending on
 * the state of the page this may additionally
 *   - freeze a previously full page and put it on the per cpu partial
 *     list (non-debug caches),
 *   - move a previously full page to the node partial list, or
 *   - discard a page that became empty.
 *
 * @addr: caller address, passed to free_debug_processing()
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	int inuse;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;	/* non-NULL once n->list_lock is held */
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
		return;

	do {
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		/*
		 * List handling is only needed for an unfrozen page that
		 * either becomes empty or was full (no prior freelist), and
		 * only if the lock was not already taken by an earlier pass.
		 */
		if ((!new.inuse || !prior) && !was_frozen && !n) {

			if (!kmem_cache_debug(s) && !prior)

				/*
				 * The page was full: freeze it so that it can go
				 * onto the per cpu partial list without taking
				 * the node lock.
				 */
				new.frozen = 1;

			else {

				n = get_node(s, page_to_nid(page));

				/*
				 * Take the list lock before the cmpxchg so that
				 * the list update below sees a consistent page
				 * state.  The lock stays held across retries.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}
		inuse = new.inuse;

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/* We froze the page above: queue it on the per cpu partial list. */
		if (new.frozen && !was_frozen)
			put_cpu_partial(s, page, 1);

		/* The page was already frozen; nothing else to do for it here. */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}

	/*
	 * n->list_lock is held from here on.  A retry of the loop above may
	 * have found the page frozen after the lock was taken.
	 */
	if (was_frozen)
		stat(s, FREE_FROZEN);
	else {
		if (unlikely(!inuse && n->nr_partial > s->min_partial))
			goto slab_empty;

		/* The page was full before this free: it is partial now. */
		if (unlikely(!prior)) {
			remove_full(s, page);
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/* The page had free objects before, so it is on the partial list. */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else
		/* The page was full before this free. */
		remove_full(s, page);

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
/*
 * Free object @x belonging to @page.
 *
 * Fast path: if @page is the current cpu slab, push the object onto the
 * per cpu freelist with a this_cpu_cmpxchg_double() on (freelist, tid),
 * retrying if either value changed.  Otherwise __slab_free() is used.
 *
 * @addr: caller address, passed on to __slab_free()
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Sampled without disabling preemption or interrupts; the tid
	 * comparison in the cmpxchg below detects if anything changed.
	 */
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	barrier();

	if (likely(page == c->page)) {
		/* Link the object in front of the current per cpu freelist. */
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}
2567
2568void kmem_cache_free(struct kmem_cache *s, void *x)
2569{
2570 struct page *page;
2571
2572 page = virt_to_head_page(x);
2573
2574 slab_free(s, page, x, _RET_IP_);
2575
2576 trace_kmem_cache_free(_RET_IP_, x);
2577}
2578EXPORT_SYMBOL(kmem_cache_free);
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
/*
 * Tunables for choosing the page order of a slab.
 *
 * slub_min_order:   lowest order slab_order() starts from; set by the
 *                   "slub_min_order=" boot parameter.
 * slub_max_order:   highest order calculate_order() accepts before it
 *                   falls back to looser criteria.
 * slub_min_objects: desired minimum number of objects per slab; when 0,
 *                   calculate_order() derives a value from nr_cpu_ids.
 */
static int slub_min_order;
static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
static int slub_min_objects;
2602
2603
2604
2605
2606
/*
 * Merge control flag.  Not referenced in this part of the file;
 * presumably non-zero disables merging of slab caches — confirm.
 */
static int slub_nomerge;
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634static inline int slab_order(int size, int min_objects,
2635 int max_order, int fract_leftover, int reserved)
2636{
2637 int order;
2638 int rem;
2639 int min_order = slub_min_order;
2640
2641 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2642 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2643
2644 for (order = max(min_order,
2645 fls(min_objects * size - 1) - PAGE_SHIFT);
2646 order <= max_order; order++) {
2647
2648 unsigned long slab_size = PAGE_SIZE << order;
2649
2650 if (slab_size < min_objects * size + reserved)
2651 continue;
2652
2653 rem = (slab_size - reserved) % size;
2654
2655 if (rem <= slab_size / fract_leftover)
2656 break;
2657
2658 }
2659
2660 return order;
2661}
2662
2663static inline int calculate_order(int size, int reserved)
2664{
2665 int order;
2666 int min_objects;
2667 int fraction;
2668 int max_objects;
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678 min_objects = slub_min_objects;
2679 if (!min_objects)
2680 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2681 max_objects = order_objects(slub_max_order, size, reserved);
2682 min_objects = min(min_objects, max_objects);
2683
2684 while (min_objects > 1) {
2685 fraction = 16;
2686 while (fraction >= 4) {
2687 order = slab_order(size, min_objects,
2688 slub_max_order, fraction, reserved);
2689 if (order <= slub_max_order)
2690 return order;
2691 fraction /= 2;
2692 }
2693 min_objects--;
2694 }
2695
2696
2697
2698
2699
2700 order = slab_order(size, 1, slub_max_order, 1, reserved);
2701 if (order <= slub_max_order)
2702 return order;
2703
2704
2705
2706
2707 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2708 if (order < MAX_ORDER)
2709 return order;
2710 return -ENOSYS;
2711}
2712
2713
2714
2715
2716static unsigned long calculate_alignment(unsigned long flags,
2717 unsigned long align, unsigned long size)
2718{
2719
2720
2721
2722
2723
2724
2725
2726 if (flags & SLAB_HWCACHE_ALIGN) {
2727 unsigned long ralign = cache_line_size();
2728 while (size <= ralign / 2)
2729 ralign /= 2;
2730 align = max(align, ralign);
2731 }
2732
2733 if (align < ARCH_SLAB_MINALIGN)
2734 align = ARCH_SLAB_MINALIGN;
2735
2736 return ALIGN(align, sizeof(void *));
2737}
2738
2739static void
2740init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2741{
2742 n->nr_partial = 0;
2743 spin_lock_init(&n->list_lock);
2744 INIT_LIST_HEAD(&n->partial);
2745#ifdef CONFIG_SLUB_DEBUG
2746 atomic_long_set(&n->nr_slabs, 0);
2747 atomic_long_set(&n->total_objects, 0);
2748 INIT_LIST_HEAD(&n->full);
2749#endif
2750}
2751
2752static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2753{
2754 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2755 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2756
2757
2758
2759
2760
2761 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2762 2 * sizeof(void *));
2763
2764 if (!s->cpu_slab)
2765 return 0;
2766
2767 init_kmem_cache_cpus(s);
2768
2769 return 1;
2770}
2771
/* Cache from which the struct kmem_cache_node of every cache is allocated. */
static struct kmem_cache *kmem_cache_node;
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
/*
 * Bootstrap helper: set up the kmem_cache_node of the kmem_cache_node
 * cache itself for @node.
 *
 * init_kmem_cache_nodes() calls this while slab_state is DOWN instead of
 * using kmem_cache_alloc_node().  A slab is allocated directly, the
 * first object is carved off its freelist by hand and becomes the per
 * node structure, and the slab is then put on that node's partial list.
 */
static void early_kmem_cache_node_alloc(int node)
{
	struct page *page;
	struct kmem_cache_node *n;

	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

	page = new_slab(kmem_cache_node, GFP_NOWAIT, node);

	BUG_ON(!page);
	/* The page came from a different node than requested: warn, continue. */
	if (page_to_nid(page) != node) {
		printk(KERN_ERR "SLUB: Unable to allocate memory from "
				"node %d\n", node);
		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
				"in order to be able to continue\n");
	}

	/* Manually take the first object off the new slab's freelist. */
	n = page->freelist;
	BUG_ON(!n);
	page->freelist = get_freepointer(kmem_cache_node, n);
	page->inuse = 1;
	page->frozen = 0;
	kmem_cache_node->node[node] = n;
#ifdef CONFIG_SLUB_DEBUG
	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
	init_tracking(kmem_cache_node, n);
#endif
	init_kmem_cache_node(n, kmem_cache_node);
	inc_slabs_node(kmem_cache_node, node, page->objects);

	add_partial(n, page, DEACTIVATE_TO_HEAD);
}
2815
2816static void free_kmem_cache_nodes(struct kmem_cache *s)
2817{
2818 int node;
2819
2820 for_each_node_state(node, N_NORMAL_MEMORY) {
2821 struct kmem_cache_node *n = s->node[node];
2822
2823 if (n)
2824 kmem_cache_free(kmem_cache_node, n);
2825
2826 s->node[node] = NULL;
2827 }
2828}
2829
2830static int init_kmem_cache_nodes(struct kmem_cache *s)
2831{
2832 int node;
2833
2834 for_each_node_state(node, N_NORMAL_MEMORY) {
2835 struct kmem_cache_node *n;
2836
2837 if (slab_state == DOWN) {
2838 early_kmem_cache_node_alloc(node);
2839 continue;
2840 }
2841 n = kmem_cache_alloc_node(kmem_cache_node,
2842 GFP_KERNEL, node);
2843
2844 if (!n) {
2845 free_kmem_cache_nodes(s);
2846 return 0;
2847 }
2848
2849 s->node[node] = n;
2850 init_kmem_cache_node(n, s);
2851 }
2852 return 1;
2853}
2854
2855static void set_min_partial(struct kmem_cache *s, unsigned long min)
2856{
2857 if (min < MIN_PARTIAL)
2858 min = MIN_PARTIAL;
2859 else if (min > MAX_PARTIAL)
2860 min = MAX_PARTIAL;
2861 s->min_partial = min;
2862}
2863
2864
2865
2866
2867
/*
 * Compute the object layout and slab order of cache @s from s->objsize,
 * s->align and s->flags.
 *
 * Fills in s->inuse, s->offset (when relocated), s->align, s->size,
 * s->allocflags, s->oo, s->min and possibly s->max.
 *
 * @forced_order: order to use, or a negative value to have it calculated
 *
 * Returns non-zero if at least one object fits into a slab of the chosen
 * order, 0 otherwise (including when no order could be determined).
 */
static int calculate_sizes(struct kmem_cache *s, int forced_order)
{
	unsigned long flags = s->flags;
	unsigned long size = s->objsize;
	unsigned long align = s->align;
	int order;

	/* Round the object size up to a whole number of words. */
	size = ALIGN(size, sizeof(void *));

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * __OBJECT_POISON is set only for poisoned caches that are neither
	 * RCU-freed nor have a constructor.
	 */
	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
			!s->ctor)
		s->flags |= __OBJECT_POISON;
	else
		s->flags &= ~__OBJECT_POISON;


	/*
	 * Red zoning: if word alignment left no spare bytes after the
	 * object, add one word.
	 */
	if ((flags & SLAB_RED_ZONE) && size == s->objsize)
		size += sizeof(void *);
#endif

	/* Everything up to here counts as the in-use part of the object. */
	s->inuse = size;

	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
		s->ctor)) {
		/*
		 * Reserve an extra word after the object and record its
		 * position in s->offset (presumably the free pointer
		 * location, so that the object contents are left untouched
		 * while free — confirm against get_freepointer()).
		 */
		s->offset = size;
		size += sizeof(void *);
	}

#ifdef CONFIG_SLUB_DEBUG
	if (flags & SLAB_STORE_USER)
		/* Room for two struct track records per object. */
		size += 2 * sizeof(struct track);

	if (flags & SLAB_RED_ZONE)
		/* One more word of padding at the end of the object. */
		size += sizeof(void *);
#endif

	/* Determine the alignment from the flags and the object size. */
	align = calculate_alignment(flags, align, s->objsize);
	s->align = align;

	/* Round the total size up so that consecutive objects stay aligned. */
	size = ALIGN(size, align);
	s->size = size;
	if (forced_order >= 0)
		order = forced_order;
	else
		order = calculate_order(size, s->reserved);

	if (order < 0)
		return 0;

	s->allocflags = 0;
	if (order)
		s->allocflags |= __GFP_COMP;

	if (s->flags & SLAB_CACHE_DMA)
		s->allocflags |= SLUB_DMA;

	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		s->allocflags |= __GFP_RECLAIMABLE;

	/*
	 * s->oo is the chosen order, s->min the smallest order that holds a
	 * single object; s->max tracks the largest object count seen.
	 */
	s->oo = oo_make(order, size, s->reserved);
	s->min = oo_make(get_order(size), size, s->reserved);
	if (oo_objects(s->oo) > oo_objects(s->max))
		s->max = s->oo;

	return !!oo_objects(s->oo);

}
2987
/*
 * Initialise the kmem_cache structure @s.
 *
 * @name:  cache name (the pointer is stored, not copied)
 * @size:  object size
 * @align: requested alignment
 * @flags: SLAB_* creation flags (filtered through kmem_cache_flags())
 * @ctor:  optional object constructor
 *
 * Returns 1 on success and 0 on failure.  With SLAB_PANIC in @flags a
 * failure panics instead of returning.
 */
static int kmem_cache_open(struct kmem_cache *s,
		const char *name, size_t size,
		size_t align, unsigned long flags,
		void (*ctor)(void *))
{
	memset(s, 0, kmem_size);
	s->name = name;
	s->ctor = ctor;
	s->objsize = size;
	s->align = align;
	s->flags = kmem_cache_flags(size, flags, name, ctor);
	s->reserved = 0;

	/* Reserve room for the rcu_head at the end of each slab if needed. */
	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
		s->reserved = sizeof(struct rcu_head);

	if (!calculate_sizes(s, -1))
		goto error;
	if (disable_higher_order_debug) {
		/*
		 * If the debug metadata pushed the object into a higher
		 * order, drop the metadata flags and lay the object out again.
		 */
		if (get_order(s->size) > get_order(s->objsize)) {
			s->flags &= ~DEBUG_METADATA_FLAGS;
			s->offset = 0;
			if (!calculate_sizes(s, -1))
				goto error;
		}
	}

#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Enable the cmpxchg_double based page updates for non-debug caches. */
		s->flags |= __CMPXCHG_DOUBLE;
#endif

	/* min_partial scales with log2 of the object size (clamped). */
	set_min_partial(s, ilog2(s->size) / 2);

	/*
	 * cpu_partial limits the number of objects kept on the per cpu
	 * partial list (see put_cpu_partial()): larger objects get a
	 * smaller limit, debug caches get none.
	 */
	if (kmem_cache_debug(s))
		s->cpu_partial = 0;
	else if (s->size >= PAGE_SIZE)
		s->cpu_partial = 2;
	else if (s->size >= 1024)
		s->cpu_partial = 6;
	else if (s->size >= 256)
		s->cpu_partial = 13;
	else
		s->cpu_partial = 30;

	s->refcount = 1;
#ifdef CONFIG_NUMA
	s->remote_node_defrag_ratio = 1000;
#endif
	if (!init_kmem_cache_nodes(s))
		goto error;

	if (alloc_kmem_cache_cpus(s))
		return 1;

	/* Per cpu allocation failed: undo the per node structures. */
	free_kmem_cache_nodes(s);
error:
	if (flags & SLAB_PANIC)
		panic("Cannot create slab %s size=%lu realsize=%u "
			"order=%u offset=%u flags=%lx\n",
			s->name, (unsigned long)size, s->size, oo_order(s->oo),
			s->offset, flags);
	return 0;
}
3079
3080
3081
3082
/*
 * Return the object size (s->objsize) of cache @s, i.e. the size that
 * was passed in at creation, not the padded per-object size s->size.
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->objsize;
}
EXPORT_SYMBOL(kmem_cache_size);
3088
/*
 * Debug aid: report @text as a slab error for @page and print every
 * object whose bit is not set in the map filled in by get_map()
 * (presumably the objects still allocated — confirm against get_map()).
 *
 * Does nothing without CONFIG_SLUB_DEBUG, and silently returns if the
 * bitmap cannot be allocated.
 */
static void list_slab_objects(struct kmem_cache *s, struct page *page,
							const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
	void *base = page_address(page);
	void *obj;
	unsigned long *map;

	map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
		      GFP_ATOMIC);
	if (!map)
		return;

	slab_err(s, page, "%s", text);
	slab_lock(page);

	get_map(s, page, map);
	for_each_object(obj, s, base, page->objects) {
		if (test_bit(slab_index(obj, s, base), map))
			continue;

		printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
							obj, obj - base);
		print_tracking(s, obj);
	}

	slab_unlock(page);
	kfree(map);
#endif
}
3115
3116
3117
3118
3119
3120
3121static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3122{
3123 struct page *page, *h;
3124
3125 list_for_each_entry_safe(page, h, &n->partial, lru) {
3126 if (!page->inuse) {
3127 remove_partial(n, page);
3128 discard_slab(s, page);
3129 } else {
3130 list_slab_objects(s, page,
3131 "Objects remaining on kmem_cache_close()");
3132 }
3133 }
3134}
3135
3136
3137
3138
3139static inline int kmem_cache_close(struct kmem_cache *s)
3140{
3141 int node;
3142
3143 flush_all(s);
3144 free_percpu(s->cpu_slab);
3145
3146 for_each_node_state(node, N_NORMAL_MEMORY) {
3147 struct kmem_cache_node *n = get_node(s, node);
3148
3149 free_partial(s, n);
3150 if (n->nr_partial || slabs_node(s, node))
3151 return 1;
3152 }
3153 free_kmem_cache_nodes(s);
3154 return 0;
3155}
3156
3157
3158
3159
3160
3161void kmem_cache_destroy(struct kmem_cache *s)
3162{
3163 down_write(&slub_lock);
3164 s->refcount--;
3165 if (!s->refcount) {
3166 list_del(&s->list);
3167 up_write(&slub_lock);
3168 if (kmem_cache_close(s)) {
3169 printk(KERN_ERR "SLUB %s: %s called for cache that "
3170 "still has objects.\n", s->name, __func__);
3171 dump_stack();
3172 }
3173 if (s->flags & SLAB_DESTROY_BY_RCU)
3174 rcu_barrier();
3175 sysfs_slab_remove(s);
3176 } else
3177 up_write(&slub_lock);
3178}
3179EXPORT_SYMBOL(kmem_cache_destroy);
3180
3181
3182
3183
3184
/*
 * General-purpose kmalloc caches. get_slab() picks the slot: sizes up to
 * 192 bytes go through size_index[], larger sizes use fls(size - 1).
 */
struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
EXPORT_SYMBOL(kmalloc_caches);

/*
 * Cache used to allocate struct kmem_cache descriptors during boot
 * (see create_kmalloc_cache() and kmem_cache_init()).
 */
static struct kmem_cache *kmem_cache;

#ifdef CONFIG_ZONE_DMA
/* DMA counterparts of kmalloc_caches[], selected when SLUB_DMA is set. */
static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
#endif
3193
3194static int __init setup_slub_min_order(char *str)
3195{
3196 get_option(&str, &slub_min_order);
3197
3198 return 1;
3199}
3200
3201__setup("slub_min_order=", setup_slub_min_order);
3202
3203static int __init setup_slub_max_order(char *str)
3204{
3205 get_option(&str, &slub_max_order);
3206 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3207
3208 return 1;
3209}
3210
3211__setup("slub_max_order=", setup_slub_max_order);
3212
3213static int __init setup_slub_min_objects(char *str)
3214{
3215 get_option(&str, &slub_min_objects);
3216
3217 return 1;
3218}
3219
3220__setup("slub_min_objects=", setup_slub_min_objects);
3221
3222static int __init setup_slub_nomerge(char *str)
3223{
3224 slub_nomerge = 1;
3225 return 1;
3226}
3227
3228__setup("slub_nomerge", setup_slub_nomerge);
3229
3230static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3231 int size, unsigned int flags)
3232{
3233 struct kmem_cache *s;
3234
3235 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3236
3237
3238
3239
3240
3241 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3242 flags, NULL))
3243 goto panic;
3244
3245 list_add(&s->list, &slab_caches);
3246 return s;
3247
3248panic:
3249 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3250 return NULL;
3251}
3252
3253
3254
3255
3256
3257
3258
/*
 * Maps small allocation sizes to a slot in kmalloc_caches[]. Entry i is
 * used for sizes 8 * i + 1 .. 8 * (i + 1) (see size_index_elem()), so
 * the table covers 1..192 bytes. The per-entry comment gives the largest
 * size served by that entry. Slots 1 and 2 are the 96 and 192 byte
 * caches created in kmem_cache_init(); any other slot n is the cache of
 * size 1 << n. kmem_cache_init() patches this table when
 * KMALLOC_MIN_SIZE is larger than 8.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};
3285
/*
 * Convert an allocation size in bytes to its position in size_index[]:
 * sizes 1..8 give 0, 9..16 give 1, and so on in steps of eight bytes.
 */
static inline int size_index_elem(size_t bytes)
{
	size_t last_byte = bytes - 1;

	/* Dividing an unsigned value by 8 is a right shift by 3. */
	return last_byte >> 3;
}
3290
3291static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3292{
3293 int index;
3294
3295 if (size <= 192) {
3296 if (!size)
3297 return ZERO_SIZE_PTR;
3298
3299 index = size_index[size_index_elem(size)];
3300 } else
3301 index = fls(size - 1);
3302
3303#ifdef CONFIG_ZONE_DMA
3304 if (unlikely((flags & SLUB_DMA)))
3305 return kmalloc_dma_caches[index];
3306
3307#endif
3308 return kmalloc_caches[index];
3309}
3310
3311void *__kmalloc(size_t size, gfp_t flags)
3312{
3313 struct kmem_cache *s;
3314 void *ret;
3315
3316 if (unlikely(size > SLUB_MAX_SIZE))
3317 return kmalloc_large(size, flags);
3318
3319 s = get_slab(size, flags);
3320
3321 if (unlikely(ZERO_OR_NULL_PTR(s)))
3322 return s;
3323
3324 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3325
3326 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3327
3328 return ret;
3329}
3330EXPORT_SYMBOL(__kmalloc);
3331
3332#ifdef CONFIG_NUMA
3333static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3334{
3335 struct page *page;
3336 void *ptr = NULL;
3337
3338 flags |= __GFP_COMP | __GFP_NOTRACK;
3339 page = alloc_pages_node(node, flags, get_order(size));
3340 if (page)
3341 ptr = page_address(page);
3342
3343 kmemleak_alloc(ptr, size, 1, flags);
3344 return ptr;
3345}
3346
3347void *__kmalloc_node(size_t size, gfp_t flags, int node)
3348{
3349 struct kmem_cache *s;
3350 void *ret;
3351
3352 if (unlikely(size > SLUB_MAX_SIZE)) {
3353 ret = kmalloc_large_node(size, flags, node);
3354
3355 trace_kmalloc_node(_RET_IP_, ret,
3356 size, PAGE_SIZE << get_order(size),
3357 flags, node);
3358
3359 return ret;
3360 }
3361
3362 s = get_slab(size, flags);
3363
3364 if (unlikely(ZERO_OR_NULL_PTR(s)))
3365 return s;
3366
3367 ret = slab_alloc(s, flags, node, _RET_IP_);
3368
3369 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3370
3371 return ret;
3372}
3373EXPORT_SYMBOL(__kmalloc_node);
3374#endif
3375
3376size_t ksize(const void *object)
3377{
3378 struct page *page;
3379
3380 if (unlikely(object == ZERO_SIZE_PTR))
3381 return 0;
3382
3383 page = virt_to_head_page(object);
3384
3385 if (unlikely(!PageSlab(page))) {
3386 WARN_ON(!PageCompound(page));
3387 return PAGE_SIZE << compound_order(page);
3388 }
3389
3390 return slab_ksize(page->slab);
3391}
3392EXPORT_SYMBOL(ksize);
3393
3394#ifdef CONFIG_SLUB_DEBUG
3395bool verify_mem_not_deleted(const void *x)
3396{
3397 struct page *page;
3398 void *object = (void *)x;
3399 unsigned long flags;
3400 bool rv;
3401
3402 if (unlikely(ZERO_OR_NULL_PTR(x)))
3403 return false;
3404
3405 local_irq_save(flags);
3406
3407 page = virt_to_head_page(x);
3408 if (unlikely(!PageSlab(page))) {
3409
3410 rv = true;
3411 goto out_unlock;
3412 }
3413
3414 slab_lock(page);
3415 if (on_freelist(page->slab, page, object)) {
3416 object_err(page->slab, page, object, "Object is on free-list");
3417 rv = false;
3418 } else {
3419 rv = true;
3420 }
3421 slab_unlock(page);
3422
3423out_unlock:
3424 local_irq_restore(flags);
3425 return rv;
3426}
3427EXPORT_SYMBOL(verify_mem_not_deleted);
3428#endif
3429
3430void kfree(const void *x)
3431{
3432 struct page *page;
3433 void *object = (void *)x;
3434
3435 trace_kfree(_RET_IP_, x);
3436
3437 if (unlikely(ZERO_OR_NULL_PTR(x)))
3438 return;
3439
3440 page = virt_to_head_page(x);
3441 if (unlikely(!PageSlab(page))) {
3442 BUG_ON(!PageCompound(page));
3443 kmemleak_free(x);
3444 put_page(page);
3445 return;
3446 }
3447 slab_free(page->slab, page, object, _RET_IP_);
3448}
3449EXPORT_SYMBOL(kfree);
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
/*
 * kmem_cache_shrink - release empty partial slabs and re-sort the rest.
 *
 * For each node with normal memory, the slabs on the partial list are
 * distributed into buckets indexed by their number of objects in use.
 * Slabs with nothing in use are discarded; all other buckets are put
 * back on the partial list, fullest bucket first.
 *
 * Returns 0 on success or -ENOMEM if the bucket array cannot be
 * allocated.
 */
int kmem_cache_shrink(struct kmem_cache *s)
{
	int node;
	int i;
	struct kmem_cache_node *n;
	struct page *page;
	struct page *t;
	int objects = oo_objects(s->max);
	/* One list head per possible in-use count, 0 .. objects - 1. */
	struct list_head *slabs_by_inuse =
		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
	unsigned long flags;

	if (!slabs_by_inuse)
		return -ENOMEM;

	flush_all(s);
	for_each_node_state(node, N_NORMAL_MEMORY) {
		n = get_node(s, node);

		if (!n->nr_partial)
			continue;

		for (i = 0; i < objects; i++)
			INIT_LIST_HEAD(slabs_by_inuse + i);

		spin_lock_irqsave(&n->list_lock, flags);

		/*
		 * Move every partial slab into the bucket matching its
		 * in-use count. Empty slabs end up in bucket 0 and are
		 * removed from the node's partial count right away.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
			list_move(&page->lru, slabs_by_inuse + page->inuse);
			if (!page->inuse)
				n->nr_partial--;
		}

		/*
		 * Rebuild the partial list from the non-empty buckets,
		 * splicing at the tail and starting with the highest
		 * in-use count. Bucket 0 is intentionally left out.
		 */
		for (i = objects - 1; i > 0; i--)
			list_splice(slabs_by_inuse + i, n->partial.prev);

		spin_unlock_irqrestore(&n->list_lock, flags);

		/* Free the empty slabs left in bucket 0, outside the lock. */
		list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
			discard_slab(s, page);
	}

	kfree(slabs_by_inuse);
	return 0;
}
EXPORT_SYMBOL(kmem_cache_shrink);
3518
3519#if defined(CONFIG_MEMORY_HOTPLUG)
3520static int slab_mem_going_offline_callback(void *arg)
3521{
3522 struct kmem_cache *s;
3523
3524 down_read(&slub_lock);
3525 list_for_each_entry(s, &slab_caches, list)
3526 kmem_cache_shrink(s);
3527 up_read(&slub_lock);
3528
3529 return 0;
3530}
3531
3532static void slab_mem_offline_callback(void *arg)
3533{
3534 struct kmem_cache_node *n;
3535 struct kmem_cache *s;
3536 struct memory_notify *marg = arg;
3537 int offline_node;
3538
3539 offline_node = marg->status_change_nid;
3540
3541
3542
3543
3544
3545 if (offline_node < 0)
3546 return;
3547
3548 down_read(&slub_lock);
3549 list_for_each_entry(s, &slab_caches, list) {
3550 n = get_node(s, offline_node);
3551 if (n) {
3552
3553
3554
3555
3556
3557
3558 BUG_ON(slabs_node(s, offline_node));
3559
3560 s->node[offline_node] = NULL;
3561 kmem_cache_free(kmem_cache_node, n);
3562 }
3563 }
3564 up_read(&slub_lock);
3565}
3566
3567static int slab_mem_going_online_callback(void *arg)
3568{
3569 struct kmem_cache_node *n;
3570 struct kmem_cache *s;
3571 struct memory_notify *marg = arg;
3572 int nid = marg->status_change_nid;
3573 int ret = 0;
3574
3575
3576
3577
3578
3579 if (nid < 0)
3580 return 0;
3581
3582
3583
3584
3585
3586
3587 down_read(&slub_lock);
3588 list_for_each_entry(s, &slab_caches, list) {
3589
3590
3591
3592
3593
3594 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3595 if (!n) {
3596 ret = -ENOMEM;
3597 goto out;
3598 }
3599 init_kmem_cache_node(n, s);
3600 s->node[nid] = n;
3601 }
3602out:
3603 up_read(&slub_lock);
3604 return ret;
3605}
3606
3607static int slab_memory_callback(struct notifier_block *self,
3608 unsigned long action, void *arg)
3609{
3610 int ret = 0;
3611
3612 switch (action) {
3613 case MEM_GOING_ONLINE:
3614 ret = slab_mem_going_online_callback(arg);
3615 break;
3616 case MEM_GOING_OFFLINE:
3617 ret = slab_mem_going_offline_callback(arg);
3618 break;
3619 case MEM_OFFLINE:
3620 case MEM_CANCEL_ONLINE:
3621 slab_mem_offline_callback(arg);
3622 break;
3623 case MEM_ONLINE:
3624 case MEM_CANCEL_OFFLINE:
3625 break;
3626 }
3627 if (ret)
3628 ret = notifier_from_errno(ret);
3629 else
3630 ret = NOTIFY_OK;
3631 return ret;
3632}
3633
3634#endif
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3646{
3647 int node;
3648
3649 list_add(&s->list, &slab_caches);
3650 s->refcount = -1;
3651
3652 for_each_node_state(node, N_NORMAL_MEMORY) {
3653 struct kmem_cache_node *n = get_node(s, node);
3654 struct page *p;
3655
3656 if (n) {
3657 list_for_each_entry(p, &n->partial, lru)
3658 p->slab = s;
3659
3660#ifdef CONFIG_SLUB_DEBUG
3661 list_for_each_entry(p, &n->full, lru)
3662 p->slab = s;
3663#endif
3664 }
3665 }
3666}
3667
/*
 * kmem_cache_init - boot-time setup of the slab allocator.
 *
 * Builds the two bootstrap caches (kmem_cache and kmem_cache_node) in
 * temporary pages, moves them into memory allocated from kmem_cache
 * itself, then creates the kmalloc caches, gives them their final
 * names, and optionally creates the DMA kmalloc caches.
 */
void __init kmem_cache_init(void)
{
	int i;
	int caches = 0;
	struct kmem_cache *temp_kmem_cache;
	int order;
	struct kmem_cache *temp_kmem_cache_node;
	unsigned long kmalloc_size;

	if (debug_guardpage_minorder())
		slub_max_order = 0;

	/* A descriptor ends with one node pointer per possible node id. */
	kmem_size = offsetof(struct kmem_cache, node) +
				nr_node_ids * sizeof(struct kmem_cache_node *);

	/* Temporary pages holding two cache-line aligned descriptors. */
	kmalloc_size = ALIGN(kmem_size, cache_line_size());
	order = get_order(2 * kmalloc_size);
	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);

	/*
	 * The second temporary descriptor lives right behind the first
	 * one and is opened first.
	 */
	kmem_cache_node = (void *)kmem_cache + kmalloc_size;

	kmem_cache_open(kmem_cache_node, "kmem_cache_node",
		sizeof(struct kmem_cache_node),
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);

	/* kmem_cache_node is usable from here on. */
	slab_state = PARTIAL;

	/*
	 * Open the temporary kmem_cache, allocate the permanent
	 * descriptor from it and copy the temporary one over.
	 */
	temp_kmem_cache = kmem_cache;
	kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
		0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache, temp_kmem_cache, kmem_size);

	/*
	 * Do the same for kmem_cache_node: its permanent descriptor is
	 * allocated from the (now permanent) kmem_cache.
	 */
	temp_kmem_cache_node = kmem_cache_node;

	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);

	kmem_cache_bootstrap_fixup(kmem_cache_node);

	caches++;
	kmem_cache_bootstrap_fixup(kmem_cache);
	caches++;
	/* Both temporary descriptors shared this one page allocation. */
	free_pages((unsigned long)temp_kmem_cache, order);

	/*
	 * Adjust size_index[] for the configured minimum kmalloc size.
	 * The code below relies on KMALLOC_MIN_SIZE being a power of two
	 * no larger than 256.
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	/* Sizes below the minimum all map to the smallest cache. */
	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);
		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE == 64) {
		/*
		 * No 96 byte cache is created in this case (see below), so
		 * 72..96 byte requests use slot 7 (128 bytes) instead.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;
	} else if (KMALLOC_MIN_SIZE == 128) {
		/*
		 * No 192 byte cache is created in this case (see below), so
		 * 136..192 byte requests use slot 8 (256 bytes) instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}

	/* The two non power-of-two caches occupy slots 1 and 2. */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
		caches++;
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
		caches++;
	}

	/* Power-of-two caches; they get a placeholder name for now. */
	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
		caches++;
	}

	slab_state = UP;

	/* Replace the boot-time names with dynamically allocated copies. */
	if (KMALLOC_MIN_SIZE <= 32) {
		kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[1]->name);
	}

	if (KMALLOC_MIN_SIZE <= 64) {
		kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
		BUG_ON(!kmalloc_caches[2]->name);
	}

	for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
		char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);

		BUG_ON(!s);
		kmalloc_caches[i]->name = s;
	}

#ifdef CONFIG_SMP
	register_cpu_notifier(&slab_notifier);
#endif

#ifdef CONFIG_ZONE_DMA
	/* Create a DMA twin for every kmalloc cache that exists. */
	for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s && s->size) {
			char *name = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", s->objsize);

			BUG_ON(!name);
			kmalloc_dma_caches[i] = create_kmalloc_cache(name,
				s->objsize, SLAB_CACHE_DMA);
		}
	}
#endif
	printk(KERN_INFO
		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
		" CPUs=%d, Nodes=%d\n",
		caches, cache_line_size(),
		slub_min_order, slub_max_order, slub_min_objects,
		nr_cpu_ids, nr_node_ids);
}
3829
/* Late initialisation hook; this allocator has nothing to do here. */
void __init kmem_cache_init_late(void)
{
}
3833
3834
3835
3836
3837static int slab_unmergeable(struct kmem_cache *s)
3838{
3839 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3840 return 1;
3841
3842 if (s->ctor)
3843 return 1;
3844
3845
3846
3847
3848 if (s->refcount < 0)
3849 return 1;
3850
3851 return 0;
3852}
3853
3854static struct kmem_cache *find_mergeable(size_t size,
3855 size_t align, unsigned long flags, const char *name,
3856 void (*ctor)(void *))
3857{
3858 struct kmem_cache *s;
3859
3860 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3861 return NULL;
3862
3863 if (ctor)
3864 return NULL;
3865
3866 size = ALIGN(size, sizeof(void *));
3867 align = calculate_alignment(flags, align, size);
3868 size = ALIGN(size, align);
3869 flags = kmem_cache_flags(size, flags, name, NULL);
3870
3871 list_for_each_entry(s, &slab_caches, list) {
3872 if (slab_unmergeable(s))
3873 continue;
3874
3875 if (size > s->size)
3876 continue;
3877
3878 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3879 continue;
3880
3881
3882
3883
3884 if ((s->size & ~(align - 1)) != s->size)
3885 continue;
3886
3887 if (s->size - size >= sizeof(void *))
3888 continue;
3889
3890 return s;
3891 }
3892 return NULL;
3893}
3894
3895struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3896 size_t align, unsigned long flags, void (*ctor)(void *))
3897{
3898 struct kmem_cache *s;
3899 char *n;
3900
3901 if (WARN_ON(!name))
3902 return NULL;
3903
3904 down_write(&slub_lock);
3905 s = find_mergeable(size, align, flags, name, ctor);
3906 if (s) {
3907 s->refcount++;
3908
3909
3910
3911
3912 s->objsize = max(s->objsize, (int)size);
3913 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3914
3915 if (sysfs_slab_alias(s, name)) {
3916 s->refcount--;
3917 goto err;
3918 }
3919 up_write(&slub_lock);
3920 return s;
3921 }
3922
3923 n = kstrdup(name, GFP_KERNEL);
3924 if (!n)
3925 goto err;
3926
3927 s = kmalloc(kmem_size, GFP_KERNEL);
3928 if (s) {
3929 if (kmem_cache_open(s, n,
3930 size, align, flags, ctor)) {
3931 list_add(&s->list, &slab_caches);
3932 up_write(&slub_lock);
3933 if (sysfs_slab_add(s)) {
3934 down_write(&slub_lock);
3935 list_del(&s->list);
3936 kfree(n);
3937 kfree(s);
3938 goto err;
3939 }
3940 return s;
3941 }
3942 kfree(n);
3943 kfree(s);
3944 }
3945err:
3946 up_write(&slub_lock);
3947
3948 if (flags & SLAB_PANIC)
3949 panic("Cannot create slabcache %s\n", name);
3950 else
3951 s = NULL;
3952 return s;
3953}
3954EXPORT_SYMBOL(kmem_cache_create);
3955
3956#ifdef CONFIG_SMP
3957
3958
3959
3960
3961static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3962 unsigned long action, void *hcpu)
3963{
3964 long cpu = (long)hcpu;
3965 struct kmem_cache *s;
3966 unsigned long flags;
3967
3968 switch (action) {
3969 case CPU_UP_CANCELED:
3970 case CPU_UP_CANCELED_FROZEN:
3971 case CPU_DEAD:
3972 case CPU_DEAD_FROZEN:
3973 down_read(&slub_lock);
3974 list_for_each_entry(s, &slab_caches, list) {
3975 local_irq_save(flags);
3976 __flush_cpu_slab(s, cpu);
3977 local_irq_restore(flags);
3978 }
3979 up_read(&slub_lock);
3980 break;
3981 default:
3982 break;
3983 }
3984 return NOTIFY_OK;
3985}
3986
/* CPU notifier block registered by kmem_cache_init() on SMP builds. */
static struct notifier_block __cpuinitdata slab_notifier = {
	.notifier_call = slab_cpuup_callback
};
3990
3991#endif
3992
3993void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3994{
3995 struct kmem_cache *s;
3996 void *ret;
3997
3998 if (unlikely(size > SLUB_MAX_SIZE))
3999 return kmalloc_large(size, gfpflags);
4000
4001 s = get_slab(size, gfpflags);
4002
4003 if (unlikely(ZERO_OR_NULL_PTR(s)))
4004 return s;
4005
4006 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
4007
4008
4009 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4010
4011 return ret;
4012}
4013
4014#ifdef CONFIG_NUMA
4015void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4016 int node, unsigned long caller)
4017{
4018 struct kmem_cache *s;
4019 void *ret;
4020
4021 if (unlikely(size > SLUB_MAX_SIZE)) {
4022 ret = kmalloc_large_node(size, gfpflags, node);
4023
4024 trace_kmalloc_node(caller, ret,
4025 size, PAGE_SIZE << get_order(size),
4026 gfpflags, node);
4027
4028 return ret;
4029 }
4030
4031 s = get_slab(size, gfpflags);
4032
4033 if (unlikely(ZERO_OR_NULL_PTR(s)))
4034 return s;
4035
4036 ret = slab_alloc(s, gfpflags, node, caller);
4037
4038
4039 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4040
4041 return ret;
4042}
4043#endif
4044
4045#ifdef CONFIG_SYSFS
4046static int count_inuse(struct page *page)
4047{
4048 return page->inuse;
4049}
4050
4051static int count_total(struct page *page)
4052{
4053 return page->objects;
4054}
4055#endif
4056
4057#ifdef CONFIG_SLUB_DEBUG
4058static int validate_slab(struct kmem_cache *s, struct page *page,
4059 unsigned long *map)
4060{
4061 void *p;
4062 void *addr = page_address(page);
4063
4064 if (!check_slab(s, page) ||
4065 !on_freelist(s, page, NULL))
4066 return 0;
4067
4068
4069 bitmap_zero(map, page->objects);
4070
4071 get_map(s, page, map);
4072 for_each_object(p, s, addr, page->objects) {
4073 if (test_bit(slab_index(p, s, addr), map))
4074 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4075 return 0;
4076 }
4077
4078 for_each_object(p, s, addr, page->objects)
4079 if (!test_bit(slab_index(p, s, addr), map))
4080 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4081 return 0;
4082 return 1;
4083}
4084
/*
 * Run validate_slab() on @page with the slab locked. The validation
 * result is not propagated to the caller.
 */
static void validate_slab_slab(struct kmem_cache *s, struct page *page,
						unsigned long *map)
{
	slab_lock(page);
	(void)validate_slab(s, page, map);
	slab_unlock(page);
}
4092
4093static int validate_slab_node(struct kmem_cache *s,
4094 struct kmem_cache_node *n, unsigned long *map)
4095{
4096 unsigned long count = 0;
4097 struct page *page;
4098 unsigned long flags;
4099
4100 spin_lock_irqsave(&n->list_lock, flags);
4101
4102 list_for_each_entry(page, &n->partial, lru) {
4103 validate_slab_slab(s, page, map);
4104 count++;
4105 }
4106 if (count != n->nr_partial)
4107 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4108 "counter=%ld\n", s->name, count, n->nr_partial);
4109
4110 if (!(s->flags & SLAB_STORE_USER))
4111 goto out;
4112
4113 list_for_each_entry(page, &n->full, lru) {
4114 validate_slab_slab(s, page, map);
4115 count++;
4116 }
4117 if (count != atomic_long_read(&n->nr_slabs))
4118 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4119 "counter=%ld\n", s->name, count,
4120 atomic_long_read(&n->nr_slabs));
4121
4122out:
4123 spin_unlock_irqrestore(&n->list_lock, flags);
4124 return count;
4125}
4126
4127static long validate_slab_cache(struct kmem_cache *s)
4128{
4129 int node;
4130 unsigned long count = 0;
4131 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4132 sizeof(unsigned long), GFP_KERNEL);
4133
4134 if (!map)
4135 return -ENOMEM;
4136
4137 flush_all(s);
4138 for_each_node_state(node, N_NORMAL_MEMORY) {
4139 struct kmem_cache_node *n = get_node(s, node);
4140
4141 count += validate_slab_node(s, n, map);
4142 }
4143 kfree(map);
4144 return count;
4145}
4146
4147
4148
4149
4150
/*
 * One aggregated entry of the alloc/free tracking report: statistics
 * for all tracked objects sharing the same recorded address. Filled in
 * by add_location() and printed by list_locations().
 */
struct location {
	unsigned long count;		/* number of objects folded into this entry */
	unsigned long addr;		/* track->addr shared by those objects */
	long long sum_time;		/* sum of ages (jiffies - track->when) */
	long min_time;			/* smallest age seen */
	long max_time;			/* largest age seen */
	long min_pid;			/* lowest track->pid seen */
	long max_pid;			/* highest track->pid seen */
	DECLARE_BITMAP(cpus, NR_CPUS);	/* set of track->cpu values seen */
	nodemask_t nodes;		/* nodes of the pages holding the track records */
};
4162
/* Growable array of struct location, kept sorted by addr. */
struct loc_track {
	unsigned long max;	/* capacity of loc[] in entries */
	unsigned long count;	/* entries currently in use */
	struct location *loc;	/* page-allocated storage, see alloc_loc_track() */
};
4168
4169static void free_loc_track(struct loc_track *t)
4170{
4171 if (t->max)
4172 free_pages((unsigned long)t->loc,
4173 get_order(sizeof(struct location) * t->max));
4174}
4175
4176static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4177{
4178 struct location *l;
4179 int order;
4180
4181 order = get_order(sizeof(struct location) * max);
4182
4183 l = (void *)__get_free_pages(flags, order);
4184 if (!l)
4185 return 0;
4186
4187 if (t->count) {
4188 memcpy(l, t->loc, sizeof(struct location) * t->count);
4189 free_loc_track(t);
4190 }
4191 t->max = max;
4192 t->loc = l;
4193 return 1;
4194}
4195
/*
 * Account one tracking record in the sorted location table @t.
 *
 * The table is binary-searched for track->addr. On a hit the existing
 * entry's statistics are updated; otherwise a new entry is inserted at
 * the position that keeps the table sorted, growing the table first if
 * it is full.
 *
 * Returns 1 on success, 0 if the table had to grow and the allocation
 * failed.
 */
static int add_location(struct loc_track *t, struct kmem_cache *s,
				const struct track *track)
{
	long start, end, pos;
	struct location *l;
	unsigned long caddr;
	unsigned long age = jiffies - track->when;

	/* Search the open interval (start, end). */
	start = -1;
	end = t->count;

	for ( ; ; ) {
		pos = start + (end - start + 1) / 2;

		/*
		 * The interval is empty once the midpoint hits the upper
		 * bound; pos is then the insertion point.
		 */
		if (pos == end)
			break;

		caddr = t->loc[pos].addr;
		if (track->addr == caddr) {

			/* Existing entry: fold this record into it. */
			l = &t->loc[pos];
			l->count++;
			if (track->when) {
				l->sum_time += age;
				if (age < l->min_time)
					l->min_time = age;
				if (age > l->max_time)
					l->max_time = age;

				if (track->pid < l->min_pid)
					l->min_pid = track->pid;
				if (track->pid > l->max_pid)
					l->max_pid = track->pid;

				cpumask_set_cpu(track->cpu,
						to_cpumask(l->cpus));
			}
			node_set(page_to_nid(virt_to_page(track)), l->nodes);
			return 1;
		}

		if (track->addr < caddr)
			end = pos;
		else
			start = pos;
	}

	/*
	 * Not found: make sure there is room for one more entry, doubling
	 * the table with an atomic allocation when it is full.
	 */
	if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
		return 0;

	/* Open a gap at pos and initialise the new entry there. */
	l = t->loc + pos;
	if (pos < t->count)
		memmove(l + 1, l,
			(t->count - pos) * sizeof(struct location));
	t->count++;
	l->count = 1;
	l->addr = track->addr;
	l->sum_time = age;
	l->min_time = age;
	l->max_time = age;
	l->min_pid = track->pid;
	l->max_pid = track->pid;
	cpumask_clear(to_cpumask(l->cpus));
	cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
	nodes_clear(l->nodes);
	node_set(page_to_nid(virt_to_page(track)), l->nodes);
	return 1;
}
4271
4272static void process_slab(struct loc_track *t, struct kmem_cache *s,
4273 struct page *page, enum track_item alloc,
4274 unsigned long *map)
4275{
4276 void *addr = page_address(page);
4277 void *p;
4278
4279 bitmap_zero(map, page->objects);
4280 get_map(s, page, map);
4281
4282 for_each_object(p, s, addr, page->objects)
4283 if (!test_bit(slab_index(p, s, addr), map))
4284 add_location(t, s, get_track(s, p, alloc));
4285}
4286
/*
 * Format the tracking report of cache @s for track type @alloc into
 * @buf, which is treated as a PAGE_SIZE buffer.
 *
 * All partial and full slabs of every node are scanned and aggregated
 * by recorded address; one line per location is then printed. Output
 * stops early when the buffer is nearly full.
 *
 * Returns the number of bytes written to @buf.
 */
static int list_locations(struct kmem_cache *s, char *buf,
					enum track_item alloc)
{
	int len = 0;
	unsigned long i;
	struct loc_track t = { 0, 0, NULL };
	int node;
	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
				     sizeof(unsigned long), GFP_KERNEL);

	/* Start with one page worth of location entries. */
	if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
				     GFP_TEMPORARY)) {
		kfree(map);
		return sprintf(buf, "Out of memory\n");
	}

	flush_all(s);

	for_each_node_state(node, N_NORMAL_MEMORY) {
		struct kmem_cache_node *n = get_node(s, node);
		unsigned long flags;
		struct page *page;

		if (!atomic_long_read(&n->nr_slabs))
			continue;

		spin_lock_irqsave(&n->list_lock, flags);
		list_for_each_entry(page, &n->partial, lru)
			process_slab(&t, s, page, alloc, map);
		list_for_each_entry(page, &n->full, lru)
			process_slab(&t, s, page, alloc, map);
		spin_unlock_irqrestore(&n->list_lock, flags);
	}

	for (i = 0; i < t.count; i++) {
		struct location *l = &t.loc[i];

		/* Keep headroom for one more symbol name plus statistics. */
		if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
			break;
		len += sprintf(buf + len, "%7ld ", l->count);

		if (l->addr)
			len += sprintf(buf + len, "%pS", (void *)l->addr);
		else
			len += sprintf(buf + len, "<not-available>");

		/* Print min/avg/max only when they can actually differ. */
		if (l->sum_time != l->min_time) {
			len += sprintf(buf + len, " age=%ld/%ld/%ld",
				l->min_time,
				(long)div_u64(l->sum_time, l->count),
				l->max_time);
		} else
			len += sprintf(buf + len, " age=%ld",
				l->min_time);

		if (l->min_pid != l->max_pid)
			len += sprintf(buf + len, " pid=%ld-%ld",
				l->min_pid, l->max_pid);
		else
			len += sprintf(buf + len, " pid=%ld",
				l->min_pid);

		if (num_online_cpus() > 1 &&
				!cpumask_empty(to_cpumask(l->cpus)) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " cpus=");
			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
						 to_cpumask(l->cpus));
		}

		if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
				len < PAGE_SIZE - 60) {
			len += sprintf(buf + len, " nodes=");
			len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
					l->nodes);
		}

		len += sprintf(buf + len, "\n");
	}

	free_loc_track(&t);
	kfree(map);
	/* len is still 0 here when no location was recorded. */
	if (!t.count)
		len += sprintf(buf, "No data\n");
	return len;
}
4373#endif
4374
4375#ifdef SLUB_RESILIENCY_TEST
/*
 * Self test, only built when SLUB_RESILIENCY_TEST is defined.
 *
 * Deliberately corrupts memory around kmalloc objects (writes past the
 * end of live objects and into freed objects) and runs
 * validate_slab_cache() on the affected kmalloc cache after each step.
 * The allocations made in part A are never freed, and allocation
 * results are not checked.
 */
static void resiliency_test(void)
{
	u8 *p;

	/* The test indexes kmalloc_caches[4..9] directly. */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);

	printk(KERN_ERR "SLUB resiliency testing\n");
	printk(KERN_ERR "-----------------------\n");
	printk(KERN_ERR "A. Corruption after allocation\n");

	/* Write one byte directly behind a 16 byte object. */
	p = kzalloc(16, GFP_KERNEL);
	p[16] = 0x12;
	printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
			" 0x12->0x%p\n\n", p + 16);

	validate_slab_cache(kmalloc_caches[4]);

	/* Write one pointer size behind the end of a 32 byte object. */
	p = kzalloc(32, GFP_KERNEL);
	p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34 -> -0x%p\n", p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");

	validate_slab_cache(kmalloc_caches[5]);
	/* Write at a pseudo-random offset behind a 64 byte object. */
	p = kzalloc(64, GFP_KERNEL);
	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
	*p = 0x56;
	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
	validate_slab_cache(kmalloc_caches[6]);

	printk(KERN_ERR "\nB. Corruption after free\n");
	/* Intentional use-after-free writes follow. */
	p = kzalloc(128, GFP_KERNEL);
	kfree(p);
	*p = 0x78;
	printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[7]);

	p = kzalloc(256, GFP_KERNEL);
	kfree(p);
	p[50] = 0x9a;
	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
	validate_slab_cache(kmalloc_caches[8]);

	p = kzalloc(512, GFP_KERNEL);
	kfree(p);
	p[512] = 0xab;
	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
	validate_slab_cache(kmalloc_caches[9]);
}
4431#else
4432#ifdef CONFIG_SYSFS
/* No-op stand-in used when SLUB_RESILIENCY_TEST is not defined. */
static void resiliency_test(void) {};
4434#endif
4435#endif
4436
4437#ifdef CONFIG_SYSFS
/*
 * Bit positions of the selectors understood by show_slab_objects();
 * the SO_* masks below are the matching flag values.
 */
enum slab_stat_type {
	SL_ALL,			/* count via the per-node totals */
	SL_PARTIAL,		/* count slabs on the partial lists */
	SL_CPU,			/* count per-cpu slabs */
	SL_OBJECTS,		/* report objects in use instead of slabs */
	SL_TOTAL		/* report total objects instead of slabs */
};

#define SO_ALL		(1 << SL_ALL)
#define SO_PARTIAL	(1 << SL_PARTIAL)
#define SO_CPU		(1 << SL_CPU)
#define SO_OBJECTS	(1 << SL_OBJECTS)
#define SO_TOTAL	(1 << SL_TOTAL)
4451
/*
 * Format slab or object counts of cache @s into @buf.
 *
 * @flags is a combination of SO_* bits. SO_CPU adds the per-cpu slabs,
 * SO_ALL (CONFIG_SLUB_DEBUG only) uses the per-node totals, and
 * SO_PARTIAL counts the partial lists; SO_ALL takes precedence over
 * SO_PARTIAL. SO_TOTAL / SO_OBJECTS select whether total objects,
 * objects in use, or plain slab counts are reported.
 *
 * The output is the grand total, followed on NUMA builds by " N<id>=<n>"
 * for every node with a non-zero count, and a newline.
 *
 * Returns the number of bytes written, or -ENOMEM.
 */
static ssize_t show_slab_objects(struct kmem_cache *s,
			    char *buf, unsigned long flags)
{
	unsigned long total = 0;
	int node;
	int x;
	unsigned long *nodes;
	unsigned long *per_cpu;

	/* One allocation holds both arrays: nodes[] then per_cpu[]. */
	nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
	if (!nodes)
		return -ENOMEM;
	per_cpu = nodes + nr_node_ids;

	if (flags & SO_CPU) {
		int cpu;

		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
			int node = ACCESS_ONCE(c->node);
			struct page *page;

			/* A negative node means this cpu is skipped. */
			if (node < 0)
				continue;
			page = ACCESS_ONCE(c->page);
			if (page) {
				if (flags & SO_TOTAL)
					x = page->objects;
				else if (flags & SO_OBJECTS)
					x = page->inuse;
				else
					x = 1;

				total += x;
				nodes[node] += x;
			}
			page = c->partial;

			/*
			 * The per-cpu partial list always contributes its
			 * pobjects value, whichever SO_* selector is set.
			 */
			if (page) {
				x = page->pobjects;
				total += x;
				nodes[node] += x;
			}
			per_cpu[node]++;
		}
	}

	lock_memory_hotplug();
#ifdef CONFIG_SLUB_DEBUG
	if (flags & SO_ALL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

		if (flags & SO_TOTAL)
			x = atomic_long_read(&n->total_objects);
		else if (flags & SO_OBJECTS)
			x = atomic_long_read(&n->total_objects) -
				count_partial(n, count_free);

			else
				x = atomic_long_read(&n->nr_slabs);
			total += x;
			nodes[node] += x;
		}

	} else
#endif
	if (flags & SO_PARTIAL) {
		for_each_node_state(node, N_NORMAL_MEMORY) {
			struct kmem_cache_node *n = get_node(s, node);

			if (flags & SO_TOTAL)
				x = count_partial(n, count_total);
			else if (flags & SO_OBJECTS)
				x = count_partial(n, count_inuse);
			else
				x = n->nr_partial;
			total += x;
			nodes[node] += x;
		}
	}
	/* From here on x is the number of bytes written to buf. */
	x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
	for_each_node_state(node, N_NORMAL_MEMORY)
		if (nodes[node])
			x += sprintf(buf + x, " N%d=%lu",
					node, nodes[node]);
#endif
	unlock_memory_hotplug();
	kfree(nodes);
	return x + sprintf(buf + x, "\n");
}
4544
4545#ifdef CONFIG_SLUB_DEBUG
4546static int any_slab_objects(struct kmem_cache *s)
4547{
4548 int node;
4549
4550 for_each_online_node(node) {
4551 struct kmem_cache_node *n = get_node(s, node);
4552
4553 if (!n)
4554 continue;
4555
4556 if (atomic_long_read(&n->total_objects))
4557 return 1;
4558 }
4559 return 0;
4560}
4561#endif
4562
/* Recover the enclosing slab_attribute from its embedded struct attribute. */
#define to_slab_attr(a) container_of(a, struct slab_attribute, attr)
/* Recover the enclosing kmem_cache from its embedded kobject. */
#define to_slab(k) container_of(k, struct kmem_cache, kobj)
4565
4566struct slab_attribute {
4567 struct attribute attr;
4568 ssize_t (*show)(struct kmem_cache *s, char *buf);
4569 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4570};
4571
/* Define <_name>_attr with mode 0400, served by <_name>_show() only. */
#define SLAB_ATTR_RO(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0400, _name##_show, NULL)

/* Define <_name>_attr with mode 0600, using <_name>_show()/<_name>_store(). */
#define SLAB_ATTR(_name) \
	static struct slab_attribute _name##_attr = \
	__ATTR(_name, 0600, _name##_show, _name##_store)
4579
4580static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4581{
4582 return sprintf(buf, "%d\n", s->size);
4583}
4584SLAB_ATTR_RO(slab_size);
4585
4586static ssize_t align_show(struct kmem_cache *s, char *buf)
4587{
4588 return sprintf(buf, "%d\n", s->align);
4589}
4590SLAB_ATTR_RO(align);
4591
4592static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4593{
4594 return sprintf(buf, "%d\n", s->objsize);
4595}
4596SLAB_ATTR_RO(object_size);
4597
4598static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4599{
4600 return sprintf(buf, "%d\n", oo_objects(s->oo));
4601}
4602SLAB_ATTR_RO(objs_per_slab);
4603
4604static ssize_t order_store(struct kmem_cache *s,
4605 const char *buf, size_t length)
4606{
4607 unsigned long order;
4608 int err;
4609
4610 err = strict_strtoul(buf, 10, &order);
4611 if (err)
4612 return err;
4613
4614 if (order > slub_max_order || order < slub_min_order)
4615 return -EINVAL;
4616
4617 calculate_sizes(s, order);
4618 return length;
4619}
4620
4621static ssize_t order_show(struct kmem_cache *s, char *buf)
4622{
4623 return sprintf(buf, "%d\n", oo_order(s->oo));
4624}
4625SLAB_ATTR(order);
4626
4627static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4628{
4629 return sprintf(buf, "%lu\n", s->min_partial);
4630}
4631
4632static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4633 size_t length)
4634{
4635 unsigned long min;
4636 int err;
4637
4638 err = strict_strtoul(buf, 10, &min);
4639 if (err)
4640 return err;
4641
4642 set_min_partial(s, min);
4643 return length;
4644}
4645SLAB_ATTR(min_partial);
4646
4647static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4648{
4649 return sprintf(buf, "%u\n", s->cpu_partial);
4650}
4651
4652static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4653 size_t length)
4654{
4655 unsigned long objects;
4656 int err;
4657
4658 err = strict_strtoul(buf, 10, &objects);
4659 if (err)
4660 return err;
4661 if (objects && kmem_cache_debug(s))
4662 return -EINVAL;
4663
4664 s->cpu_partial = objects;
4665 flush_all(s);
4666 return length;
4667}
4668SLAB_ATTR(cpu_partial);
4669
4670static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4671{
4672 if (!s->ctor)
4673 return 0;
4674 return sprintf(buf, "%pS\n", s->ctor);
4675}
4676SLAB_ATTR_RO(ctor);
4677
4678static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4679{
4680 return sprintf(buf, "%d\n", s->refcount - 1);
4681}
4682SLAB_ATTR_RO(aliases);
4683
4684static ssize_t partial_show(struct kmem_cache *s, char *buf)
4685{
4686 return show_slab_objects(s, buf, SO_PARTIAL);
4687}
4688SLAB_ATTR_RO(partial);
4689
4690static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4691{
4692 return show_slab_objects(s, buf, SO_CPU);
4693}
4694SLAB_ATTR_RO(cpu_slabs);
4695
4696static ssize_t objects_show(struct kmem_cache *s, char *buf)
4697{
4698 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4699}
4700SLAB_ATTR_RO(objects);
4701
4702static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4703{
4704 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4705}
4706SLAB_ATTR_RO(objects_partial);
4707
4708static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4709{
4710 int objects = 0;
4711 int pages = 0;
4712 int cpu;
4713 int len;
4714
4715 for_each_online_cpu(cpu) {
4716 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4717
4718 if (page) {
4719 pages += page->pages;
4720 objects += page->pobjects;
4721 }
4722 }
4723
4724 len = sprintf(buf, "%d(%d)", objects, pages);
4725
4726#ifdef CONFIG_SMP
4727 for_each_online_cpu(cpu) {
4728 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4729
4730 if (page && len < PAGE_SIZE - 20)
4731 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4732 page->pobjects, page->pages);
4733 }
4734#endif
4735 return len + sprintf(buf + len, "\n");
4736}
4737SLAB_ATTR_RO(slabs_cpu_partial);
4738
4739static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4740{
4741 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4742}
4743
4744static ssize_t reclaim_account_store(struct kmem_cache *s,
4745 const char *buf, size_t length)
4746{
4747 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4748 if (buf[0] == '1')
4749 s->flags |= SLAB_RECLAIM_ACCOUNT;
4750 return length;
4751}
4752SLAB_ATTR(reclaim_account);
4753
4754static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4755{
4756 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4757}
4758SLAB_ATTR_RO(hwcache_align);
4759
4760#ifdef CONFIG_ZONE_DMA
4761static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4762{
4763 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4764}
4765SLAB_ATTR_RO(cache_dma);
4766#endif
4767
4768static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4769{
4770 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4771}
4772SLAB_ATTR_RO(destroy_by_rcu);
4773
4774static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4775{
4776 return sprintf(buf, "%d\n", s->reserved);
4777}
4778SLAB_ATTR_RO(reserved);
4779
4780#ifdef CONFIG_SLUB_DEBUG
4781static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4782{
4783 return show_slab_objects(s, buf, SO_ALL);
4784}
4785SLAB_ATTR_RO(slabs);
4786
4787static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4788{
4789 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4790}
4791SLAB_ATTR_RO(total_objects);
4792
4793static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4794{
4795 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4796}
4797
4798static ssize_t sanity_checks_store(struct kmem_cache *s,
4799 const char *buf, size_t length)
4800{
4801 s->flags &= ~SLAB_DEBUG_FREE;
4802 if (buf[0] == '1') {
4803 s->flags &= ~__CMPXCHG_DOUBLE;
4804 s->flags |= SLAB_DEBUG_FREE;
4805 }
4806 return length;
4807}
4808SLAB_ATTR(sanity_checks);
4809
4810static ssize_t trace_show(struct kmem_cache *s, char *buf)
4811{
4812 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4813}
4814
4815static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4816 size_t length)
4817{
4818 s->flags &= ~SLAB_TRACE;
4819 if (buf[0] == '1') {
4820 s->flags &= ~__CMPXCHG_DOUBLE;
4821 s->flags |= SLAB_TRACE;
4822 }
4823 return length;
4824}
4825SLAB_ATTR(trace);
4826
4827static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4828{
4829 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4830}
4831
4832static ssize_t red_zone_store(struct kmem_cache *s,
4833 const char *buf, size_t length)
4834{
4835 if (any_slab_objects(s))
4836 return -EBUSY;
4837
4838 s->flags &= ~SLAB_RED_ZONE;
4839 if (buf[0] == '1') {
4840 s->flags &= ~__CMPXCHG_DOUBLE;
4841 s->flags |= SLAB_RED_ZONE;
4842 }
4843 calculate_sizes(s, -1);
4844 return length;
4845}
4846SLAB_ATTR(red_zone);
4847
4848static ssize_t poison_show(struct kmem_cache *s, char *buf)
4849{
4850 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4851}
4852
4853static ssize_t poison_store(struct kmem_cache *s,
4854 const char *buf, size_t length)
4855{
4856 if (any_slab_objects(s))
4857 return -EBUSY;
4858
4859 s->flags &= ~SLAB_POISON;
4860 if (buf[0] == '1') {
4861 s->flags &= ~__CMPXCHG_DOUBLE;
4862 s->flags |= SLAB_POISON;
4863 }
4864 calculate_sizes(s, -1);
4865 return length;
4866}
4867SLAB_ATTR(poison);
4868
4869static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4870{
4871 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4872}
4873
4874static ssize_t store_user_store(struct kmem_cache *s,
4875 const char *buf, size_t length)
4876{
4877 if (any_slab_objects(s))
4878 return -EBUSY;
4879
4880 s->flags &= ~SLAB_STORE_USER;
4881 if (buf[0] == '1') {
4882 s->flags &= ~__CMPXCHG_DOUBLE;
4883 s->flags |= SLAB_STORE_USER;
4884 }
4885 calculate_sizes(s, -1);
4886 return length;
4887}
4888SLAB_ATTR(store_user);
4889
4890static ssize_t validate_show(struct kmem_cache *s, char *buf)
4891{
4892 return 0;
4893}
4894
4895static ssize_t validate_store(struct kmem_cache *s,
4896 const char *buf, size_t length)
4897{
4898 int ret = -EINVAL;
4899
4900 if (buf[0] == '1') {
4901 ret = validate_slab_cache(s);
4902 if (ret >= 0)
4903 ret = length;
4904 }
4905 return ret;
4906}
4907SLAB_ATTR(validate);
4908
4909static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4910{
4911 if (!(s->flags & SLAB_STORE_USER))
4912 return -ENOSYS;
4913 return list_locations(s, buf, TRACK_ALLOC);
4914}
4915SLAB_ATTR_RO(alloc_calls);
4916
4917static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4918{
4919 if (!(s->flags & SLAB_STORE_USER))
4920 return -ENOSYS;
4921 return list_locations(s, buf, TRACK_FREE);
4922}
4923SLAB_ATTR_RO(free_calls);
4924#endif
4925
4926#ifdef CONFIG_FAILSLAB
4927static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4928{
4929 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4930}
4931
4932static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4933 size_t length)
4934{
4935 s->flags &= ~SLAB_FAILSLAB;
4936 if (buf[0] == '1')
4937 s->flags |= SLAB_FAILSLAB;
4938 return length;
4939}
4940SLAB_ATTR(failslab);
4941#endif
4942
4943static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4944{
4945 return 0;
4946}
4947
4948static ssize_t shrink_store(struct kmem_cache *s,
4949 const char *buf, size_t length)
4950{
4951 if (buf[0] == '1') {
4952 int rc = kmem_cache_shrink(s);
4953
4954 if (rc)
4955 return rc;
4956 } else
4957 return -EINVAL;
4958 return length;
4959}
4960SLAB_ATTR(shrink);
4961
4962#ifdef CONFIG_NUMA
4963static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4964{
4965 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4966}
4967
4968static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4969 const char *buf, size_t length)
4970{
4971 unsigned long ratio;
4972 int err;
4973
4974 err = strict_strtoul(buf, 10, &ratio);
4975 if (err)
4976 return err;
4977
4978 if (ratio <= 100)
4979 s->remote_node_defrag_ratio = ratio * 10;
4980
4981 return length;
4982}
4983SLAB_ATTR(remote_node_defrag_ratio);
4984#endif
4985
4986#ifdef CONFIG_SLUB_STATS
4987static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4988{
4989 unsigned long sum = 0;
4990 int cpu;
4991 int len;
4992 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4993
4994 if (!data)
4995 return -ENOMEM;
4996
4997 for_each_online_cpu(cpu) {
4998 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4999
5000 data[cpu] = x;
5001 sum += x;
5002 }
5003
5004 len = sprintf(buf, "%lu", sum);
5005
5006#ifdef CONFIG_SMP
5007 for_each_online_cpu(cpu) {
5008 if (data[cpu] && len < PAGE_SIZE - 20)
5009 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5010 }
5011#endif
5012 kfree(data);
5013 return len + sprintf(buf + len, "\n");
5014}
5015
5016static void clear_stat(struct kmem_cache *s, enum stat_item si)
5017{
5018 int cpu;
5019
5020 for_each_online_cpu(cpu)
5021 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5022}
5023
5024#define STAT_ATTR(si, text) \
5025static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5026{ \
5027 return show_stat(s, buf, si); \
5028} \
5029static ssize_t text##_store(struct kmem_cache *s, \
5030 const char *buf, size_t length) \
5031{ \
5032 if (buf[0] != '0') \
5033 return -EINVAL; \
5034 clear_stat(s, si); \
5035 return length; \
5036} \
5037SLAB_ATTR(text); \
5038
5039STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5040STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5041STAT_ATTR(FREE_FASTPATH, free_fastpath);
5042STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5043STAT_ATTR(FREE_FROZEN, free_frozen);
5044STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5045STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5046STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5047STAT_ATTR(ALLOC_SLAB, alloc_slab);
5048STAT_ATTR(ALLOC_REFILL, alloc_refill);
5049STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5050STAT_ATTR(FREE_SLAB, free_slab);
5051STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5052STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5053STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5054STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5055STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5056STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5057STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5058STAT_ATTR(ORDER_FALLBACK, order_fallback);
5059STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5060STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5061STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5062STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5063#endif
5064
5065static struct attribute *slab_attrs[] = {
5066 &slab_size_attr.attr,
5067 &object_size_attr.attr,
5068 &objs_per_slab_attr.attr,
5069 &order_attr.attr,
5070 &min_partial_attr.attr,
5071 &cpu_partial_attr.attr,
5072 &objects_attr.attr,
5073 &objects_partial_attr.attr,
5074 &partial_attr.attr,
5075 &cpu_slabs_attr.attr,
5076 &ctor_attr.attr,
5077 &aliases_attr.attr,
5078 &align_attr.attr,
5079 &hwcache_align_attr.attr,
5080 &reclaim_account_attr.attr,
5081 &destroy_by_rcu_attr.attr,
5082 &shrink_attr.attr,
5083 &reserved_attr.attr,
5084 &slabs_cpu_partial_attr.attr,
5085#ifdef CONFIG_SLUB_DEBUG
5086 &total_objects_attr.attr,
5087 &slabs_attr.attr,
5088 &sanity_checks_attr.attr,
5089 &trace_attr.attr,
5090 &red_zone_attr.attr,
5091 &poison_attr.attr,
5092 &store_user_attr.attr,
5093 &validate_attr.attr,
5094 &alloc_calls_attr.attr,
5095 &free_calls_attr.attr,
5096#endif
5097#ifdef CONFIG_ZONE_DMA
5098 &cache_dma_attr.attr,
5099#endif
5100#ifdef CONFIG_NUMA
5101 &remote_node_defrag_ratio_attr.attr,
5102#endif
5103#ifdef CONFIG_SLUB_STATS
5104 &alloc_fastpath_attr.attr,
5105 &alloc_slowpath_attr.attr,
5106 &free_fastpath_attr.attr,
5107 &free_slowpath_attr.attr,
5108 &free_frozen_attr.attr,
5109 &free_add_partial_attr.attr,
5110 &free_remove_partial_attr.attr,
5111 &alloc_from_partial_attr.attr,
5112 &alloc_slab_attr.attr,
5113 &alloc_refill_attr.attr,
5114 &alloc_node_mismatch_attr.attr,
5115 &free_slab_attr.attr,
5116 &cpuslab_flush_attr.attr,
5117 &deactivate_full_attr.attr,
5118 &deactivate_empty_attr.attr,
5119 &deactivate_to_head_attr.attr,
5120 &deactivate_to_tail_attr.attr,
5121 &deactivate_remote_frees_attr.attr,
5122 &deactivate_bypass_attr.attr,
5123 &order_fallback_attr.attr,
5124 &cmpxchg_double_fail_attr.attr,
5125 &cmpxchg_double_cpu_fail_attr.attr,
5126 &cpu_partial_alloc_attr.attr,
5127 &cpu_partial_free_attr.attr,
5128#endif
5129#ifdef CONFIG_FAILSLAB
5130 &failslab_attr.attr,
5131#endif
5132
5133 NULL
5134};
5135
5136static struct attribute_group slab_attr_group = {
5137 .attrs = slab_attrs,
5138};
5139
5140static ssize_t slab_attr_show(struct kobject *kobj,
5141 struct attribute *attr,
5142 char *buf)
5143{
5144 struct slab_attribute *attribute;
5145 struct kmem_cache *s;
5146 int err;
5147
5148 attribute = to_slab_attr(attr);
5149 s = to_slab(kobj);
5150
5151 if (!attribute->show)
5152 return -EIO;
5153
5154 err = attribute->show(s, buf);
5155
5156 return err;
5157}
5158
5159static ssize_t slab_attr_store(struct kobject *kobj,
5160 struct attribute *attr,
5161 const char *buf, size_t len)
5162{
5163 struct slab_attribute *attribute;
5164 struct kmem_cache *s;
5165 int err;
5166
5167 attribute = to_slab_attr(attr);
5168 s = to_slab(kobj);
5169
5170 if (!attribute->store)
5171 return -EIO;
5172
5173 err = attribute->store(s, buf, len);
5174
5175 return err;
5176}
5177
5178static void kmem_cache_release(struct kobject *kobj)
5179{
5180 struct kmem_cache *s = to_slab(kobj);
5181
5182 kfree(s->name);
5183 kfree(s);
5184}
5185
5186static const struct sysfs_ops slab_sysfs_ops = {
5187 .show = slab_attr_show,
5188 .store = slab_attr_store,
5189};
5190
5191static struct kobj_type slab_ktype = {
5192 .sysfs_ops = &slab_sysfs_ops,
5193 .release = kmem_cache_release
5194};
5195
5196static int uevent_filter(struct kset *kset, struct kobject *kobj)
5197{
5198 struct kobj_type *ktype = get_ktype(kobj);
5199
5200 if (ktype == &slab_ktype)
5201 return 1;
5202 return 0;
5203}
5204
5205static const struct kset_uevent_ops slab_uevent_ops = {
5206 .filter = uevent_filter,
5207};
5208
5209static struct kset *slab_kset;
5210
5211#define ID_STR_LENGTH 64
5212
5213
5214
5215
5216
5217static char *create_unique_id(struct kmem_cache *s)
5218{
5219 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5220 char *p = name;
5221
5222 BUG_ON(!name);
5223
5224 *p++ = ':';
5225
5226
5227
5228
5229
5230
5231
5232 if (s->flags & SLAB_CACHE_DMA)
5233 *p++ = 'd';
5234 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5235 *p++ = 'a';
5236 if (s->flags & SLAB_DEBUG_FREE)
5237 *p++ = 'F';
5238 if (!(s->flags & SLAB_NOTRACK))
5239 *p++ = 't';
5240 if (p != name + 1)
5241 *p++ = '-';
5242 p += sprintf(p, "%07d", s->size);
5243 BUG_ON(p > name + ID_STR_LENGTH - 1);
5244 return name;
5245}
5246
5247static int sysfs_slab_add(struct kmem_cache *s)
5248{
5249 int err;
5250 const char *name;
5251 int unmergeable;
5252
5253 if (slab_state < SYSFS)
5254
5255 return 0;
5256
5257 unmergeable = slab_unmergeable(s);
5258 if (unmergeable) {
5259
5260
5261
5262
5263
5264 sysfs_remove_link(&slab_kset->kobj, s->name);
5265 name = s->name;
5266 } else {
5267
5268
5269
5270
5271 name = create_unique_id(s);
5272 }
5273
5274 s->kobj.kset = slab_kset;
5275 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5276 if (err) {
5277 kobject_put(&s->kobj);
5278 return err;
5279 }
5280
5281 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5282 if (err) {
5283 kobject_del(&s->kobj);
5284 kobject_put(&s->kobj);
5285 return err;
5286 }
5287 kobject_uevent(&s->kobj, KOBJ_ADD);
5288 if (!unmergeable) {
5289
5290 sysfs_slab_alias(s, s->name);
5291 kfree(name);
5292 }
5293 return 0;
5294}
5295
5296static void sysfs_slab_remove(struct kmem_cache *s)
5297{
5298 if (slab_state < SYSFS)
5299
5300
5301
5302
5303 return;
5304
5305 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5306 kobject_del(&s->kobj);
5307 kobject_put(&s->kobj);
5308}
5309
5310
5311
5312
5313
/*
 * An alias requested through sysfs_slab_alias() before slab_state reached
 * SYSFS. Entries are chained on alias_list and replayed by
 * slab_sysfs_init().
 */
struct saved_alias {
	struct kmem_cache *s;		/* cache the alias points to */
	const char *name;		/* name of the symlink to create */
	struct saved_alias *next;	/* next deferred alias, or NULL */
};

/* Head of the singly linked list of deferred aliases. */
static struct saved_alias *alias_list;
5321
5322static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5323{
5324 struct saved_alias *al;
5325
5326 if (slab_state == SYSFS) {
5327
5328
5329
5330 sysfs_remove_link(&slab_kset->kobj, name);
5331 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5332 }
5333
5334 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5335 if (!al)
5336 return -ENOMEM;
5337
5338 al->s = s;
5339 al->name = name;
5340 al->next = alias_list;
5341 alias_list = al;
5342 return 0;
5343}
5344
5345static int __init slab_sysfs_init(void)
5346{
5347 struct kmem_cache *s;
5348 int err;
5349
5350 down_write(&slub_lock);
5351
5352 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5353 if (!slab_kset) {
5354 up_write(&slub_lock);
5355 printk(KERN_ERR "Cannot register slab subsystem.\n");
5356 return -ENOSYS;
5357 }
5358
5359 slab_state = SYSFS;
5360
5361 list_for_each_entry(s, &slab_caches, list) {
5362 err = sysfs_slab_add(s);
5363 if (err)
5364 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5365 " to sysfs\n", s->name);
5366 }
5367
5368 while (alias_list) {
5369 struct saved_alias *al = alias_list;
5370
5371 alias_list = alias_list->next;
5372 err = sysfs_slab_alias(al->s, al->name);
5373 if (err)
5374 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5375 " %s to sysfs\n", s->name);
5376 kfree(al);
5377 }
5378
5379 up_write(&slub_lock);
5380 resiliency_test();
5381 return 0;
5382}
5383
5384__initcall(slab_sysfs_init);
5385#endif
5386
5387
5388
5389
5390#ifdef CONFIG_SLABINFO
/*
 * Emit the two header lines of the slabinfo listing: the format version,
 * then the column legend terminated by a newline.
 */
static void print_slabinfo_header(struct seq_file *m)
{
	/* Version line */
	seq_puts(m, "slabinfo - version: 2.1\n");

	/* Column legend, written in three pieces on one output line */
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
	seq_putc(m, '\n');
}
5400
5401static void *s_start(struct seq_file *m, loff_t *pos)
5402{
5403 loff_t n = *pos;
5404
5405 down_read(&slub_lock);
5406 if (!n)
5407 print_slabinfo_header(m);
5408
5409 return seq_list_start(&slab_caches, *pos);
5410}
5411
5412static void *s_next(struct seq_file *m, void *p, loff_t *pos)
5413{
5414 return seq_list_next(p, &slab_caches, pos);
5415}
5416
5417static void s_stop(struct seq_file *m, void *p)
5418{
5419 up_read(&slub_lock);
5420}
5421
5422static int s_show(struct seq_file *m, void *p)
5423{
5424 unsigned long nr_partials = 0;
5425 unsigned long nr_slabs = 0;
5426 unsigned long nr_inuse = 0;
5427 unsigned long nr_objs = 0;
5428 unsigned long nr_free = 0;
5429 struct kmem_cache *s;
5430 int node;
5431
5432 s = list_entry(p, struct kmem_cache, list);
5433
5434 for_each_online_node(node) {
5435 struct kmem_cache_node *n = get_node(s, node);
5436
5437 if (!n)
5438 continue;
5439
5440 nr_partials += n->nr_partial;
5441 nr_slabs += atomic_long_read(&n->nr_slabs);
5442 nr_objs += atomic_long_read(&n->total_objects);
5443 nr_free += count_partial(n, count_free);
5444 }
5445
5446 nr_inuse = nr_objs - nr_free;
5447
5448 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
5449 nr_objs, s->size, oo_objects(s->oo),
5450 (1 << oo_order(s->oo)));
5451 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
5452 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
5453 0UL);
5454 seq_putc(m, '\n');
5455 return 0;
5456}
5457
5458static const struct seq_operations slabinfo_op = {
5459 .start = s_start,
5460 .next = s_next,
5461 .stop = s_stop,
5462 .show = s_show,
5463};
5464
5465static int slabinfo_open(struct inode *inode, struct file *file)
5466{
5467 return seq_open(file, &slabinfo_op);
5468}
5469
5470static const struct file_operations proc_slabinfo_operations = {
5471 .open = slabinfo_open,
5472 .read = seq_read,
5473 .llseek = seq_lseek,
5474 .release = seq_release,
5475};
5476
5477static int __init slab_proc_init(void)
5478{
5479 proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
5480 return 0;
5481}
5482module_init(slab_proc_init);
5483#endif
5484